1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "nv50_ir.h" 24#include "nv50_ir_target.h" 25 26namespace nv50_ir { 27 28const uint8_t Target::operationSrcNr[] = 29{ 30 0, 0, // NOP, PHI 31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 32 1, 1, 2, // MOV, LOAD, STORE 33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 34 3, 3, // SHLADD, XMAD 35 1, 1, 1, // ABS, NEG, NOT 36 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF 37 2, 2, 1, // MAX, MIN, SAT 38 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 39 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT 40 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 41 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW 42 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 43 0, 0, 0, // PRERET,CONT,BREAK 44 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 45 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP 46 1, 1, 1, // EMIT, RESTART, FINAL 47 1, 1, 1, // TEX, TXB, TXL, 48 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 49 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA 50 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 51 0, // TEXBAR 52 1, 1, // DFDX, DFDY 53 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 54 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT 55 2, // SGXT 56 3, 2, // ATOM, BAR 57 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 58 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 59 3, // SHFL 60 1, // VOTE 61 1, // BUFQ 62 1, // WARPSYNC 63 0 64}; 65 66const OpClass Target::operationClass[] = 67{ 68 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT 69 OPCLASS_OTHER, 70 OPCLASS_PSEUDO, 71 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, 72 // MOV; LOAD; STORE 73 OPCLASS_MOVE, 74 OPCLASS_LOAD, 75 OPCLASS_STORE, 76 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD 77 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 78 OPCLASS_ARITH, OPCLASS_ARITH, 79 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 80 // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF 81 OPCLASS_CONVERT, OPCLASS_CONVERT, 82 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, 83 OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT, 84 // MAX, MIN 85 OPCLASS_COMPARE, OPCLASS_COMPARE, 86 // SAT, CEIL, FLOOR, TRUNC; CVT 87 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, 88 OPCLASS_CONVERT, 89 // SET(AND,OR,XOR); SELP, SLCT 90 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, 91 OPCLASS_COMPARE, OPCLASS_COMPARE, 92 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW 93 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 94 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 95 OPCLASS_SFU, OPCLASS_SFU, 96 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN 97 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 98 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 99 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 100 // DISCARD, EXIT 101 OPCLASS_FLOW, OPCLASS_FLOW, 102 // MEMBAR 103 OPCLASS_CONTROL, 104 // VFETCH, PFETCH, AFETCH, EXPORT 105 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, 106 // LINTERP, PINTERP 107 OPCLASS_SFU, OPCLASS_SFU, 108 // EMIT, RESTART, FINAL 109 OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL, 110 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP 111 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 112 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 113 OPCLASS_TEXTURE, OPCLASS_TEXTURE, 114 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA 115 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, 116 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, 117 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 118 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, 119 // TEXBAR 120 OPCLASS_OTHER, 121 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP 122 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 123 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, 124 // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT 125 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 126 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 127 // ATOM, BAR 128 OPCLASS_ATOMIC, OPCLASS_CONTROL, 129 // VADD, VAVG, VMIN, VMAX 130 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 131 // VSAD, VSET, VSHR, VSHL 132 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 133 // VSEL, CCTL 134 OPCLASS_VECTOR, OPCLASS_CONTROL, 135 // SHFL 136 OPCLASS_OTHER, 137 // VOTE 138 OPCLASS_OTHER, 139 // BUFQ 140 OPCLASS_OTHER, 141 // WARPSYNC 142 OPCLASS_OTHER, 143 OPCLASS_PSEUDO // LAST 144}; 145 146 147extern Target *getTargetGV100(unsigned int chipset); 148extern Target *getTargetGM107(unsigned int chipset); 149extern Target *getTargetNVC0(unsigned int chipset); 150extern Target *getTargetNV50(unsigned int chipset); 151 152Target *Target::create(unsigned int chipset) 153{ 154 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); 155 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); 156 switch (chipset & ~0xf) { 157 case 0x170: 158 case 0x160: 159 case 0x140: 160 return getTargetGV100(chipset); 161 case 0x110: 162 case 0x120: 163 case 0x130: 164 return getTargetGM107(chipset); 165 case 0xc0: 166 case 0xd0: 167 case 0xe0: 168 case 0xf0: 169 case 0x100: 170 return getTargetNVC0(chipset); 171 case 0x50: 172 case 0x80: 173 case 0x90: 174 case 0xa0: 175 return getTargetNV50(chipset); 176 default: 177 ERROR("unsupported target: NV%x\n", chipset); 178 return 0; 179 } 180} 181 182void Target::destroy(Target *targ) 183{ 184 delete targ; 185} 186 187CodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL), 188 codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL) 189{ 190} 191 192void 193CodeEmitter::setCodeLocation(void *ptr, uint32_t size) 194{ 195 code = reinterpret_cast<uint32_t *>(ptr); 196 codeSize = 0; 197 codeSizeLimit = size; 198} 199 200void 201CodeEmitter::printBinary() const 202{ 203 uint32_t *bin = code - codeSize / 4; 204 INFO("program binary (%u bytes)", codeSize); 205 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { 206 if ((pos % 8) == 0) 207 INFO("\n"); 208 INFO("%08x ", bin[pos]); 209 } 210 INFO("\n"); 211} 212 213static inline uint32_t sizeToBundlesNVE4(uint32_t size) 214{ 215 return (size + 55) / 56; 216} 217 218void 219CodeEmitter::prepareEmission(Program *prog) 220{ 221 for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 222 !fi.end(); fi.next()) { 223 Function *func = reinterpret_cast<Function *>(fi.get()); 224 func->binPos = prog->binSize; 225 prepareEmission(func); 226 227 // adjust sizes & positions for scheduling info: 228 if (prog->getTarget()->hasSWSched) { 229 uint32_t adjPos = func->binPos; 230 BasicBlock *bb = NULL; 231 for (int i = 0; i < func->bbCount; ++i) { 232 bb = func->bbArray[i]; 233 int32_t adjSize = bb->binSize; 234 if (adjPos % 64) { 235 adjSize -= 64 - adjPos % 64; 236 if (adjSize < 0) 237 adjSize = 0; 238 } 239 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; 240 bb->binPos = adjPos; 241 bb->binSize = adjSize; 242 adjPos += adjSize; 243 } 244 if (bb) 245 func->binSize = adjPos - func->binPos; 246 } 247 248 prog->binSize += func->binSize; 249 } 250} 251 252void 253CodeEmitter::prepareEmission(Function *func) 254{ 255 func->bbCount = 0; 256 func->bbArray = new BasicBlock * [func->cfg.getSize()]; 257 258 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; 259 260 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) 261 prepareEmission(BasicBlock::get(*it)); 262} 263 264void 265CodeEmitter::prepareEmission(BasicBlock *bb) 266{ 267 Instruction *i, *next; 268 Function *func = bb->getFunction(); 269 int j; 270 unsigned int nShort; 271 272 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); 273 274 for (; j >= 0; --j) { 275 BasicBlock *in = func->bbArray[j]; 276 Instruction *exit = in->getExit(); 277 278 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { 279 in->binSize -= 8; 280 func->binSize -= 8; 281 282 for (++j; j < func->bbCount; ++j) 283 func->bbArray[j]->binPos -= 8; 284 285 in->remove(exit); 286 } 287 bb->binPos = in->binPos + in->binSize; 288 if (in->binSize) // no more no-op branches to bb 289 break; 290 } 291 func->bbArray[func->bbCount++] = bb; 292 293 if (!bb->getExit()) 294 return; 295 296 // determine encoding size, try to group short instructions 297 nShort = 0; 298 for (i = bb->getEntry(); i; i = next) { 299 next = i->next; 300 301 i->encSize = getMinEncodingSize(i); 302 if (next && i->encSize < 8) 303 ++nShort; 304 else 305 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { 306 if (i->isCommutationLegal(i->next)) { 307 bb->permuteAdjacent(i, next); 308 next->encSize = 4; 309 next = i; 310 i = i->prev; 311 ++nShort; 312 } else 313 if (i->isCommutationLegal(i->prev) && next->next) { 314 bb->permuteAdjacent(i->prev, i); 315 next->encSize = 4; 316 next = next->next; 317 bb->binSize += 4; 318 ++nShort; 319 } else { 320 i->encSize = 8; 321 i->prev->encSize = 8; 322 bb->binSize += 4; 323 nShort = 0; 324 } 325 } else { 326 i->encSize = 8; 327 if (nShort & 1) { 328 i->prev->encSize = 8; 329 bb->binSize += 4; 330 } 331 nShort = 0; 332 } 333 bb->binSize += i->encSize; 334 } 335 336 if (bb->getExit()->encSize == 4) { 337 assert(nShort); 338 bb->getExit()->encSize = 8; 339 bb->binSize += 4; 340 341 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { 342 bb->binSize += 8; 343 bb->getExit()->prev->encSize = 8; 344 } 345 } 346 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); 347 348 func->binSize += bb->binSize; 349} 350 351bool 352Program::emitBinary(struct nv50_ir_prog_info_out *info) 353{ 354 CodeEmitter *emit = target->getCodeEmitter(progType); 355 356 emit->prepareEmission(this); 357 358 if (dbgFlags & NV50_IR_DEBUG_BASIC) 359 this->print(); 360 361 if (!binSize) { 362 code = NULL; 363 return false; 364 } 365 code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); 366 if (!code) 367 return false; 368 emit->setCodeLocation(code, binSize); 369 info->bin.instructions = 0; 370 371 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { 372 Function *fn = reinterpret_cast<Function *>(fi.get()); 373 374 assert(emit->getCodeSize() == fn->binPos); 375 376 for (int b = 0; b < fn->bbCount; ++b) { 377 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) { 378 emit->emitInstruction(i); 379 info->bin.instructions++; 380 if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) && 381 (isFloatType(i->sType) || isFloatType(i->dType))) 382 info->io.fp64 = true; 383 } 384 } 385 } 386 info->io.fp64 |= fp64; 387 info->bin.relocData = emit->getRelocInfo(); 388 info->bin.fixupData = emit->getFixupInfo(); 389 390 // the nvc0 driver will print the binary itself together with the header 391 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) 392 emit->printBinary(); 393 394 delete emit; 395 return true; 396} 397 398#define RELOC_ALLOC_INCREMENT 8 399 400bool 401CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, 402 int s) 403{ 404 unsigned int n = relocInfo ? relocInfo->count : 0; 405 406 if (!(n % RELOC_ALLOC_INCREMENT)) { 407 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); 408 relocInfo = reinterpret_cast<RelocInfo *>( 409 REALLOC(relocInfo, n ? size : 0, 410 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); 411 if (!relocInfo) 412 return false; 413 if (n == 0) 414 memset(relocInfo, 0, sizeof(RelocInfo)); 415 } 416 ++relocInfo->count; 417 418 relocInfo->entry[n].data = data; 419 relocInfo->entry[n].mask = m; 420 relocInfo->entry[n].offset = codeSize + w * 4; 421 relocInfo->entry[n].bitPos = s; 422 relocInfo->entry[n].type = ty; 423 424 return true; 425} 426 427bool 428CodeEmitter::addInterp(int ipa, int reg, FixupApply apply) 429{ 430 unsigned int n = fixupInfo ? fixupInfo->count : 0; 431 432 if (!(n % RELOC_ALLOC_INCREMENT)) { 433 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry); 434 fixupInfo = reinterpret_cast<FixupInfo *>( 435 REALLOC(fixupInfo, n ? size : 0, 436 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry))); 437 if (!fixupInfo) 438 return false; 439 if (n == 0) 440 fixupInfo->count = 0; 441 } 442 ++fixupInfo->count; 443 444 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2); 445 446 return true; 447} 448 449void 450RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const 451{ 452 uint32_t value = 0; 453 454 switch (type) { 455 case TYPE_CODE: value = info->codePos; break; 456 case TYPE_BUILTIN: value = info->libPos; break; 457 case TYPE_DATA: value = info->dataPos; break; 458 default: 459 assert(0); 460 break; 461 } 462 value += data; 463 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); 464 465 binary[offset / 4] &= ~mask; 466 binary[offset / 4] |= value & mask; 467} 468 469} // namespace nv50_ir 470 471 472#include "nv50_ir_driver.h" 473 474extern "C" { 475 476void 477nv50_ir_relocate_code(void *relocData, uint32_t *code, 478 uint32_t codePos, 479 uint32_t libPos, 480 uint32_t dataPos) 481{ 482 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); 483 484 info->codePos = codePos; 485 info->libPos = libPos; 486 info->dataPos = dataPos; 487 488 for (unsigned int i = 0; i < info->count; ++i) 489 info->entry[i].apply(code, info); 490} 491 492void 493nv50_ir_apply_fixups(void *fixupData, uint32_t *code, 494 bool force_persample_interp, bool flatshade, 495 uint8_t alphatest, bool msaa) 496{ 497 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>( 498 fixupData); 499 500 // force_persample_interp: all non-flat -> per-sample 501 // flatshade: all color -> flat 502 // alphatest: PIPE_FUNC_* to use with alphatest 503 // msaa: false = sample id -> 0 for interpolateAtSample 504 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa); 505 for (unsigned i = 0; i < info->count; ++i) 506 info->entry[i].apply(&info->entry[i], code, data); 507} 508 509void 510nv50_ir_get_target_library(uint32_t chipset, 511 const uint32_t **code, uint32_t *size) 512{ 513 nv50_ir::Target *targ = nv50_ir::Target::create(chipset); 514 targ->getBuiltinCode(code, size); 515 nv50_ir::Target::destroy(targ); 516} 517 518} 519