1/* 2 * Copyright 2020 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22#include "nv50_ir.h" 23#include "nv50_ir_build_util.h" 24 25#include "nv50_ir_target_nvc0.h" 26#include "nv50_ir_lowering_gv100.h" 27 28#include <limits> 29 30namespace nv50_ir { 31 32bool 33GV100LegalizeSSA::handleCMP(Instruction *i) 34{ 35 Value *pred = bld.getSSA(1, FILE_PREDICATE); 36 37 bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, 38 i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz; 39 bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); 40 return true; 41} 42 43// NIR deals with most of these for us, but codegen generates more in pointer 44// calculations from other lowering passes. 45bool 46GV100LegalizeSSA::handleIADD64(Instruction *i) 47{ 48 Value *carry = bld.getSSA(1, FILE_PREDICATE); 49 Value *def[2] = { bld.getSSA(), bld.getSSA() }; 50 Value *src[2][2]; 51 52 for (int s = 0; s < 2; s++) { 53 if (i->getSrc(s)->reg.size == 8) { 54 bld.mkSplit(src[s], 4, i->getSrc(s)); 55 } else { 56 src[s][0] = i->getSrc(s); 57 src[s][1] = bld.mkImm(0); 58 } 59 } 60 61 bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])-> 62 setFlagsDef(1, carry); 63 bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])-> 64 setFlagsSrc(2, carry); 65 bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); 66 return true; 67} 68 69bool 70GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i) 71{ 72 Value *def = bld.getSSA(8), *defs[2]; 73 Value *src2; 74 75 if (i->srcExists(2) && 76 (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) { 77 Value *src2s[2] = { bld.getSSA(), bld.getSSA() }; 78 bld.mkMov(src2s[0], bld.mkImm(0)); 79 bld.mkMov(src2s[1], i->getSrc(2)); 80 src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0); 81 } else { 82 src2 = bld.mkImm(0); 83 } 84 85 bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def, 86 i->getSrc(0), i->getSrc(1), src2); 87 88 bld.mkSplit(defs, 4, def); 89 i->def(0).replace(defs[1], false); 90 return true; 91} 92 93// XXX: We should be able to do this in GV100LoweringPass, but codegen messes 94// up somehow and swaps the condcode without swapping the sources. 95// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test 96bool 97GV100LegalizeSSA::handleIMNMX(Instruction *i) 98{ 99 Value *pred = bld.getSSA(1, FILE_PREDICATE); 100 101 bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred, 102 i->sType, i->getSrc(0), i->getSrc(1)); 103 bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); 104 return true; 105} 106 107bool 108GV100LegalizeSSA::handleIMUL(Instruction *i) 109{ 110 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 111 return handleIMAD_HIGH(i); 112 113 bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), 114 bld.mkImm(0)); 115 return true; 116} 117 118bool 119GV100LegalizeSSA::handleLOP2(Instruction *i) 120{ 121 uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0; 122 uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1; 123 uint8_t subOp; 124 125 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) 126 src0 = ~src0; 127 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) 128 src1 = ~src1; 129 130 switch (i->op) { 131 case OP_AND: subOp = src0 & src1; break; 132 case OP_OR : subOp = src0 | src1; break; 133 case OP_XOR: subOp = src0 ^ src1; break; 134 default: 135 unreachable("invalid LOP2 opcode"); 136 } 137 138 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), 139 bld.mkImm(0))->subOp = subOp; 140 return true; 141} 142 143bool 144GV100LegalizeSSA::handleNOT(Instruction *i) 145{ 146 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0), 147 bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; 148 return true; 149} 150 151bool 152GV100LegalizeSSA::handlePREEX2(Instruction *i) 153{ 154 i->def(0).replace(i->src(0), false); 155 return true; 156} 157 158bool 159GV100LegalizeSSA::handleQUADON(Instruction *i) 160{ 161 bld.mkBMov(i->getDef(0), bld.mkTSVal(TS_MACTIVE)); 162 Instruction *b = bld.mkBMov(bld.mkTSVal(TS_PQUAD_MACTIVE), i->getDef(0)); 163 b->fixed = 1; 164 return true; 165} 166 167bool 168GV100LegalizeSSA::handleQUADPOP(Instruction *i) 169{ 170 Instruction *b = bld.mkBMov(bld.mkTSVal(TS_MACTIVE), i->getSrc(0)); 171 b->fixed = 1; 172 return true; 173} 174 175bool 176GV100LegalizeSSA::handleSET(Instruction *i) 177{ 178 Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; 179 Value *pred = bld.getSSA(1, FILE_PREDICATE), *met; 180 Instruction *xsetp; 181 182 if (isFloatType(i->dType)) { 183 if (i->sType == TYPE_F32) 184 return false; // HW has FSET.BF 185 met = bld.mkImm(0x3f800000); 186 } else { 187 met = bld.mkImm(0xffffffff); 188 } 189 190 xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType, 191 i->getSrc(0), i->getSrc(1)); 192 xsetp->src(0).mod = i->src(0).mod; 193 xsetp->src(1).mod = i->src(1).mod; 194 xsetp->setSrc(2, src2); 195 xsetp->ftz = i->ftz; 196 197 i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); 198 i->src(2).mod = Modifier(NV50_IR_MOD_NOT); 199 return true; 200} 201 202bool 203GV100LegalizeSSA::handleSHFL(Instruction *i) 204{ 205 Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE); 206 sync->fixed = 1; 207 sync->setSrc(0, bld.mkImm(0xffffffff)); 208 i->bb->insertBefore(i, sync); 209 return false; 210} 211 212bool 213GV100LegalizeSSA::handleShift(Instruction *i) 214{ 215 Value *zero = bld.mkImm(0); 216 Value *src1 = i->getSrc(1); 217 Value *src0, *src2; 218 uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R; 219 220 if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) { 221 src0 = i->getSrc(0); 222 src2 = zero; 223 } else { 224 src0 = zero; 225 src2 = i->getSrc(0); 226 subOp |= NV50_IR_SUBOP_SHF_HI; 227 } 228 if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP) 229 subOp |= NV50_IR_SUBOP_SHF_W; 230 231 bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp; 232 return true; 233} 234 235bool 236GV100LegalizeSSA::handleSUB(Instruction *i) 237{ 238 Instruction *xadd = 239 bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); 240 xadd->src(0).mod = i->src(0).mod; 241 xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); 242 xadd->ftz = i->ftz; 243 return true; 244} 245 246bool 247GV100LegalizeSSA::visit(Instruction *i) 248{ 249 bool lowered = false; 250 251 bld.setPosition(i, false); 252 if (i->sType == TYPE_F32 && i->dType != TYPE_F16 && 253 prog->getType() != Program::TYPE_COMPUTE) 254 handleFTZ(i); 255 256 switch (i->op) { 257 case OP_AND: 258 case OP_OR: 259 case OP_XOR: 260 if (i->def(0).getFile() != FILE_PREDICATE) 261 lowered = handleLOP2(i); 262 break; 263 case OP_NOT: 264 lowered = handleNOT(i); 265 break; 266 case OP_SHL: 267 case OP_SHR: 268 lowered = handleShift(i); 269 break; 270 case OP_SET: 271 case OP_SET_AND: 272 case OP_SET_OR: 273 case OP_SET_XOR: 274 if (i->def(0).getFile() != FILE_PREDICATE) 275 lowered = handleSET(i); 276 break; 277 case OP_SLCT: 278 lowered = handleCMP(i); 279 break; 280 case OP_PREEX2: 281 lowered = handlePREEX2(i); 282 break; 283 case OP_MUL: 284 if (!isFloatType(i->dType)) 285 lowered = handleIMUL(i); 286 break; 287 case OP_MAD: 288 if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH) 289 lowered = handleIMAD_HIGH(i); 290 break; 291 case OP_SHFL: 292 lowered = handleSHFL(i); 293 break; 294 case OP_QUADON: 295 lowered = handleQUADON(i); 296 break; 297 case OP_QUADPOP: 298 lowered = handleQUADPOP(i); 299 break; 300 case OP_SUB: 301 lowered = handleSUB(i); 302 break; 303 case OP_MAX: 304 case OP_MIN: 305 if (!isFloatType(i->dType)) 306 lowered = handleIMNMX(i); 307 break; 308 case OP_ADD: 309 if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8) 310 lowered = handleIADD64(i); 311 break; 312 case OP_PFETCH: 313 handlePFETCH(i); 314 break; 315 case OP_LOAD: 316 handleLOAD(i); 317 break; 318 default: 319 break; 320 } 321 322 if (lowered) 323 delete_Instruction(prog, i); 324 325 return true; 326} 327 328bool 329GV100LoweringPass::handleDMNMX(Instruction *i) 330{ 331 Value *pred = bld.getSSA(1, FILE_PREDICATE); 332 Value *src0[2], *src1[2], *dest[2]; 333 334 bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred, 335 i->sType, i->getSrc(0), i->getSrc(1)); 336 bld.mkSplit(src0, 4, i->getSrc(0)); 337 bld.mkSplit(src1, 4, i->getSrc(1)); 338 bld.mkSplit(dest, 4, i->getDef(0)); 339 bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred); 340 bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred); 341 bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]); 342 return true; 343} 344 345bool 346GV100LoweringPass::handleEXTBF(Instruction *i) 347{ 348 Value *bit = bld.getScratch(); 349 Value *cnt = bld.getScratch(); 350 Value *mask = bld.getScratch(); 351 Value *zero = bld.mkImm(0); 352 353 bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); 354 bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); 355 bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt); 356 bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask); 357 bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit); 358 if (isSignedType(i->dType)) 359 bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt); 360 361 return true; 362} 363 364bool 365GV100LoweringPass::handleFLOW(Instruction *i) 366{ 367 i->op = OP_BRA; 368 return false; 369} 370 371bool 372GV100LoweringPass::handleI2I(Instruction *i) 373{ 374 bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))-> 375 subOp = i->subOp; 376 bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0)); 377 return true; 378} 379 380bool 381GV100LoweringPass::handleINSBF(Instruction *i) 382{ 383 Value *bit = bld.getScratch(); 384 Value *cnt = bld.getScratch(); 385 Value *mask = bld.getScratch(); 386 Value *src0 = bld.getScratch(); 387 Value *zero = bld.mkImm(0); 388 389 bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); 390 bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); 391 bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt); 392 393 bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask); 394 bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit); 395 396 bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit); 397 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)-> 398 subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c)); 399 400 return true; 401} 402 403bool 404GV100LoweringPass::handlePINTERP(Instruction *i) 405{ 406 Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; 407 Instruction *ipa, *mul; 408 409 ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2); 410 ipa->ipa = i->ipa; 411 mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1)); 412 413 if (i->getInterpMode() == NV50_IR_INTERP_SC) { 414 ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE)); 415 mul->setPredicate(CC_NOT_P, ipa->getDef(1)); 416 } 417 418 return true; 419} 420 421bool 422GV100LoweringPass::handlePREFLOW(Instruction *i) 423{ 424 return true; 425} 426 427bool 428GV100LoweringPass::handlePRESIN(Instruction *i) 429{ 430 const float f = 1.0 / (2.0 * 3.14159265); 431 bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f)); 432 return true; 433} 434 435bool 436GV100LoweringPass::visit(Instruction *i) 437{ 438 bool lowered = false; 439 440 bld.setPosition(i, false); 441 442 switch (i->op) { 443 case OP_BREAK: 444 case OP_CONT: 445 lowered = handleFLOW(i); 446 break; 447 case OP_PREBREAK: 448 case OP_PRECONT: 449 lowered = handlePREFLOW(i); 450 break; 451 case OP_CVT: 452 if (i->src(0).getFile() != FILE_PREDICATE && 453 i->def(0).getFile() != FILE_PREDICATE && 454 !isFloatType(i->dType) && !isFloatType(i->sType)) 455 lowered = handleI2I(i); 456 break; 457 case OP_EXTBF: 458 lowered = handleEXTBF(i); 459 break; 460 case OP_INSBF: 461 lowered = handleINSBF(i); 462 break; 463 case OP_MAX: 464 case OP_MIN: 465 if (i->dType == TYPE_F64) 466 lowered = handleDMNMX(i); 467 break; 468 case OP_PINTERP: 469 lowered = handlePINTERP(i); 470 break; 471 case OP_PRESIN: 472 lowered = handlePRESIN(i); 473 break; 474 default: 475 break; 476 } 477 478 if (lowered) 479 delete_Instruction(prog, i); 480 481 return true; 482} 483 484} // namespace nv50_ir 485