1/* 2 * Copyright 2017 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Karol Herbst <kherbst@redhat.com> 23 */ 24 25#include "compiler/nir/nir.h" 26 27#include "util/u_debug.h" 28#include "util/u_prim.h" 29 30#include "nv50_ir.h" 31#include "nv50_ir_from_common.h" 32#include "nv50_ir_lowering_helper.h" 33#include "nv50_ir_target.h" 34#include "nv50_ir_util.h" 35#include "tgsi/tgsi_from_mesa.h" 36 37#include <unordered_map> 38#include <cstring> 39#include <list> 40#include <vector> 41 42namespace { 43 44using namespace nv50_ir; 45 46int 47type_size(const struct glsl_type *type, bool bindless) 48{ 49 return glsl_count_attribute_slots(type, false); 50} 51 52static void 53function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 54{ 55 assert(glsl_type_is_vector_or_scalar(type)); 56 57 if (glsl_type_is_scalar(type)) { 58 glsl_get_natural_size_align_bytes(type, size, align); 59 } else { 60 unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; 61 unsigned length = glsl_get_vector_elements(type); 62 63 *size = comp_size * length; 64 *align = 0x10; 65 } 66} 67 68class Converter : public ConverterCommon 69{ 70public: 71 Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *); 72 73 bool run(); 74private: 75 typedef std::vector<LValue*> LValues; 76 typedef std::unordered_map<unsigned, LValues> NirDefMap; 77 typedef std::unordered_map<unsigned, nir_load_const_instr*> ImmediateMap; 78 typedef std::unordered_map<unsigned, BasicBlock*> NirBlockMap; 79 80 CacheMode convert(enum gl_access_qualifier); 81 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); 82 LValues& convert(nir_alu_dest *); 83 BasicBlock* convert(nir_block *); 84 LValues& convert(nir_dest *); 85 SVSemantic convert(nir_intrinsic_op); 86 Value* convert(nir_load_const_instr*, uint8_t); 87 LValues& convert(nir_register *); 88 LValues& convert(nir_ssa_def *); 89 90 Value* getSrc(nir_alu_src *, uint8_t component = 0); 91 Value* getSrc(nir_register *, uint8_t); 92 Value* getSrc(nir_src *, uint8_t, bool indirect = false); 93 Value* getSrc(nir_ssa_def *, uint8_t); 94 95 // returned value is the constant part of the given source (either the 96 // nir_src or the selected source component of an intrinsic). Even though 97 // this is mostly an optimization to be able to skip indirects in a few 98 // cases, sometimes we require immediate values or set some fileds on 99 // instructions (e.g. tex) in order for codegen to consume those. 100 // If the found value has not a constant part, the Value gets returned 101 // through the Value parameter. 102 uint32_t getIndirect(nir_src *, uint8_t, Value *&); 103 // isScalar indicates that the addressing is scalar, vec4 addressing is 104 // assumed otherwise 105 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&, 106 bool isScalar = false); 107 108 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot); 109 110 void setInterpolate(nv50_ir_varying *, 111 uint8_t, 112 bool centroid, 113 unsigned semantics); 114 115 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base, 116 uint8_t c, Value *indirect0 = NULL, 117 Value *indirect1 = NULL, bool patch = false); 118 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType, 119 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL, 120 Value *indirect1 = NULL); 121 122 bool isFloatType(nir_alu_type); 123 bool isSignedType(nir_alu_type); 124 bool isResultFloat(nir_op); 125 bool isResultSigned(nir_op); 126 127 DataType getDType(nir_alu_instr *); 128 DataType getDType(nir_intrinsic_instr *); 129 DataType getDType(nir_op, uint8_t); 130 131 DataFile getFile(nir_intrinsic_op); 132 133 std::vector<DataType> getSTypes(nir_alu_instr *); 134 DataType getSType(nir_src &, bool isFloat, bool isSigned); 135 136 operation getOperation(nir_intrinsic_op); 137 operation getOperation(nir_op); 138 operation getOperation(nir_texop); 139 operation preOperationNeeded(nir_op); 140 141 int getSubOp(nir_intrinsic_op); 142 int getSubOp(nir_op); 143 144 CondCode getCondCode(nir_op); 145 146 bool assignSlots(); 147 bool parseNIR(); 148 149 bool visit(nir_alu_instr *); 150 bool visit(nir_block *); 151 bool visit(nir_cf_node *); 152 bool visit(nir_function *); 153 bool visit(nir_if *); 154 bool visit(nir_instr *); 155 bool visit(nir_intrinsic_instr *); 156 bool visit(nir_jump_instr *); 157 bool visit(nir_load_const_instr*); 158 bool visit(nir_loop *); 159 bool visit(nir_ssa_undef_instr *); 160 bool visit(nir_tex_instr *); 161 162 // tex stuff 163 unsigned int getNIRArgCount(TexInstruction::Target&); 164 165 nir_shader *nir; 166 167 NirDefMap ssaDefs; 168 NirDefMap regDefs; 169 ImmediateMap immediates; 170 NirBlockMap blocks; 171 unsigned int curLoopDepth; 172 unsigned int curIfDepth; 173 174 BasicBlock *exit; 175 Value *zero; 176 Instruction *immInsertPos; 177 178 int clipVertexOutput; 179 180 union { 181 struct { 182 Value *position; 183 } fp; 184 }; 185}; 186 187Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info, 188 nv50_ir_prog_info_out *info_out) 189 : ConverterCommon(prog, info, info_out), 190 nir(nir), 191 curLoopDepth(0), 192 curIfDepth(0), 193 exit(NULL), 194 immInsertPos(NULL), 195 clipVertexOutput(-1) 196{ 197 zero = mkImm((uint32_t)0); 198} 199 200BasicBlock * 201Converter::convert(nir_block *block) 202{ 203 NirBlockMap::iterator it = blocks.find(block->index); 204 if (it != blocks.end()) 205 return it->second; 206 207 BasicBlock *bb = new BasicBlock(func); 208 blocks[block->index] = bb; 209 return bb; 210} 211 212bool 213Converter::isFloatType(nir_alu_type type) 214{ 215 return nir_alu_type_get_base_type(type) == nir_type_float; 216} 217 218bool 219Converter::isSignedType(nir_alu_type type) 220{ 221 return nir_alu_type_get_base_type(type) == nir_type_int; 222} 223 224bool 225Converter::isResultFloat(nir_op op) 226{ 227 const nir_op_info &info = nir_op_infos[op]; 228 if (info.output_type != nir_type_invalid) 229 return isFloatType(info.output_type); 230 231 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name); 232 assert(false); 233 return true; 234} 235 236bool 237Converter::isResultSigned(nir_op op) 238{ 239 switch (op) { 240 // there is no umul and we get wrong results if we treat all muls as signed 241 case nir_op_imul: 242 case nir_op_inot: 243 return false; 244 default: 245 const nir_op_info &info = nir_op_infos[op]; 246 if (info.output_type != nir_type_invalid) 247 return isSignedType(info.output_type); 248 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name); 249 assert(false); 250 return true; 251 } 252} 253 254DataType 255Converter::getDType(nir_alu_instr *insn) 256{ 257 if (insn->dest.dest.is_ssa) 258 return getDType(insn->op, insn->dest.dest.ssa.bit_size); 259 else 260 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size); 261} 262 263DataType 264Converter::getDType(nir_intrinsic_instr *insn) 265{ 266 bool isFloat, isSigned; 267 switch (insn->intrinsic) { 268 case nir_intrinsic_bindless_image_atomic_fadd: 269 case nir_intrinsic_global_atomic_fadd: 270 case nir_intrinsic_image_atomic_fadd: 271 case nir_intrinsic_shared_atomic_fadd: 272 case nir_intrinsic_ssbo_atomic_fadd: 273 isFloat = true; 274 isSigned = false; 275 break; 276 case nir_intrinsic_shared_atomic_imax: 277 case nir_intrinsic_shared_atomic_imin: 278 case nir_intrinsic_ssbo_atomic_imax: 279 case nir_intrinsic_ssbo_atomic_imin: 280 isFloat = false; 281 isSigned = true; 282 break; 283 default: 284 isFloat = false; 285 isSigned = false; 286 break; 287 } 288 289 if (insn->dest.is_ssa) 290 return typeOfSize(insn->dest.ssa.bit_size / 8, isFloat, isSigned); 291 else 292 return typeOfSize(insn->dest.reg.reg->bit_size / 8, isFloat, isSigned); 293} 294 295DataType 296Converter::getDType(nir_op op, uint8_t bitSize) 297{ 298 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op)); 299 if (ty == TYPE_NONE) { 300 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize); 301 assert(false); 302 } 303 return ty; 304} 305 306std::vector<DataType> 307Converter::getSTypes(nir_alu_instr *insn) 308{ 309 const nir_op_info &info = nir_op_infos[insn->op]; 310 std::vector<DataType> res(info.num_inputs); 311 312 for (uint8_t i = 0; i < info.num_inputs; ++i) { 313 if (info.input_types[i] != nir_type_invalid) { 314 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i])); 315 } else { 316 ERROR("getSType not implemented for %s idx %u\n", info.name, i); 317 assert(false); 318 res[i] = TYPE_NONE; 319 break; 320 } 321 } 322 323 return res; 324} 325 326DataType 327Converter::getSType(nir_src &src, bool isFloat, bool isSigned) 328{ 329 uint8_t bitSize; 330 if (src.is_ssa) 331 bitSize = src.ssa->bit_size; 332 else 333 bitSize = src.reg.reg->bit_size; 334 335 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned); 336 if (ty == TYPE_NONE) { 337 const char *str; 338 if (isFloat) 339 str = "float"; 340 else if (isSigned) 341 str = "int"; 342 else 343 str = "uint"; 344 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize); 345 assert(false); 346 } 347 return ty; 348} 349 350DataFile 351Converter::getFile(nir_intrinsic_op op) 352{ 353 switch (op) { 354 case nir_intrinsic_load_global: 355 case nir_intrinsic_store_global: 356 case nir_intrinsic_load_global_constant: 357 return FILE_MEMORY_GLOBAL; 358 case nir_intrinsic_load_scratch: 359 case nir_intrinsic_store_scratch: 360 return FILE_MEMORY_LOCAL; 361 case nir_intrinsic_load_shared: 362 case nir_intrinsic_store_shared: 363 return FILE_MEMORY_SHARED; 364 case nir_intrinsic_load_kernel_input: 365 return FILE_SHADER_INPUT; 366 default: 367 ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos[op].name); 368 assert(false); 369 } 370 return FILE_NULL; 371} 372 373operation 374Converter::getOperation(nir_op op) 375{ 376 switch (op) { 377 // basic ops with float and int variants 378 case nir_op_fabs: 379 case nir_op_iabs: 380 return OP_ABS; 381 case nir_op_fadd: 382 case nir_op_iadd: 383 return OP_ADD; 384 case nir_op_iand: 385 return OP_AND; 386 case nir_op_ifind_msb: 387 case nir_op_ufind_msb: 388 return OP_BFIND; 389 case nir_op_fceil: 390 return OP_CEIL; 391 case nir_op_fcos: 392 return OP_COS; 393 case nir_op_f2f32: 394 case nir_op_f2f64: 395 case nir_op_f2i32: 396 case nir_op_f2i64: 397 case nir_op_f2u32: 398 case nir_op_f2u64: 399 case nir_op_i2f32: 400 case nir_op_i2f64: 401 case nir_op_i2i32: 402 case nir_op_i2i64: 403 case nir_op_u2f32: 404 case nir_op_u2f64: 405 case nir_op_u2u32: 406 case nir_op_u2u64: 407 return OP_CVT; 408 case nir_op_fddx: 409 case nir_op_fddx_coarse: 410 case nir_op_fddx_fine: 411 return OP_DFDX; 412 case nir_op_fddy: 413 case nir_op_fddy_coarse: 414 case nir_op_fddy_fine: 415 return OP_DFDY; 416 case nir_op_fdiv: 417 case nir_op_idiv: 418 case nir_op_udiv: 419 return OP_DIV; 420 case nir_op_fexp2: 421 return OP_EX2; 422 case nir_op_ffloor: 423 return OP_FLOOR; 424 case nir_op_ffma: 425 /* No FMA op pre-nvc0 */ 426 if (info->target < 0xc0) 427 return OP_MAD; 428 return OP_FMA; 429 case nir_op_flog2: 430 return OP_LG2; 431 case nir_op_fmax: 432 case nir_op_imax: 433 case nir_op_umax: 434 return OP_MAX; 435 case nir_op_pack_64_2x32_split: 436 return OP_MERGE; 437 case nir_op_fmin: 438 case nir_op_imin: 439 case nir_op_umin: 440 return OP_MIN; 441 case nir_op_fmod: 442 case nir_op_imod: 443 case nir_op_umod: 444 case nir_op_frem: 445 case nir_op_irem: 446 return OP_MOD; 447 case nir_op_fmul: 448 case nir_op_imul: 449 case nir_op_imul_high: 450 case nir_op_umul_high: 451 return OP_MUL; 452 case nir_op_fneg: 453 case nir_op_ineg: 454 return OP_NEG; 455 case nir_op_inot: 456 return OP_NOT; 457 case nir_op_ior: 458 return OP_OR; 459 case nir_op_fpow: 460 return OP_POW; 461 case nir_op_frcp: 462 return OP_RCP; 463 case nir_op_frsq: 464 return OP_RSQ; 465 case nir_op_fsat: 466 return OP_SAT; 467 case nir_op_feq32: 468 case nir_op_ieq32: 469 case nir_op_fge32: 470 case nir_op_ige32: 471 case nir_op_uge32: 472 case nir_op_flt32: 473 case nir_op_ilt32: 474 case nir_op_ult32: 475 case nir_op_fneu32: 476 case nir_op_ine32: 477 return OP_SET; 478 case nir_op_ishl: 479 return OP_SHL; 480 case nir_op_ishr: 481 case nir_op_ushr: 482 return OP_SHR; 483 case nir_op_fsin: 484 return OP_SIN; 485 case nir_op_fsqrt: 486 return OP_SQRT; 487 case nir_op_ftrunc: 488 return OP_TRUNC; 489 case nir_op_ixor: 490 return OP_XOR; 491 default: 492 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name); 493 assert(false); 494 return OP_NOP; 495 } 496} 497 498operation 499Converter::getOperation(nir_texop op) 500{ 501 switch (op) { 502 case nir_texop_tex: 503 return OP_TEX; 504 case nir_texop_lod: 505 return OP_TXLQ; 506 case nir_texop_txb: 507 return OP_TXB; 508 case nir_texop_txd: 509 return OP_TXD; 510 case nir_texop_txf: 511 case nir_texop_txf_ms: 512 return OP_TXF; 513 case nir_texop_tg4: 514 return OP_TXG; 515 case nir_texop_txl: 516 return OP_TXL; 517 case nir_texop_query_levels: 518 case nir_texop_texture_samples: 519 case nir_texop_txs: 520 return OP_TXQ; 521 default: 522 ERROR("couldn't get operation for nir_texop %u\n", op); 523 assert(false); 524 return OP_NOP; 525 } 526} 527 528operation 529Converter::getOperation(nir_intrinsic_op op) 530{ 531 switch (op) { 532 case nir_intrinsic_emit_vertex: 533 return OP_EMIT; 534 case nir_intrinsic_end_primitive: 535 return OP_RESTART; 536 case nir_intrinsic_bindless_image_atomic_add: 537 case nir_intrinsic_image_atomic_add: 538 case nir_intrinsic_bindless_image_atomic_and: 539 case nir_intrinsic_image_atomic_and: 540 case nir_intrinsic_bindless_image_atomic_comp_swap: 541 case nir_intrinsic_image_atomic_comp_swap: 542 case nir_intrinsic_bindless_image_atomic_exchange: 543 case nir_intrinsic_image_atomic_exchange: 544 case nir_intrinsic_bindless_image_atomic_imax: 545 case nir_intrinsic_image_atomic_imax: 546 case nir_intrinsic_bindless_image_atomic_umax: 547 case nir_intrinsic_image_atomic_umax: 548 case nir_intrinsic_bindless_image_atomic_imin: 549 case nir_intrinsic_image_atomic_imin: 550 case nir_intrinsic_bindless_image_atomic_umin: 551 case nir_intrinsic_image_atomic_umin: 552 case nir_intrinsic_bindless_image_atomic_or: 553 case nir_intrinsic_image_atomic_or: 554 case nir_intrinsic_bindless_image_atomic_xor: 555 case nir_intrinsic_image_atomic_xor: 556 case nir_intrinsic_bindless_image_atomic_inc_wrap: 557 case nir_intrinsic_image_atomic_inc_wrap: 558 case nir_intrinsic_bindless_image_atomic_dec_wrap: 559 case nir_intrinsic_image_atomic_dec_wrap: 560 return OP_SUREDP; 561 case nir_intrinsic_bindless_image_load: 562 case nir_intrinsic_image_load: 563 return OP_SULDP; 564 case nir_intrinsic_bindless_image_samples: 565 case nir_intrinsic_image_samples: 566 case nir_intrinsic_bindless_image_size: 567 case nir_intrinsic_image_size: 568 return OP_SUQ; 569 case nir_intrinsic_bindless_image_store: 570 case nir_intrinsic_image_store: 571 return OP_SUSTP; 572 default: 573 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); 574 assert(false); 575 return OP_NOP; 576 } 577} 578 579operation 580Converter::preOperationNeeded(nir_op op) 581{ 582 switch (op) { 583 case nir_op_fcos: 584 case nir_op_fsin: 585 return OP_PRESIN; 586 default: 587 return OP_NOP; 588 } 589} 590 591int 592Converter::getSubOp(nir_op op) 593{ 594 switch (op) { 595 case nir_op_imul_high: 596 case nir_op_umul_high: 597 return NV50_IR_SUBOP_MUL_HIGH; 598 case nir_op_ishl: 599 case nir_op_ishr: 600 case nir_op_ushr: 601 return NV50_IR_SUBOP_SHIFT_WRAP; 602 default: 603 return 0; 604 } 605} 606 607int 608Converter::getSubOp(nir_intrinsic_op op) 609{ 610 switch (op) { 611 case nir_intrinsic_bindless_image_atomic_add: 612 case nir_intrinsic_global_atomic_add: 613 case nir_intrinsic_image_atomic_add: 614 case nir_intrinsic_shared_atomic_add: 615 case nir_intrinsic_ssbo_atomic_add: 616 return NV50_IR_SUBOP_ATOM_ADD; 617 case nir_intrinsic_bindless_image_atomic_fadd: 618 case nir_intrinsic_global_atomic_fadd: 619 case nir_intrinsic_image_atomic_fadd: 620 case nir_intrinsic_shared_atomic_fadd: 621 case nir_intrinsic_ssbo_atomic_fadd: 622 return NV50_IR_SUBOP_ATOM_ADD; 623 case nir_intrinsic_bindless_image_atomic_and: 624 case nir_intrinsic_global_atomic_and: 625 case nir_intrinsic_image_atomic_and: 626 case nir_intrinsic_shared_atomic_and: 627 case nir_intrinsic_ssbo_atomic_and: 628 return NV50_IR_SUBOP_ATOM_AND; 629 case nir_intrinsic_bindless_image_atomic_comp_swap: 630 case nir_intrinsic_global_atomic_comp_swap: 631 case nir_intrinsic_image_atomic_comp_swap: 632 case nir_intrinsic_shared_atomic_comp_swap: 633 case nir_intrinsic_ssbo_atomic_comp_swap: 634 return NV50_IR_SUBOP_ATOM_CAS; 635 case nir_intrinsic_bindless_image_atomic_exchange: 636 case nir_intrinsic_global_atomic_exchange: 637 case nir_intrinsic_image_atomic_exchange: 638 case nir_intrinsic_shared_atomic_exchange: 639 case nir_intrinsic_ssbo_atomic_exchange: 640 return NV50_IR_SUBOP_ATOM_EXCH; 641 case nir_intrinsic_bindless_image_atomic_or: 642 case nir_intrinsic_global_atomic_or: 643 case nir_intrinsic_image_atomic_or: 644 case nir_intrinsic_shared_atomic_or: 645 case nir_intrinsic_ssbo_atomic_or: 646 return NV50_IR_SUBOP_ATOM_OR; 647 case nir_intrinsic_bindless_image_atomic_imax: 648 case nir_intrinsic_bindless_image_atomic_umax: 649 case nir_intrinsic_global_atomic_imax: 650 case nir_intrinsic_global_atomic_umax: 651 case nir_intrinsic_image_atomic_imax: 652 case nir_intrinsic_image_atomic_umax: 653 case nir_intrinsic_shared_atomic_imax: 654 case nir_intrinsic_shared_atomic_umax: 655 case nir_intrinsic_ssbo_atomic_imax: 656 case nir_intrinsic_ssbo_atomic_umax: 657 return NV50_IR_SUBOP_ATOM_MAX; 658 case nir_intrinsic_bindless_image_atomic_imin: 659 case nir_intrinsic_bindless_image_atomic_umin: 660 case nir_intrinsic_global_atomic_imin: 661 case nir_intrinsic_global_atomic_umin: 662 case nir_intrinsic_image_atomic_imin: 663 case nir_intrinsic_image_atomic_umin: 664 case nir_intrinsic_shared_atomic_imin: 665 case nir_intrinsic_shared_atomic_umin: 666 case nir_intrinsic_ssbo_atomic_imin: 667 case nir_intrinsic_ssbo_atomic_umin: 668 return NV50_IR_SUBOP_ATOM_MIN; 669 case nir_intrinsic_bindless_image_atomic_xor: 670 case nir_intrinsic_global_atomic_xor: 671 case nir_intrinsic_image_atomic_xor: 672 case nir_intrinsic_shared_atomic_xor: 673 case nir_intrinsic_ssbo_atomic_xor: 674 return NV50_IR_SUBOP_ATOM_XOR; 675 case nir_intrinsic_bindless_image_atomic_inc_wrap: 676 case nir_intrinsic_image_atomic_inc_wrap: 677 return NV50_IR_SUBOP_ATOM_INC; 678 case nir_intrinsic_bindless_image_atomic_dec_wrap: 679 case nir_intrinsic_image_atomic_dec_wrap: 680 return NV50_IR_SUBOP_ATOM_DEC; 681 682 case nir_intrinsic_group_memory_barrier: 683 case nir_intrinsic_memory_barrier: 684 case nir_intrinsic_memory_barrier_buffer: 685 case nir_intrinsic_memory_barrier_image: 686 return NV50_IR_SUBOP_MEMBAR(M, GL); 687 case nir_intrinsic_memory_barrier_shared: 688 return NV50_IR_SUBOP_MEMBAR(M, CTA); 689 690 case nir_intrinsic_vote_all: 691 return NV50_IR_SUBOP_VOTE_ALL; 692 case nir_intrinsic_vote_any: 693 return NV50_IR_SUBOP_VOTE_ANY; 694 case nir_intrinsic_vote_ieq: 695 return NV50_IR_SUBOP_VOTE_UNI; 696 default: 697 return 0; 698 } 699} 700 701CondCode 702Converter::getCondCode(nir_op op) 703{ 704 switch (op) { 705 case nir_op_feq32: 706 case nir_op_ieq32: 707 return CC_EQ; 708 case nir_op_fge32: 709 case nir_op_ige32: 710 case nir_op_uge32: 711 return CC_GE; 712 case nir_op_flt32: 713 case nir_op_ilt32: 714 case nir_op_ult32: 715 return CC_LT; 716 case nir_op_fneu32: 717 return CC_NEU; 718 case nir_op_ine32: 719 return CC_NE; 720 default: 721 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name); 722 assert(false); 723 return CC_FL; 724 } 725} 726 727Converter::LValues& 728Converter::convert(nir_alu_dest *dest) 729{ 730 return convert(&dest->dest); 731} 732 733Converter::LValues& 734Converter::convert(nir_dest *dest) 735{ 736 if (dest->is_ssa) 737 return convert(&dest->ssa); 738 if (dest->reg.indirect) { 739 ERROR("no support for indirects."); 740 assert(false); 741 } 742 return convert(dest->reg.reg); 743} 744 745Converter::LValues& 746Converter::convert(nir_register *reg) 747{ 748 assert(!reg->num_array_elems); 749 750 NirDefMap::iterator it = regDefs.find(reg->index); 751 if (it != regDefs.end()) 752 return it->second; 753 754 LValues newDef(reg->num_components); 755 for (uint8_t i = 0; i < reg->num_components; i++) 756 newDef[i] = getScratch(std::max(4, reg->bit_size / 8)); 757 return regDefs[reg->index] = newDef; 758} 759 760Converter::LValues& 761Converter::convert(nir_ssa_def *def) 762{ 763 NirDefMap::iterator it = ssaDefs.find(def->index); 764 if (it != ssaDefs.end()) 765 return it->second; 766 767 LValues newDef(def->num_components); 768 for (uint8_t i = 0; i < def->num_components; i++) 769 newDef[i] = getSSA(std::max(4, def->bit_size / 8)); 770 return ssaDefs[def->index] = newDef; 771} 772 773Value* 774Converter::getSrc(nir_alu_src *src, uint8_t component) 775{ 776 if (src->abs || src->negate) { 777 ERROR("modifiers currently not supported on nir_alu_src\n"); 778 assert(false); 779 } 780 return getSrc(&src->src, src->swizzle[component]); 781} 782 783Value* 784Converter::getSrc(nir_register *reg, uint8_t idx) 785{ 786 NirDefMap::iterator it = regDefs.find(reg->index); 787 if (it == regDefs.end()) 788 return convert(reg)[idx]; 789 return it->second[idx]; 790} 791 792Value* 793Converter::getSrc(nir_src *src, uint8_t idx, bool indirect) 794{ 795 if (src->is_ssa) 796 return getSrc(src->ssa, idx); 797 798 if (src->reg.indirect) { 799 if (indirect) 800 return getSrc(src->reg.indirect, idx); 801 ERROR("no support for indirects."); 802 assert(false); 803 return NULL; 804 } 805 806 return getSrc(src->reg.reg, idx); 807} 808 809Value* 810Converter::getSrc(nir_ssa_def *src, uint8_t idx) 811{ 812 ImmediateMap::iterator iit = immediates.find(src->index); 813 if (iit != immediates.end()) 814 return convert((*iit).second, idx); 815 816 NirDefMap::iterator it = ssaDefs.find(src->index); 817 if (it == ssaDefs.end()) { 818 ERROR("SSA value %u not found\n", src->index); 819 assert(false); 820 return NULL; 821 } 822 return it->second[idx]; 823} 824 825uint32_t 826Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect) 827{ 828 nir_const_value *offset = nir_src_as_const_value(*src); 829 830 if (offset) { 831 indirect = NULL; 832 return offset[0].u32; 833 } 834 835 indirect = getSrc(src, idx, true); 836 return 0; 837} 838 839uint32_t 840Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar) 841{ 842 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect); 843 844 if (indirect && !isScalar) 845 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4)); 846 return idx; 847} 848 849static void 850vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index) 851{ 852 assert(name && index); 853 854 if (slot >= VERT_ATTRIB_MAX) { 855 ERROR("invalid varying slot %u\n", slot); 856 assert(false); 857 return; 858 } 859 860 if (slot >= VERT_ATTRIB_GENERIC0 && 861 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) { 862 *name = TGSI_SEMANTIC_GENERIC; 863 *index = slot - VERT_ATTRIB_GENERIC0; 864 return; 865 } 866 867 if (slot >= VERT_ATTRIB_TEX0 && 868 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) { 869 *name = TGSI_SEMANTIC_TEXCOORD; 870 *index = slot - VERT_ATTRIB_TEX0; 871 return; 872 } 873 874 switch (slot) { 875 case VERT_ATTRIB_COLOR0: 876 *name = TGSI_SEMANTIC_COLOR; 877 *index = 0; 878 break; 879 case VERT_ATTRIB_COLOR1: 880 *name = TGSI_SEMANTIC_COLOR; 881 *index = 1; 882 break; 883 case VERT_ATTRIB_EDGEFLAG: 884 *name = TGSI_SEMANTIC_EDGEFLAG; 885 *index = 0; 886 break; 887 case VERT_ATTRIB_FOG: 888 *name = TGSI_SEMANTIC_FOG; 889 *index = 0; 890 break; 891 case VERT_ATTRIB_NORMAL: 892 *name = TGSI_SEMANTIC_NORMAL; 893 *index = 0; 894 break; 895 case VERT_ATTRIB_POS: 896 *name = TGSI_SEMANTIC_POSITION; 897 *index = 0; 898 break; 899 case VERT_ATTRIB_POINT_SIZE: 900 *name = TGSI_SEMANTIC_PSIZE; 901 *index = 0; 902 break; 903 default: 904 ERROR("unknown vert attrib slot %u\n", slot); 905 assert(false); 906 break; 907 } 908} 909 910void 911Converter::setInterpolate(nv50_ir_varying *var, 912 uint8_t mode, 913 bool centroid, 914 unsigned semantic) 915{ 916 switch (mode) { 917 case INTERP_MODE_FLAT: 918 var->flat = 1; 919 break; 920 case INTERP_MODE_NONE: 921 if (semantic == TGSI_SEMANTIC_COLOR) 922 var->sc = 1; 923 else if (semantic == TGSI_SEMANTIC_POSITION) 924 var->linear = 1; 925 break; 926 case INTERP_MODE_NOPERSPECTIVE: 927 var->linear = 1; 928 break; 929 case INTERP_MODE_SMOOTH: 930 break; 931 } 932 var->centroid = centroid; 933} 934 935static uint16_t 936calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, 937 bool input, const nir_variable *var) 938{ 939 if (!type->is_array()) 940 return type->count_attribute_slots(false); 941 942 uint16_t slots; 943 switch (stage) { 944 case Program::TYPE_GEOMETRY: 945 slots = type->count_attribute_slots(false); 946 if (input) 947 slots /= info.gs.vertices_in; 948 break; 949 case Program::TYPE_TESSELLATION_CONTROL: 950 case Program::TYPE_TESSELLATION_EVAL: 951 // remove first dimension 952 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL)) 953 slots = type->count_attribute_slots(false); 954 else 955 slots = type->fields.array->count_attribute_slots(false); 956 break; 957 default: 958 slots = type->count_attribute_slots(false); 959 break; 960 } 961 962 return slots; 963} 964 965static uint8_t 966getMaskForType(const glsl_type *type, uint8_t slot) { 967 uint16_t comp = type->without_array()->components(); 968 comp = comp ? comp : 4; 969 970 if (glsl_base_type_is_64bit(type->without_array()->base_type)) { 971 comp *= 2; 972 if (comp > 4) { 973 if (slot % 2) 974 comp -= 4; 975 else 976 comp = 4; 977 } 978 } 979 980 return (1 << comp) - 1; 981} 982 983bool Converter::assignSlots() { 984 unsigned name; 985 unsigned index; 986 987 info->io.viewportId = -1; 988 info->io.mul_zero_wins = nir->info.use_legacy_math_rules; 989 info_out->numInputs = 0; 990 info_out->numOutputs = 0; 991 info_out->numSysVals = 0; 992 993 uint8_t i; 994 BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) { 995 info_out->sv[info_out->numSysVals].sn = tgsi_get_sysval_semantic(i); 996 info_out->sv[info_out->numSysVals].si = 0; 997 info_out->sv[info_out->numSysVals].input = 0; 998 999 switch (i) { 1000 case SYSTEM_VALUE_VERTEX_ID: 1001 info_out->sv[info_out->numSysVals].input = 1; 1002 info_out->io.vertexId = info_out->numSysVals; 1003 break; 1004 case SYSTEM_VALUE_INSTANCE_ID: 1005 info_out->sv[info_out->numSysVals].input = 1; 1006 info_out->io.instanceId = info_out->numSysVals; 1007 break; 1008 case SYSTEM_VALUE_TESS_LEVEL_INNER: 1009 case SYSTEM_VALUE_TESS_LEVEL_OUTER: 1010 info_out->sv[info_out->numSysVals].patch = 1; 1011 break; 1012 default: 1013 break; 1014 } 1015 1016 info_out->numSysVals += 1; 1017 } 1018 1019 if (prog->getType() == Program::TYPE_COMPUTE) 1020 return true; 1021 1022 nir_foreach_shader_in_variable(var, nir) { 1023 const glsl_type *type = var->type; 1024 int slot = var->data.location; 1025 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var); 1026 uint32_t vary = var->data.driver_location; 1027 assert(vary + slots <= NV50_CODEGEN_MAX_VARYINGS); 1028 1029 switch(prog->getType()) { 1030 case Program::TYPE_FRAGMENT: 1031 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, 1032 &name, &index); 1033 for (uint16_t i = 0; i < slots; ++i) { 1034 setInterpolate(&info_out->in[vary + i], var->data.interpolation, 1035 var->data.centroid | var->data.sample, name); 1036 } 1037 break; 1038 case Program::TYPE_GEOMETRY: 1039 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, 1040 &name, &index); 1041 break; 1042 case Program::TYPE_TESSELLATION_CONTROL: 1043 case Program::TYPE_TESSELLATION_EVAL: 1044 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, 1045 &name, &index); 1046 if (var->data.patch && name == TGSI_SEMANTIC_PATCH) 1047 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots); 1048 break; 1049 case Program::TYPE_VERTEX: 1050 if (slot >= VERT_ATTRIB_GENERIC0 && slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) 1051 slot = VERT_ATTRIB_GENERIC0 + vary; 1052 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index); 1053 switch (name) { 1054 case TGSI_SEMANTIC_EDGEFLAG: 1055 info_out->io.edgeFlagIn = vary; 1056 break; 1057 default: 1058 break; 1059 } 1060 break; 1061 default: 1062 ERROR("unknown shader type %u in assignSlots\n", prog->getType()); 1063 return false; 1064 } 1065 1066 for (uint16_t i = 0u; i < slots; ++i, ++vary) { 1067 nv50_ir_varying *v = &info_out->in[vary]; 1068 1069 v->patch = var->data.patch; 1070 v->sn = name; 1071 v->si = index + i; 1072 v->mask |= getMaskForType(type, i) << var->data.location_frac; 1073 } 1074 info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary); 1075 } 1076 1077 nir_foreach_shader_out_variable(var, nir) { 1078 const glsl_type *type = var->type; 1079 int slot = var->data.location; 1080 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var); 1081 uint32_t vary = var->data.driver_location; 1082 1083 assert(vary < NV50_CODEGEN_MAX_VARYINGS); 1084 1085 switch(prog->getType()) { 1086 case Program::TYPE_FRAGMENT: 1087 tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index); 1088 switch (name) { 1089 case TGSI_SEMANTIC_COLOR: 1090 if (!var->data.fb_fetch_output) 1091 info_out->prop.fp.numColourResults++; 1092 if (var->data.location == FRAG_RESULT_COLOR && 1093 nir->info.outputs_written & BITFIELD64_BIT(var->data.location)) 1094 info_out->prop.fp.separateFragData = true; 1095 // sometimes we get FRAG_RESULT_DATAX with data.index 0 1096 // sometimes we get FRAG_RESULT_DATA0 with data.index X 1097 index = index == 0 ? var->data.index : index; 1098 break; 1099 case TGSI_SEMANTIC_POSITION: 1100 info_out->io.fragDepth = vary; 1101 info_out->prop.fp.writesDepth = true; 1102 break; 1103 case TGSI_SEMANTIC_SAMPLEMASK: 1104 info_out->io.sampleMask = vary; 1105 break; 1106 default: 1107 break; 1108 } 1109 break; 1110 case Program::TYPE_GEOMETRY: 1111 case Program::TYPE_TESSELLATION_CONTROL: 1112 case Program::TYPE_TESSELLATION_EVAL: 1113 case Program::TYPE_VERTEX: 1114 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, 1115 &name, &index); 1116 1117 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER && 1118 name != TGSI_SEMANTIC_TESSOUTER) 1119 info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots); 1120 1121 switch (name) { 1122 case TGSI_SEMANTIC_CLIPDIST: 1123 info_out->io.genUserClip = -1; 1124 break; 1125 case TGSI_SEMANTIC_CLIPVERTEX: 1126 clipVertexOutput = vary; 1127 break; 1128 case TGSI_SEMANTIC_EDGEFLAG: 1129 info_out->io.edgeFlagOut = vary; 1130 break; 1131 case TGSI_SEMANTIC_POSITION: 1132 if (clipVertexOutput < 0) 1133 clipVertexOutput = vary; 1134 break; 1135 default: 1136 break; 1137 } 1138 break; 1139 default: 1140 ERROR("unknown shader type %u in assignSlots\n", prog->getType()); 1141 return false; 1142 } 1143 1144 for (uint16_t i = 0u; i < slots; ++i, ++vary) { 1145 nv50_ir_varying *v = &info_out->out[vary]; 1146 v->patch = var->data.patch; 1147 v->sn = name; 1148 v->si = index + i; 1149 v->mask |= getMaskForType(type, i) << var->data.location_frac; 1150 1151 if (nir->info.outputs_read & 1ull << slot) 1152 v->oread = 1; 1153 } 1154 info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary); 1155 } 1156 1157 if (info_out->io.genUserClip > 0) { 1158 info_out->io.clipDistances = info_out->io.genUserClip; 1159 1160 const unsigned int nOut = (info_out->io.genUserClip + 3) / 4; 1161 1162 for (unsigned int n = 0; n < nOut; ++n) { 1163 unsigned int i = info_out->numOutputs++; 1164 info_out->out[i].id = i; 1165 info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST; 1166 info_out->out[i].si = n; 1167 info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4); 1168 } 1169 } 1170 1171 return info->assignSlots(info_out) == 0; 1172} 1173 1174uint32_t 1175Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot) 1176{ 1177 DataType ty; 1178 int offset = nir_intrinsic_component(insn); 1179 bool input; 1180 1181 if (nir_intrinsic_infos[insn->intrinsic].has_dest) 1182 ty = getDType(insn); 1183 else 1184 ty = getSType(insn->src[0], false, false); 1185 1186 switch (insn->intrinsic) { 1187 case nir_intrinsic_load_input: 1188 case nir_intrinsic_load_interpolated_input: 1189 case nir_intrinsic_load_per_vertex_input: 1190 input = true; 1191 break; 1192 case nir_intrinsic_load_output: 1193 case nir_intrinsic_load_per_vertex_output: 1194 case nir_intrinsic_store_output: 1195 case nir_intrinsic_store_per_vertex_output: 1196 input = false; 1197 break; 1198 default: 1199 ERROR("unknown intrinsic in getSlotAddress %s", 1200 nir_intrinsic_infos[insn->intrinsic].name); 1201 input = false; 1202 assert(false); 1203 break; 1204 } 1205 1206 if (typeSizeof(ty) == 8) { 1207 slot *= 2; 1208 slot += offset; 1209 if (slot >= 4) { 1210 idx += 1; 1211 slot -= 4; 1212 } 1213 } else { 1214 slot += offset; 1215 } 1216 1217 assert(slot < 4); 1218 assert(!input || idx < NV50_CODEGEN_MAX_VARYINGS); 1219 assert(input || idx < NV50_CODEGEN_MAX_VARYINGS); 1220 1221 const nv50_ir_varying *vary = input ? info_out->in : info_out->out; 1222 return vary[idx].slot[slot] * 4; 1223} 1224 1225Instruction * 1226Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def, 1227 uint32_t base, uint8_t c, Value *indirect0, 1228 Value *indirect1, bool patch) 1229{ 1230 unsigned int tySize = typeSizeof(ty); 1231 1232 if (tySize == 8 && 1233 (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) { 1234 Value *lo = getSSA(); 1235 Value *hi = getSSA(); 1236 1237 Instruction *loi = 1238 mkLoad(TYPE_U32, lo, 1239 mkSymbol(file, i, TYPE_U32, base + c * tySize), 1240 indirect0); 1241 loi->setIndirect(0, 1, indirect1); 1242 loi->perPatch = patch; 1243 1244 Instruction *hii = 1245 mkLoad(TYPE_U32, hi, 1246 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4), 1247 indirect0); 1248 hii->setIndirect(0, 1, indirect1); 1249 hii->perPatch = patch; 1250 1251 return mkOp2(OP_MERGE, ty, def, lo, hi); 1252 } else { 1253 Instruction *ld = 1254 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0); 1255 ld->setIndirect(0, 1, indirect1); 1256 ld->perPatch = patch; 1257 return ld; 1258 } 1259} 1260 1261void 1262Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, 1263 DataType ty, Value *src, uint8_t idx, uint8_t c, 1264 Value *indirect0, Value *indirect1) 1265{ 1266 uint8_t size = typeSizeof(ty); 1267 uint32_t address = getSlotAddress(insn, idx, c); 1268 1269 if (size == 8 && indirect0) { 1270 Value *split[2]; 1271 mkSplit(split, 4, src); 1272 1273 if (op == OP_EXPORT) { 1274 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0); 1275 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0); 1276 } 1277 1278 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0, 1279 split[0])->perPatch = info_out->out[idx].patch; 1280 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0, 1281 split[1])->perPatch = info_out->out[idx].patch; 1282 } else { 1283 if (op == OP_EXPORT) 1284 src = mkMov(getSSA(size), src, ty)->getDef(0); 1285 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0, 1286 src)->perPatch = info_out->out[idx].patch; 1287 } 1288} 1289 1290bool 1291Converter::parseNIR() 1292{ 1293 info_out->bin.tlsSpace = nir->scratch_size; 1294 info_out->io.clipDistances = nir->info.clip_distance_array_size; 1295 info_out->io.cullDistances = nir->info.cull_distance_array_size; 1296 info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative; 1297 1298 switch(prog->getType()) { 1299 case Program::TYPE_COMPUTE: 1300 info->prop.cp.numThreads[0] = nir->info.workgroup_size[0]; 1301 info->prop.cp.numThreads[1] = nir->info.workgroup_size[1]; 1302 info->prop.cp.numThreads[2] = nir->info.workgroup_size[2]; 1303 info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size); 1304 1305 if (info->target < NVISA_GF100_CHIPSET) { 1306 int gmemSlot = 0; 1307 1308 for (unsigned i = 0; i < nir->info.num_ssbos; i++) { 1309 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i}; 1310 assert(gmemSlot < 16); 1311 } 1312 nir_foreach_image_variable(var, nir) { 1313 int image_count = glsl_type_get_image_count(var->type); 1314 for (int i = 0; i < image_count; i++) { 1315 info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = var->data.binding + i}; 1316 assert(gmemSlot < 16); 1317 } 1318 } 1319 } 1320 1321 break; 1322 case Program::TYPE_FRAGMENT: 1323 info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; 1324 prog->persampleInvocation = 1325 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || 1326 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS); 1327 info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; 1328 info_out->prop.fp.readsSampleLocations = 1329 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS); 1330 info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; 1331 info_out->prop.fp.usesSampleMaskIn = 1332 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); 1333 break; 1334 case Program::TYPE_GEOMETRY: 1335 info_out->prop.gp.instanceCount = nir->info.gs.invocations; 1336 info_out->prop.gp.maxVertices = nir->info.gs.vertices_out; 1337 info_out->prop.gp.outputPrim = nir->info.gs.output_primitive; 1338 break; 1339 case Program::TYPE_TESSELLATION_CONTROL: 1340 case Program::TYPE_TESSELLATION_EVAL: 1341 info_out->prop.tp.domain = u_tess_prim_from_shader(nir->info.tess._primitive_mode); 1342 info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; 1343 info_out->prop.tp.outputPrim = 1344 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES; 1345 info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; 1346 info_out->prop.tp.winding = !nir->info.tess.ccw; 1347 break; 1348 case Program::TYPE_VERTEX: 1349 info_out->prop.vp.usesDrawParameters = 1350 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) || 1351 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) || 1352 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); 1353 break; 1354 default: 1355 break; 1356 } 1357 1358 return true; 1359} 1360 1361bool 1362Converter::visit(nir_function *function) 1363{ 1364 assert(function->impl); 1365 1366 // usually the blocks will set everything up, but main is special 1367 BasicBlock *entry = new BasicBlock(prog->main); 1368 exit = new BasicBlock(prog->main); 1369 blocks[nir_start_block(function->impl)->index] = entry; 1370 prog->main->setEntry(entry); 1371 prog->main->setExit(exit); 1372 1373 setPosition(entry, true); 1374 1375 if (info_out->io.genUserClip > 0) { 1376 for (int c = 0; c < 4; ++c) 1377 clipVtx[c] = getScratch(); 1378 } 1379 1380 switch (prog->getType()) { 1381 case Program::TYPE_TESSELLATION_CONTROL: 1382 outBase = mkOp2v( 1383 OP_SUB, TYPE_U32, getSSA(), 1384 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)), 1385 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0))); 1386 break; 1387 case Program::TYPE_FRAGMENT: { 1388 Symbol *sv = mkSysVal(SV_POSITION, 3); 1389 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); 1390 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); 1391 break; 1392 } 1393 default: 1394 break; 1395 } 1396 1397 nir_index_ssa_defs(function->impl); 1398 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { 1399 if (!visit(node)) 1400 return false; 1401 } 1402 1403 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE); 1404 setPosition(exit, true); 1405 1406 if ((prog->getType() == Program::TYPE_VERTEX || 1407 prog->getType() == Program::TYPE_TESSELLATION_EVAL) 1408 && info_out->io.genUserClip > 0) 1409 handleUserClipPlanes(); 1410 1411 // TODO: for non main function this needs to be a OP_RETURN 1412 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; 1413 return true; 1414} 1415 1416bool 1417Converter::visit(nir_cf_node *node) 1418{ 1419 switch (node->type) { 1420 case nir_cf_node_block: 1421 return visit(nir_cf_node_as_block(node)); 1422 case nir_cf_node_if: 1423 return visit(nir_cf_node_as_if(node)); 1424 case nir_cf_node_loop: 1425 return visit(nir_cf_node_as_loop(node)); 1426 default: 1427 ERROR("unknown nir_cf_node type %u\n", node->type); 1428 return false; 1429 } 1430} 1431 1432bool 1433Converter::visit(nir_block *block) 1434{ 1435 if (!block->predecessors->entries && block->instr_list.is_empty()) 1436 return true; 1437 1438 BasicBlock *bb = convert(block); 1439 1440 setPosition(bb, true); 1441 nir_foreach_instr(insn, block) { 1442 if (!visit(insn)) 1443 return false; 1444 } 1445 return true; 1446} 1447 1448bool 1449Converter::visit(nir_if *nif) 1450{ 1451 curIfDepth++; 1452 1453 DataType sType = getSType(nif->condition, false, false); 1454 Value *src = getSrc(&nif->condition, 0); 1455 1456 nir_block *lastThen = nir_if_last_then_block(nif); 1457 nir_block *lastElse = nir_if_last_else_block(nif); 1458 1459 BasicBlock *headBB = bb; 1460 BasicBlock *ifBB = convert(nir_if_first_then_block(nif)); 1461 BasicBlock *elseBB = convert(nir_if_first_else_block(nif)); 1462 1463 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); 1464 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); 1465 1466 bool insertJoins = lastThen->successors[0] == lastElse->successors[0]; 1467 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType); 1468 1469 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) { 1470 if (!visit(node)) 1471 return false; 1472 } 1473 1474 setPosition(convert(lastThen), true); 1475 if (!bb->isTerminated()) { 1476 BasicBlock *tailBB = convert(lastThen->successors[0]); 1477 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); 1478 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); 1479 } else { 1480 insertJoins = insertJoins && bb->getExit()->op == OP_BRA; 1481 } 1482 1483 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) { 1484 if (!visit(node)) 1485 return false; 1486 } 1487 1488 setPosition(convert(lastElse), true); 1489 if (!bb->isTerminated()) { 1490 BasicBlock *tailBB = convert(lastElse->successors[0]); 1491 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); 1492 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); 1493 } else { 1494 insertJoins = insertJoins && bb->getExit()->op == OP_BRA; 1495 } 1496 1497 if (curIfDepth > 6) { 1498 insertJoins = false; 1499 } 1500 1501 /* we made sure that all threads would converge at the same block */ 1502 if (insertJoins) { 1503 BasicBlock *conv = convert(lastThen->successors[0]); 1504 setPosition(headBB->getExit(), false); 1505 headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL); 1506 setPosition(conv, false); 1507 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; 1508 } 1509 1510 curIfDepth--; 1511 1512 return true; 1513} 1514 1515// TODO: add convergency 1516bool 1517Converter::visit(nir_loop *loop) 1518{ 1519 curLoopDepth += 1; 1520 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth); 1521 1522 BasicBlock *loopBB = convert(nir_loop_first_block(loop)); 1523 BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); 1524 1525 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE); 1526 1527 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL); 1528 setPosition(loopBB, false); 1529 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL); 1530 1531 foreach_list_typed(nir_cf_node, node, node, &loop->body) { 1532 if (!visit(node)) 1533 return false; 1534 } 1535 1536 if (!bb->isTerminated()) { 1537 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); 1538 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); 1539 } 1540 1541 if (tailBB->cfg.incidentCount() == 0) 1542 loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); 1543 1544 curLoopDepth -= 1; 1545 1546 info_out->loops++; 1547 1548 return true; 1549} 1550 1551bool 1552Converter::visit(nir_instr *insn) 1553{ 1554 // we need an insertion point for on the fly generated immediate loads 1555 immInsertPos = bb->getExit(); 1556 switch (insn->type) { 1557 case nir_instr_type_alu: 1558 return visit(nir_instr_as_alu(insn)); 1559 case nir_instr_type_intrinsic: 1560 return visit(nir_instr_as_intrinsic(insn)); 1561 case nir_instr_type_jump: 1562 return visit(nir_instr_as_jump(insn)); 1563 case nir_instr_type_load_const: 1564 return visit(nir_instr_as_load_const(insn)); 1565 case nir_instr_type_ssa_undef: 1566 return visit(nir_instr_as_ssa_undef(insn)); 1567 case nir_instr_type_tex: 1568 return visit(nir_instr_as_tex(insn)); 1569 default: 1570 ERROR("unknown nir_instr type %u\n", insn->type); 1571 return false; 1572 } 1573 return true; 1574} 1575 1576SVSemantic 1577Converter::convert(nir_intrinsic_op intr) 1578{ 1579 switch (intr) { 1580 case nir_intrinsic_load_base_vertex: 1581 return SV_BASEVERTEX; 1582 case nir_intrinsic_load_base_instance: 1583 return SV_BASEINSTANCE; 1584 case nir_intrinsic_load_draw_id: 1585 return SV_DRAWID; 1586 case nir_intrinsic_load_front_face: 1587 return SV_FACE; 1588 case nir_intrinsic_is_helper_invocation: 1589 case nir_intrinsic_load_helper_invocation: 1590 return SV_THREAD_KILL; 1591 case nir_intrinsic_load_instance_id: 1592 return SV_INSTANCE_ID; 1593 case nir_intrinsic_load_invocation_id: 1594 return SV_INVOCATION_ID; 1595 case nir_intrinsic_load_workgroup_size: 1596 return SV_NTID; 1597 case nir_intrinsic_load_local_invocation_id: 1598 return SV_TID; 1599 case nir_intrinsic_load_num_workgroups: 1600 return SV_NCTAID; 1601 case nir_intrinsic_load_patch_vertices_in: 1602 return SV_VERTEX_COUNT; 1603 case nir_intrinsic_load_primitive_id: 1604 return SV_PRIMITIVE_ID; 1605 case nir_intrinsic_load_sample_id: 1606 return SV_SAMPLE_INDEX; 1607 case nir_intrinsic_load_sample_mask_in: 1608 return SV_SAMPLE_MASK; 1609 case nir_intrinsic_load_sample_pos: 1610 return SV_SAMPLE_POS; 1611 case nir_intrinsic_load_subgroup_eq_mask: 1612 return SV_LANEMASK_EQ; 1613 case nir_intrinsic_load_subgroup_ge_mask: 1614 return SV_LANEMASK_GE; 1615 case nir_intrinsic_load_subgroup_gt_mask: 1616 return SV_LANEMASK_GT; 1617 case nir_intrinsic_load_subgroup_le_mask: 1618 return SV_LANEMASK_LE; 1619 case nir_intrinsic_load_subgroup_lt_mask: 1620 return SV_LANEMASK_LT; 1621 case nir_intrinsic_load_subgroup_invocation: 1622 return SV_LANEID; 1623 case nir_intrinsic_load_tess_coord: 1624 return SV_TESS_COORD; 1625 case nir_intrinsic_load_tess_level_inner: 1626 return SV_TESS_INNER; 1627 case nir_intrinsic_load_tess_level_outer: 1628 return SV_TESS_OUTER; 1629 case nir_intrinsic_load_vertex_id: 1630 return SV_VERTEX_ID; 1631 case nir_intrinsic_load_workgroup_id: 1632 return SV_CTAID; 1633 case nir_intrinsic_load_work_dim: 1634 return SV_WORK_DIM; 1635 default: 1636 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n", 1637 nir_intrinsic_infos[intr].name); 1638 assert(false); 1639 return SV_LAST; 1640 } 1641} 1642 1643bool 1644Converter::visit(nir_intrinsic_instr *insn) 1645{ 1646 nir_intrinsic_op op = insn->intrinsic; 1647 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op]; 1648 unsigned dest_components = nir_intrinsic_dest_components(insn); 1649 1650 switch (op) { 1651 case nir_intrinsic_load_uniform: { 1652 LValues &newDefs = convert(&insn->dest); 1653 const DataType dType = getDType(insn); 1654 Value *indirect; 1655 uint32_t coffset = getIndirect(insn, 0, 0, indirect); 1656 for (uint8_t i = 0; i < dest_components; ++i) { 1657 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); 1658 } 1659 break; 1660 } 1661 case nir_intrinsic_store_output: 1662 case nir_intrinsic_store_per_vertex_output: { 1663 Value *indirect; 1664 DataType dType = getSType(insn->src[0], false, false); 1665 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect); 1666 1667 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { 1668 if (!((1u << i) & nir_intrinsic_write_mask(insn))) 1669 continue; 1670 1671 uint8_t offset = 0; 1672 Value *src = getSrc(&insn->src[0], i); 1673 switch (prog->getType()) { 1674 case Program::TYPE_FRAGMENT: { 1675 if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) { 1676 // TGSI uses a different interface than NIR, TGSI stores that 1677 // value in the z component, NIR in X 1678 offset += 2; 1679 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src); 1680 } 1681 break; 1682 } 1683 case Program::TYPE_GEOMETRY: 1684 case Program::TYPE_TESSELLATION_EVAL: 1685 case Program::TYPE_VERTEX: { 1686 if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { 1687 mkMov(clipVtx[i], src); 1688 src = clipVtx[i]; 1689 } 1690 break; 1691 } 1692 default: 1693 break; 1694 } 1695 1696 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect); 1697 } 1698 break; 1699 } 1700 case nir_intrinsic_load_input: 1701 case nir_intrinsic_load_interpolated_input: 1702 case nir_intrinsic_load_output: { 1703 LValues &newDefs = convert(&insn->dest); 1704 1705 // FBFetch 1706 if (prog->getType() == Program::TYPE_FRAGMENT && 1707 op == nir_intrinsic_load_output) { 1708 std::vector<Value*> defs, srcs; 1709 uint8_t mask = 0; 1710 1711 srcs.push_back(getSSA()); 1712 srcs.push_back(getSSA()); 1713 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0)); 1714 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1)); 1715 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z; 1716 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z; 1717 1718 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); 1719 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); 1720 1721 for (uint8_t i = 0u; i < dest_components; ++i) { 1722 defs.push_back(newDefs[i]); 1723 mask |= 1 << i; 1724 } 1725 1726 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs); 1727 texi->tex.levelZero = true; 1728 texi->tex.mask = mask; 1729 texi->tex.useOffsets = 0; 1730 texi->tex.r = 0xffff; 1731 texi->tex.s = 0xffff; 1732 1733 info_out->prop.fp.readsFramebuffer = true; 1734 break; 1735 } 1736 1737 const DataType dType = getDType(insn); 1738 Value *indirect; 1739 bool input = op != nir_intrinsic_load_output; 1740 operation nvirOp; 1741 uint32_t mode = 0; 1742 1743 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect); 1744 nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx]; 1745 1746 // see load_barycentric_* handling 1747 if (prog->getType() == Program::TYPE_FRAGMENT) { 1748 if (op == nir_intrinsic_load_interpolated_input) { 1749 ImmediateValue immMode; 1750 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) 1751 mode = immMode.reg.data.u32; 1752 } 1753 if (mode == NV50_IR_INTERP_DEFAULT) 1754 mode |= translateInterpMode(&vary, nvirOp); 1755 else { 1756 if (vary.linear) { 1757 nvirOp = OP_LINTERP; 1758 mode |= NV50_IR_INTERP_LINEAR; 1759 } else { 1760 nvirOp = OP_PINTERP; 1761 mode |= NV50_IR_INTERP_PERSPECTIVE; 1762 } 1763 } 1764 } 1765 1766 for (uint8_t i = 0u; i < dest_components; ++i) { 1767 uint32_t address = getSlotAddress(insn, idx, i); 1768 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); 1769 if (prog->getType() == Program::TYPE_FRAGMENT) { 1770 int s = 1; 1771 if (typeSizeof(dType) == 8) { 1772 Value *lo = getSSA(); 1773 Value *hi = getSSA(); 1774 Instruction *interp; 1775 1776 interp = mkOp1(nvirOp, TYPE_U32, lo, sym); 1777 if (nvirOp == OP_PINTERP) 1778 interp->setSrc(s++, fp.position); 1779 if (mode & NV50_IR_INTERP_OFFSET) 1780 interp->setSrc(s++, getSrc(&insn->src[0], 0)); 1781 interp->setInterpolate(mode); 1782 interp->setIndirect(0, 0, indirect); 1783 1784 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4); 1785 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1); 1786 if (nvirOp == OP_PINTERP) 1787 interp->setSrc(s++, fp.position); 1788 if (mode & NV50_IR_INTERP_OFFSET) 1789 interp->setSrc(s++, getSrc(&insn->src[0], 0)); 1790 interp->setInterpolate(mode); 1791 interp->setIndirect(0, 0, indirect); 1792 1793 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi); 1794 } else { 1795 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym); 1796 if (nvirOp == OP_PINTERP) 1797 interp->setSrc(s++, fp.position); 1798 if (mode & NV50_IR_INTERP_OFFSET) 1799 interp->setSrc(s++, getSrc(&insn->src[0], 0)); 1800 interp->setInterpolate(mode); 1801 interp->setIndirect(0, 0, indirect); 1802 } 1803 } else { 1804 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch; 1805 } 1806 } 1807 break; 1808 } 1809 case nir_intrinsic_load_barycentric_at_offset: 1810 case nir_intrinsic_load_barycentric_at_sample: 1811 case nir_intrinsic_load_barycentric_centroid: 1812 case nir_intrinsic_load_barycentric_pixel: 1813 case nir_intrinsic_load_barycentric_sample: { 1814 LValues &newDefs = convert(&insn->dest); 1815 uint32_t mode; 1816 1817 if (op == nir_intrinsic_load_barycentric_centroid || 1818 op == nir_intrinsic_load_barycentric_sample) { 1819 mode = NV50_IR_INTERP_CENTROID; 1820 } else if (op == nir_intrinsic_load_barycentric_at_offset) { 1821 Value *offs[2]; 1822 for (uint8_t c = 0; c < 2; c++) { 1823 offs[c] = getScratch(); 1824 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f)); 1825 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); 1826 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); 1827 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); 1828 } 1829 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]); 1830 1831 mode = NV50_IR_INTERP_OFFSET; 1832 } else if (op == nir_intrinsic_load_barycentric_pixel) { 1833 mode = NV50_IR_INTERP_DEFAULT; 1834 } else if (op == nir_intrinsic_load_barycentric_at_sample) { 1835 info_out->prop.fp.readsSampleLocations = true; 1836 Value *sample = getSSA(); 1837 mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0)) 1838 ->subOp = 2; 1839 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; 1840 mode = NV50_IR_INTERP_OFFSET; 1841 } else { 1842 unreachable("all intrinsics already handled above"); 1843 } 1844 1845 loadImm(newDefs[1], mode); 1846 break; 1847 } 1848 case nir_intrinsic_demote: 1849 case nir_intrinsic_discard: 1850 mkOp(OP_DISCARD, TYPE_NONE, NULL); 1851 break; 1852 case nir_intrinsic_demote_if: 1853 case nir_intrinsic_discard_if: { 1854 Value *pred = getSSA(1, FILE_PREDICATE); 1855 if (insn->num_components > 1) { 1856 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n"); 1857 assert(false); 1858 return false; 1859 } 1860 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero); 1861 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred); 1862 break; 1863 } 1864 case nir_intrinsic_load_base_vertex: 1865 case nir_intrinsic_load_base_instance: 1866 case nir_intrinsic_load_draw_id: 1867 case nir_intrinsic_load_front_face: 1868 case nir_intrinsic_is_helper_invocation: 1869 case nir_intrinsic_load_helper_invocation: 1870 case nir_intrinsic_load_instance_id: 1871 case nir_intrinsic_load_invocation_id: 1872 case nir_intrinsic_load_workgroup_size: 1873 case nir_intrinsic_load_local_invocation_id: 1874 case nir_intrinsic_load_num_workgroups: 1875 case nir_intrinsic_load_patch_vertices_in: 1876 case nir_intrinsic_load_primitive_id: 1877 case nir_intrinsic_load_sample_id: 1878 case nir_intrinsic_load_sample_mask_in: 1879 case nir_intrinsic_load_sample_pos: 1880 case nir_intrinsic_load_subgroup_eq_mask: 1881 case nir_intrinsic_load_subgroup_ge_mask: 1882 case nir_intrinsic_load_subgroup_gt_mask: 1883 case nir_intrinsic_load_subgroup_le_mask: 1884 case nir_intrinsic_load_subgroup_lt_mask: 1885 case nir_intrinsic_load_subgroup_invocation: 1886 case nir_intrinsic_load_tess_coord: 1887 case nir_intrinsic_load_tess_level_inner: 1888 case nir_intrinsic_load_tess_level_outer: 1889 case nir_intrinsic_load_vertex_id: 1890 case nir_intrinsic_load_workgroup_id: 1891 case nir_intrinsic_load_work_dim: { 1892 const DataType dType = getDType(insn); 1893 SVSemantic sv = convert(op); 1894 LValues &newDefs = convert(&insn->dest); 1895 1896 for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) { 1897 Value *def; 1898 if (typeSizeof(dType) == 8) 1899 def = getSSA(); 1900 else 1901 def = newDefs[i]; 1902 1903 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) { 1904 loadImm(def, 0u); 1905 } else { 1906 Symbol *sym = mkSysVal(sv, i); 1907 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym); 1908 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER) 1909 rdsv->perPatch = 1; 1910 } 1911 1912 if (typeSizeof(dType) == 8) 1913 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u)); 1914 } 1915 break; 1916 } 1917 // constants 1918 case nir_intrinsic_load_subgroup_size: { 1919 LValues &newDefs = convert(&insn->dest); 1920 loadImm(newDefs[0], 32u); 1921 break; 1922 } 1923 case nir_intrinsic_vote_all: 1924 case nir_intrinsic_vote_any: 1925 case nir_intrinsic_vote_ieq: { 1926 LValues &newDefs = convert(&insn->dest); 1927 Value *pred = getScratch(1, FILE_PREDICATE); 1928 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero); 1929 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op); 1930 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred); 1931 break; 1932 } 1933 case nir_intrinsic_ballot: { 1934 LValues &newDefs = convert(&insn->dest); 1935 Value *pred = getSSA(1, FILE_PREDICATE); 1936 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero); 1937 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY; 1938 break; 1939 } 1940 case nir_intrinsic_read_first_invocation: 1941 case nir_intrinsic_read_invocation: { 1942 LValues &newDefs = convert(&insn->dest); 1943 const DataType dType = getDType(insn); 1944 Value *tmp = getScratch(); 1945 1946 if (op == nir_intrinsic_read_first_invocation) { 1947 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; 1948 mkOp1(OP_BREV, TYPE_U32, tmp, tmp); 1949 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; 1950 } else 1951 tmp = getSrc(&insn->src[1], 0); 1952 1953 for (uint8_t i = 0; i < dest_components; ++i) { 1954 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f)) 1955 ->subOp = NV50_IR_SUBOP_SHFL_IDX; 1956 } 1957 break; 1958 } 1959 case nir_intrinsic_load_per_vertex_input: { 1960 const DataType dType = getDType(insn); 1961 LValues &newDefs = convert(&insn->dest); 1962 Value *indirectVertex; 1963 Value *indirectOffset; 1964 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex); 1965 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset); 1966 1967 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), 1968 mkImm(baseVertex), indirectVertex); 1969 for (uint8_t i = 0u; i < dest_components; ++i) { 1970 uint32_t address = getSlotAddress(insn, idx, i); 1971 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, 1972 indirectOffset, vtxBase, info_out->in[idx].patch); 1973 } 1974 break; 1975 } 1976 case nir_intrinsic_load_per_vertex_output: { 1977 const DataType dType = getDType(insn); 1978 LValues &newDefs = convert(&insn->dest); 1979 Value *indirectVertex; 1980 Value *indirectOffset; 1981 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex); 1982 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset); 1983 Value *vtxBase = NULL; 1984 1985 if (indirectVertex) 1986 vtxBase = indirectVertex; 1987 else 1988 vtxBase = loadImm(NULL, baseVertex); 1989 1990 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); 1991 1992 for (uint8_t i = 0u; i < dest_components; ++i) { 1993 uint32_t address = getSlotAddress(insn, idx, i); 1994 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, 1995 indirectOffset, vtxBase, info_out->in[idx].patch); 1996 } 1997 break; 1998 } 1999 case nir_intrinsic_emit_vertex: { 2000 if (info_out->io.genUserClip > 0) 2001 handleUserClipPlanes(); 2002 uint32_t idx = nir_intrinsic_stream_id(insn); 2003 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; 2004 break; 2005 } 2006 case nir_intrinsic_end_primitive: { 2007 uint32_t idx = nir_intrinsic_stream_id(insn); 2008 if (idx) 2009 break; 2010 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; 2011 break; 2012 } 2013 case nir_intrinsic_load_ubo: { 2014 const DataType dType = getDType(insn); 2015 LValues &newDefs = convert(&insn->dest); 2016 Value *indirectIndex; 2017 Value *indirectOffset; 2018 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1; 2019 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); 2020 if (indirectOffset) 2021 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset); 2022 2023 for (uint8_t i = 0u; i < dest_components; ++i) { 2024 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, 2025 indirectOffset, indirectIndex); 2026 } 2027 break; 2028 } 2029 case nir_intrinsic_get_ssbo_size: { 2030 LValues &newDefs = convert(&insn->dest); 2031 const DataType dType = getDType(insn); 2032 Value *indirectBuffer; 2033 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); 2034 2035 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0); 2036 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer); 2037 break; 2038 } 2039 case nir_intrinsic_store_ssbo: { 2040 DataType sType = getSType(insn->src[0], false, false); 2041 Value *indirectBuffer; 2042 Value *indirectOffset; 2043 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer); 2044 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset); 2045 2046 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { 2047 if (!((1u << i) & nir_intrinsic_write_mask(insn))) 2048 continue; 2049 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, 2050 offset + i * typeSizeof(sType)); 2051 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i)) 2052 ->setIndirect(0, 1, indirectBuffer); 2053 } 2054 info_out->io.globalAccess |= 0x2; 2055 break; 2056 } 2057 case nir_intrinsic_load_ssbo: { 2058 const DataType dType = getDType(insn); 2059 LValues &newDefs = convert(&insn->dest); 2060 Value *indirectBuffer; 2061 Value *indirectOffset; 2062 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); 2063 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); 2064 2065 for (uint8_t i = 0u; i < dest_components; ++i) 2066 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, 2067 indirectOffset, indirectBuffer); 2068 2069 info_out->io.globalAccess |= 0x1; 2070 break; 2071 } 2072 case nir_intrinsic_shared_atomic_add: 2073 case nir_intrinsic_shared_atomic_fadd: 2074 case nir_intrinsic_shared_atomic_and: 2075 case nir_intrinsic_shared_atomic_comp_swap: 2076 case nir_intrinsic_shared_atomic_exchange: 2077 case nir_intrinsic_shared_atomic_or: 2078 case nir_intrinsic_shared_atomic_imax: 2079 case nir_intrinsic_shared_atomic_imin: 2080 case nir_intrinsic_shared_atomic_umax: 2081 case nir_intrinsic_shared_atomic_umin: 2082 case nir_intrinsic_shared_atomic_xor: { 2083 const DataType dType = getDType(insn); 2084 LValues &newDefs = convert(&insn->dest); 2085 Value *indirectOffset; 2086 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); 2087 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset); 2088 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0)); 2089 if (op == nir_intrinsic_shared_atomic_comp_swap) 2090 atom->setSrc(2, getSrc(&insn->src[2], 0)); 2091 atom->setIndirect(0, 0, indirectOffset); 2092 atom->subOp = getSubOp(op); 2093 break; 2094 } 2095 case nir_intrinsic_ssbo_atomic_add: 2096 case nir_intrinsic_ssbo_atomic_fadd: 2097 case nir_intrinsic_ssbo_atomic_and: 2098 case nir_intrinsic_ssbo_atomic_comp_swap: 2099 case nir_intrinsic_ssbo_atomic_exchange: 2100 case nir_intrinsic_ssbo_atomic_or: 2101 case nir_intrinsic_ssbo_atomic_imax: 2102 case nir_intrinsic_ssbo_atomic_imin: 2103 case nir_intrinsic_ssbo_atomic_umax: 2104 case nir_intrinsic_ssbo_atomic_umin: 2105 case nir_intrinsic_ssbo_atomic_xor: { 2106 const DataType dType = getDType(insn); 2107 LValues &newDefs = convert(&insn->dest); 2108 Value *indirectBuffer; 2109 Value *indirectOffset; 2110 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); 2111 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); 2112 2113 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset); 2114 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, 2115 getSrc(&insn->src[2], 0)); 2116 if (op == nir_intrinsic_ssbo_atomic_comp_swap) 2117 atom->setSrc(2, getSrc(&insn->src[3], 0)); 2118 atom->setIndirect(0, 0, indirectOffset); 2119 atom->setIndirect(0, 1, indirectBuffer); 2120 atom->subOp = getSubOp(op); 2121 2122 info_out->io.globalAccess |= 0x2; 2123 break; 2124 } 2125 case nir_intrinsic_global_atomic_add: 2126 case nir_intrinsic_global_atomic_fadd: 2127 case nir_intrinsic_global_atomic_and: 2128 case nir_intrinsic_global_atomic_comp_swap: 2129 case nir_intrinsic_global_atomic_exchange: 2130 case nir_intrinsic_global_atomic_or: 2131 case nir_intrinsic_global_atomic_imax: 2132 case nir_intrinsic_global_atomic_imin: 2133 case nir_intrinsic_global_atomic_umax: 2134 case nir_intrinsic_global_atomic_umin: 2135 case nir_intrinsic_global_atomic_xor: { 2136 const DataType dType = getDType(insn); 2137 LValues &newDefs = convert(&insn->dest); 2138 Value *address; 2139 uint32_t offset = getIndirect(&insn->src[0], 0, address); 2140 2141 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset); 2142 Instruction *atom = 2143 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0)); 2144 if (op == nir_intrinsic_global_atomic_comp_swap) 2145 atom->setSrc(2, getSrc(&insn->src[2], 0)); 2146 atom->setIndirect(0, 0, address); 2147 atom->subOp = getSubOp(op); 2148 2149 info_out->io.globalAccess |= 0x2; 2150 break; 2151 } 2152 case nir_intrinsic_bindless_image_atomic_add: 2153 case nir_intrinsic_bindless_image_atomic_fadd: 2154 case nir_intrinsic_bindless_image_atomic_and: 2155 case nir_intrinsic_bindless_image_atomic_comp_swap: 2156 case nir_intrinsic_bindless_image_atomic_exchange: 2157 case nir_intrinsic_bindless_image_atomic_imax: 2158 case nir_intrinsic_bindless_image_atomic_umax: 2159 case nir_intrinsic_bindless_image_atomic_imin: 2160 case nir_intrinsic_bindless_image_atomic_umin: 2161 case nir_intrinsic_bindless_image_atomic_or: 2162 case nir_intrinsic_bindless_image_atomic_xor: 2163 case nir_intrinsic_bindless_image_atomic_inc_wrap: 2164 case nir_intrinsic_bindless_image_atomic_dec_wrap: 2165 case nir_intrinsic_bindless_image_load: 2166 case nir_intrinsic_bindless_image_samples: 2167 case nir_intrinsic_bindless_image_size: 2168 case nir_intrinsic_bindless_image_store: 2169 case nir_intrinsic_image_atomic_add: 2170 case nir_intrinsic_image_atomic_fadd: 2171 case nir_intrinsic_image_atomic_and: 2172 case nir_intrinsic_image_atomic_comp_swap: 2173 case nir_intrinsic_image_atomic_exchange: 2174 case nir_intrinsic_image_atomic_imax: 2175 case nir_intrinsic_image_atomic_umax: 2176 case nir_intrinsic_image_atomic_imin: 2177 case nir_intrinsic_image_atomic_umin: 2178 case nir_intrinsic_image_atomic_or: 2179 case nir_intrinsic_image_atomic_xor: 2180 case nir_intrinsic_image_atomic_inc_wrap: 2181 case nir_intrinsic_image_atomic_dec_wrap: 2182 case nir_intrinsic_image_load: 2183 case nir_intrinsic_image_samples: 2184 case nir_intrinsic_image_size: 2185 case nir_intrinsic_image_store: { 2186 std::vector<Value*> srcs, defs; 2187 Value *indirect; 2188 DataType ty; 2189 2190 uint32_t mask = 0; 2191 TexInstruction::Target target = 2192 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false); 2193 unsigned int argCount = getNIRArgCount(target); 2194 uint16_t location = 0; 2195 2196 if (opInfo.has_dest) { 2197 LValues &newDefs = convert(&insn->dest); 2198 for (uint8_t i = 0u; i < newDefs.size(); ++i) { 2199 defs.push_back(newDefs[i]); 2200 mask |= 1 << i; 2201 } 2202 } 2203 2204 int lod_src = -1; 2205 bool bindless = false; 2206 switch (op) { 2207 case nir_intrinsic_bindless_image_atomic_add: 2208 case nir_intrinsic_bindless_image_atomic_fadd: 2209 case nir_intrinsic_bindless_image_atomic_and: 2210 case nir_intrinsic_bindless_image_atomic_comp_swap: 2211 case nir_intrinsic_bindless_image_atomic_exchange: 2212 case nir_intrinsic_bindless_image_atomic_imax: 2213 case nir_intrinsic_bindless_image_atomic_umax: 2214 case nir_intrinsic_bindless_image_atomic_imin: 2215 case nir_intrinsic_bindless_image_atomic_umin: 2216 case nir_intrinsic_bindless_image_atomic_or: 2217 case nir_intrinsic_bindless_image_atomic_xor: 2218 case nir_intrinsic_bindless_image_atomic_inc_wrap: 2219 case nir_intrinsic_bindless_image_atomic_dec_wrap: 2220 ty = getDType(insn); 2221 bindless = true; 2222 info_out->io.globalAccess |= 0x2; 2223 mask = 0x1; 2224 break; 2225 case nir_intrinsic_image_atomic_add: 2226 case nir_intrinsic_image_atomic_fadd: 2227 case nir_intrinsic_image_atomic_and: 2228 case nir_intrinsic_image_atomic_comp_swap: 2229 case nir_intrinsic_image_atomic_exchange: 2230 case nir_intrinsic_image_atomic_imax: 2231 case nir_intrinsic_image_atomic_umax: 2232 case nir_intrinsic_image_atomic_imin: 2233 case nir_intrinsic_image_atomic_umin: 2234 case nir_intrinsic_image_atomic_or: 2235 case nir_intrinsic_image_atomic_xor: 2236 case nir_intrinsic_image_atomic_inc_wrap: 2237 case nir_intrinsic_image_atomic_dec_wrap: 2238 ty = getDType(insn); 2239 bindless = false; 2240 info_out->io.globalAccess |= 0x2; 2241 mask = 0x1; 2242 break; 2243 case nir_intrinsic_bindless_image_load: 2244 case nir_intrinsic_image_load: 2245 ty = TYPE_U32; 2246 bindless = op == nir_intrinsic_bindless_image_load; 2247 info_out->io.globalAccess |= 0x1; 2248 lod_src = 4; 2249 break; 2250 case nir_intrinsic_bindless_image_store: 2251 case nir_intrinsic_image_store: 2252 ty = TYPE_U32; 2253 bindless = op == nir_intrinsic_bindless_image_store; 2254 info_out->io.globalAccess |= 0x2; 2255 lod_src = 5; 2256 mask = 0xf; 2257 break; 2258 case nir_intrinsic_bindless_image_samples: 2259 mask = 0x8; 2260 FALLTHROUGH; 2261 case nir_intrinsic_image_samples: 2262 ty = TYPE_U32; 2263 bindless = op == nir_intrinsic_bindless_image_samples; 2264 mask = 0x8; 2265 break; 2266 case nir_intrinsic_bindless_image_size: 2267 case nir_intrinsic_image_size: 2268 assert(nir_src_as_uint(insn->src[1]) == 0); 2269 ty = TYPE_U32; 2270 bindless = op == nir_intrinsic_bindless_image_size; 2271 break; 2272 default: 2273 unreachable("unhandled image opcode"); 2274 break; 2275 } 2276 2277 if (bindless) 2278 indirect = getSrc(&insn->src[0], 0); 2279 else 2280 location = getIndirect(&insn->src[0], 0, indirect); 2281 2282 /* Pre-GF100, SSBOs and images are in the same HW file, managed by 2283 * prop.cp.gmem. images are located after SSBOs. 2284 */ 2285 if (info->target < NVISA_GF100_CHIPSET) 2286 location += nir->info.num_ssbos; 2287 2288 // coords 2289 if (opInfo.num_srcs >= 2) 2290 for (unsigned int i = 0u; i < argCount; ++i) 2291 srcs.push_back(getSrc(&insn->src[1], i)); 2292 2293 // the sampler is just another src added after coords 2294 if (opInfo.num_srcs >= 3 && target.isMS()) 2295 srcs.push_back(getSrc(&insn->src[2], 0)); 2296 2297 if (opInfo.num_srcs >= 4 && lod_src != 4) { 2298 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components; 2299 for (uint8_t i = 0u; i < components; ++i) 2300 srcs.push_back(getSrc(&insn->src[3], i)); 2301 } 2302 2303 if (opInfo.num_srcs >= 5 && lod_src != 5) 2304 // 1 for aotmic swap 2305 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i) 2306 srcs.push_back(getSrc(&insn->src[4], i)); 2307 2308 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs); 2309 texi->tex.bindless = bindless; 2310 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn)); 2311 texi->tex.mask = mask; 2312 texi->cache = convert(nir_intrinsic_access(insn)); 2313 texi->setType(ty); 2314 texi->subOp = getSubOp(op); 2315 2316 if (indirect) 2317 texi->setIndirectR(indirect); 2318 2319 break; 2320 } 2321 case nir_intrinsic_store_scratch: 2322 case nir_intrinsic_store_shared: { 2323 DataType sType = getSType(insn->src[0], false, false); 2324 Value *indirectOffset; 2325 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); 2326 if (indirectOffset) 2327 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset); 2328 2329 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { 2330 if (!((1u << i) & nir_intrinsic_write_mask(insn))) 2331 continue; 2332 Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType)); 2333 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i)); 2334 } 2335 break; 2336 } 2337 case nir_intrinsic_load_kernel_input: 2338 case nir_intrinsic_load_scratch: 2339 case nir_intrinsic_load_shared: { 2340 const DataType dType = getDType(insn); 2341 LValues &newDefs = convert(&insn->dest); 2342 Value *indirectOffset; 2343 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); 2344 if (indirectOffset) 2345 indirectOffset = mkOp1v(OP_MOV, TYPE_U32, getSSA(4, FILE_ADDRESS), indirectOffset); 2346 2347 for (uint8_t i = 0u; i < dest_components; ++i) 2348 loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset); 2349 2350 break; 2351 } 2352 case nir_intrinsic_control_barrier: { 2353 // TODO: add flag to shader_info 2354 info_out->numBarriers = 1; 2355 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); 2356 bar->fixed = 1; 2357 bar->subOp = NV50_IR_SUBOP_BAR_SYNC; 2358 break; 2359 } 2360 case nir_intrinsic_group_memory_barrier: 2361 case nir_intrinsic_memory_barrier: 2362 case nir_intrinsic_memory_barrier_buffer: 2363 case nir_intrinsic_memory_barrier_image: 2364 case nir_intrinsic_memory_barrier_shared: { 2365 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL); 2366 bar->fixed = 1; 2367 bar->subOp = getSubOp(op); 2368 break; 2369 } 2370 case nir_intrinsic_memory_barrier_tcs_patch: 2371 break; 2372 case nir_intrinsic_shader_clock: { 2373 const DataType dType = getDType(insn); 2374 LValues &newDefs = convert(&insn->dest); 2375 2376 loadImm(newDefs[0], 0u); 2377 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1; 2378 break; 2379 } 2380 case nir_intrinsic_load_global: 2381 case nir_intrinsic_load_global_constant: { 2382 const DataType dType = getDType(insn); 2383 LValues &newDefs = convert(&insn->dest); 2384 Value *indirectOffset; 2385 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); 2386 2387 for (auto i = 0u; i < dest_components; ++i) 2388 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset); 2389 2390 info_out->io.globalAccess |= 0x1; 2391 break; 2392 } 2393 case nir_intrinsic_store_global: { 2394 DataType sType = getSType(insn->src[0], false, false); 2395 2396 for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { 2397 if (!((1u << i) & nir_intrinsic_write_mask(insn))) 2398 continue; 2399 if (typeSizeof(sType) == 8) { 2400 Value *split[2]; 2401 mkSplit(split, 4, getSrc(&insn->src[0], i)); 2402 2403 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType)); 2404 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]); 2405 2406 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4); 2407 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]); 2408 } else { 2409 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType)); 2410 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i)); 2411 } 2412 } 2413 2414 info_out->io.globalAccess |= 0x2; 2415 break; 2416 } 2417 default: 2418 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); 2419 return false; 2420 } 2421 2422 return true; 2423} 2424 2425bool 2426Converter::visit(nir_jump_instr *insn) 2427{ 2428 switch (insn->type) { 2429 case nir_jump_return: 2430 // TODO: this only works in the main function 2431 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL); 2432 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS); 2433 break; 2434 case nir_jump_break: 2435 case nir_jump_continue: { 2436 bool isBreak = insn->type == nir_jump_break; 2437 nir_block *block = insn->instr.block; 2438 BasicBlock *target = convert(block->successors[0]); 2439 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL); 2440 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK); 2441 break; 2442 } 2443 default: 2444 ERROR("unknown nir_jump_type %u\n", insn->type); 2445 return false; 2446 } 2447 2448 return true; 2449} 2450 2451Value* 2452Converter::convert(nir_load_const_instr *insn, uint8_t idx) 2453{ 2454 Value *val; 2455 2456 if (immInsertPos) 2457 setPosition(immInsertPos, true); 2458 else 2459 setPosition(bb, false); 2460 2461 switch (insn->def.bit_size) { 2462 case 64: 2463 val = loadImm(getSSA(8), insn->value[idx].u64); 2464 break; 2465 case 32: 2466 val = loadImm(getSSA(4), insn->value[idx].u32); 2467 break; 2468 case 16: 2469 val = loadImm(getSSA(2), insn->value[idx].u16); 2470 break; 2471 case 8: 2472 val = loadImm(getSSA(1), insn->value[idx].u8); 2473 break; 2474 default: 2475 unreachable("unhandled bit size!\n"); 2476 } 2477 setPosition(bb, true); 2478 return val; 2479} 2480 2481bool 2482Converter::visit(nir_load_const_instr *insn) 2483{ 2484 assert(insn->def.bit_size <= 64); 2485 immediates[insn->def.index] = insn; 2486 return true; 2487} 2488 2489#define DEFAULT_CHECKS \ 2490 if (insn->dest.dest.ssa.num_components > 1) { \ 2491 ERROR("nir_alu_instr only supported with 1 component!\n"); \ 2492 return false; \ 2493 } \ 2494 if (insn->dest.write_mask != 1) { \ 2495 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \ 2496 return false; \ 2497 } 2498bool 2499Converter::visit(nir_alu_instr *insn) 2500{ 2501 const nir_op op = insn->op; 2502 const nir_op_info &info = nir_op_infos[op]; 2503 DataType dType = getDType(insn); 2504 const std::vector<DataType> sTypes = getSTypes(insn); 2505 2506 Instruction *oldPos = this->bb->getExit(); 2507 2508 switch (op) { 2509 case nir_op_fabs: 2510 case nir_op_iabs: 2511 case nir_op_fadd: 2512 case nir_op_iadd: 2513 case nir_op_iand: 2514 case nir_op_fceil: 2515 case nir_op_fcos: 2516 case nir_op_fddx: 2517 case nir_op_fddx_coarse: 2518 case nir_op_fddx_fine: 2519 case nir_op_fddy: 2520 case nir_op_fddy_coarse: 2521 case nir_op_fddy_fine: 2522 case nir_op_fdiv: 2523 case nir_op_idiv: 2524 case nir_op_udiv: 2525 case nir_op_fexp2: 2526 case nir_op_ffloor: 2527 case nir_op_ffma: 2528 case nir_op_flog2: 2529 case nir_op_fmax: 2530 case nir_op_imax: 2531 case nir_op_umax: 2532 case nir_op_fmin: 2533 case nir_op_imin: 2534 case nir_op_umin: 2535 case nir_op_fmod: 2536 case nir_op_imod: 2537 case nir_op_umod: 2538 case nir_op_fmul: 2539 case nir_op_imul: 2540 case nir_op_imul_high: 2541 case nir_op_umul_high: 2542 case nir_op_fneg: 2543 case nir_op_ineg: 2544 case nir_op_inot: 2545 case nir_op_ior: 2546 case nir_op_pack_64_2x32_split: 2547 case nir_op_fpow: 2548 case nir_op_frcp: 2549 case nir_op_frem: 2550 case nir_op_irem: 2551 case nir_op_frsq: 2552 case nir_op_fsat: 2553 case nir_op_ishr: 2554 case nir_op_ushr: 2555 case nir_op_fsin: 2556 case nir_op_fsqrt: 2557 case nir_op_ftrunc: 2558 case nir_op_ishl: 2559 case nir_op_ixor: { 2560 DEFAULT_CHECKS; 2561 LValues &newDefs = convert(&insn->dest); 2562 operation preOp = preOperationNeeded(op); 2563 if (preOp != OP_NOP) { 2564 assert(info.num_inputs < 2); 2565 Value *tmp = getSSA(typeSizeof(dType)); 2566 Instruction *i0 = mkOp(preOp, dType, tmp); 2567 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]); 2568 if (info.num_inputs) { 2569 i0->setSrc(0, getSrc(&insn->src[0])); 2570 i1->setSrc(0, tmp); 2571 } 2572 i1->subOp = getSubOp(op); 2573 } else { 2574 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]); 2575 for (unsigned s = 0u; s < info.num_inputs; ++s) { 2576 i->setSrc(s, getSrc(&insn->src[s])); 2577 2578 if (this->info->io.mul_zero_wins) { 2579 switch (op) { 2580 case nir_op_fmul: 2581 case nir_op_ffma: 2582 i->dnz = true; 2583 break; 2584 default: 2585 break; 2586 } 2587 } 2588 } 2589 i->subOp = getSubOp(op); 2590 } 2591 break; 2592 } 2593 case nir_op_ifind_msb: 2594 case nir_op_ufind_msb: { 2595 DEFAULT_CHECKS; 2596 LValues &newDefs = convert(&insn->dest); 2597 dType = sTypes[0]; 2598 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0])); 2599 break; 2600 } 2601 case nir_op_fround_even: { 2602 DEFAULT_CHECKS; 2603 LValues &newDefs = convert(&insn->dest); 2604 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI; 2605 break; 2606 } 2607 // convert instructions 2608 case nir_op_f2f32: 2609 case nir_op_f2i32: 2610 case nir_op_f2u32: 2611 case nir_op_i2f32: 2612 case nir_op_i2i32: 2613 case nir_op_u2f32: 2614 case nir_op_u2u32: 2615 case nir_op_f2f64: 2616 case nir_op_f2i64: 2617 case nir_op_f2u64: 2618 case nir_op_i2f64: 2619 case nir_op_i2i64: 2620 case nir_op_u2f64: 2621 case nir_op_u2u64: { 2622 DEFAULT_CHECKS; 2623 LValues &newDefs = convert(&insn->dest); 2624 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0])); 2625 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64) 2626 i->rnd = ROUND_Z; 2627 i->sType = sTypes[0]; 2628 break; 2629 } 2630 // compare instructions 2631 case nir_op_feq32: 2632 case nir_op_ieq32: 2633 case nir_op_fge32: 2634 case nir_op_ige32: 2635 case nir_op_uge32: 2636 case nir_op_flt32: 2637 case nir_op_ilt32: 2638 case nir_op_ult32: 2639 case nir_op_fneu32: 2640 case nir_op_ine32: { 2641 DEFAULT_CHECKS; 2642 LValues &newDefs = convert(&insn->dest); 2643 Instruction *i = mkCmp(getOperation(op), 2644 getCondCode(op), 2645 dType, 2646 newDefs[0], 2647 dType, 2648 getSrc(&insn->src[0]), 2649 getSrc(&insn->src[1])); 2650 if (info.num_inputs == 3) 2651 i->setSrc(2, getSrc(&insn->src[2])); 2652 i->sType = sTypes[0]; 2653 break; 2654 } 2655 case nir_op_mov: 2656 case nir_op_vec2: 2657 case nir_op_vec3: 2658 case nir_op_vec4: 2659 case nir_op_vec8: 2660 case nir_op_vec16: { 2661 LValues &newDefs = convert(&insn->dest); 2662 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) { 2663 mkMov(newDefs[c], getSrc(&insn->src[c]), dType); 2664 } 2665 break; 2666 } 2667 // (un)pack 2668 case nir_op_pack_64_2x32: { 2669 LValues &newDefs = convert(&insn->dest); 2670 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]); 2671 merge->setSrc(0, getSrc(&insn->src[0], 0)); 2672 merge->setSrc(1, getSrc(&insn->src[0], 1)); 2673 break; 2674 } 2675 case nir_op_pack_half_2x16_split: { 2676 LValues &newDefs = convert(&insn->dest); 2677 Value *tmpH = getSSA(); 2678 Value *tmpL = getSSA(); 2679 2680 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0])); 2681 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1])); 2682 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL); 2683 break; 2684 } 2685 case nir_op_unpack_half_2x16_split_x: 2686 case nir_op_unpack_half_2x16_split_y: { 2687 LValues &newDefs = convert(&insn->dest); 2688 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0])); 2689 if (op == nir_op_unpack_half_2x16_split_y) 2690 cvt->subOp = 1; 2691 break; 2692 } 2693 case nir_op_unpack_64_2x32: { 2694 LValues &newDefs = convert(&insn->dest); 2695 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]); 2696 break; 2697 } 2698 case nir_op_unpack_64_2x32_split_x: { 2699 LValues &newDefs = convert(&insn->dest); 2700 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA()); 2701 break; 2702 } 2703 case nir_op_unpack_64_2x32_split_y: { 2704 LValues &newDefs = convert(&insn->dest); 2705 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]); 2706 break; 2707 } 2708 // special instructions 2709 case nir_op_fsign: 2710 case nir_op_isign: { 2711 DEFAULT_CHECKS; 2712 DataType iType; 2713 if (::isFloatType(dType)) 2714 iType = TYPE_F32; 2715 else 2716 iType = TYPE_S32; 2717 2718 LValues &newDefs = convert(&insn->dest); 2719 LValue *val0 = getScratch(); 2720 LValue *val1 = getScratch(); 2721 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero); 2722 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero); 2723 2724 if (dType == TYPE_F64) { 2725 mkOp2(OP_SUB, iType, val0, val0, val1); 2726 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0); 2727 } else if (dType == TYPE_S64 || dType == TYPE_U64) { 2728 mkOp2(OP_SUB, iType, val0, val1, val0); 2729 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31)); 2730 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1); 2731 } else if (::isFloatType(dType)) 2732 mkOp2(OP_SUB, iType, newDefs[0], val0, val1); 2733 else 2734 mkOp2(OP_SUB, iType, newDefs[0], val1, val0); 2735 break; 2736 } 2737 case nir_op_fcsel: 2738 case nir_op_b32csel: { 2739 DEFAULT_CHECKS; 2740 LValues &newDefs = convert(&insn->dest); 2741 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0])); 2742 break; 2743 } 2744 case nir_op_ibitfield_extract: 2745 case nir_op_ubitfield_extract: { 2746 DEFAULT_CHECKS; 2747 Value *tmp = getSSA(); 2748 LValues &newDefs = convert(&insn->dest); 2749 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1])); 2750 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp); 2751 break; 2752 } 2753 case nir_op_bfm: { 2754 DEFAULT_CHECKS; 2755 LValues &newDefs = convert(&insn->dest); 2756 mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W; 2757 break; 2758 } 2759 case nir_op_bitfield_insert: { 2760 DEFAULT_CHECKS; 2761 LValues &newDefs = convert(&insn->dest); 2762 LValue *temp = getSSA(); 2763 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2])); 2764 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0])); 2765 break; 2766 } 2767 case nir_op_bit_count: { 2768 DEFAULT_CHECKS; 2769 LValues &newDefs = convert(&insn->dest); 2770 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0])); 2771 break; 2772 } 2773 case nir_op_bitfield_reverse: { 2774 DEFAULT_CHECKS; 2775 LValues &newDefs = convert(&insn->dest); 2776 mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0])); 2777 break; 2778 } 2779 case nir_op_find_lsb: { 2780 DEFAULT_CHECKS; 2781 LValues &newDefs = convert(&insn->dest); 2782 Value *tmp = getSSA(); 2783 mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0])); 2784 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; 2785 break; 2786 } 2787 case nir_op_extract_u8: { 2788 DEFAULT_CHECKS; 2789 LValues &newDefs = convert(&insn->dest); 2790 Value *prmt = getSSA(); 2791 mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440)); 2792 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); 2793 break; 2794 } 2795 case nir_op_extract_i8: { 2796 DEFAULT_CHECKS; 2797 LValues &newDefs = convert(&insn->dest); 2798 Value *prmt = getSSA(); 2799 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880)); 2800 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); 2801 break; 2802 } 2803 case nir_op_extract_u16: { 2804 DEFAULT_CHECKS; 2805 LValues &newDefs = convert(&insn->dest); 2806 Value *prmt = getSSA(); 2807 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410)); 2808 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); 2809 break; 2810 } 2811 case nir_op_extract_i16: { 2812 DEFAULT_CHECKS; 2813 LValues &newDefs = convert(&insn->dest); 2814 Value *prmt = getSSA(); 2815 mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910)); 2816 mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); 2817 break; 2818 } 2819 case nir_op_urol: { 2820 DEFAULT_CHECKS; 2821 LValues &newDefs = convert(&insn->dest); 2822 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), 2823 getSrc(&insn->src[1]), getSrc(&insn->src[0])) 2824 ->subOp = NV50_IR_SUBOP_SHF_L | 2825 NV50_IR_SUBOP_SHF_W | 2826 NV50_IR_SUBOP_SHF_HI; 2827 break; 2828 } 2829 case nir_op_uror: { 2830 DEFAULT_CHECKS; 2831 LValues &newDefs = convert(&insn->dest); 2832 mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), 2833 getSrc(&insn->src[1]), getSrc(&insn->src[0])) 2834 ->subOp = NV50_IR_SUBOP_SHF_R | 2835 NV50_IR_SUBOP_SHF_W | 2836 NV50_IR_SUBOP_SHF_LO; 2837 break; 2838 } 2839 // boolean conversions 2840 case nir_op_b2f32: { 2841 DEFAULT_CHECKS; 2842 LValues &newDefs = convert(&insn->dest); 2843 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f)); 2844 break; 2845 } 2846 case nir_op_b2f64: { 2847 DEFAULT_CHECKS; 2848 LValues &newDefs = convert(&insn->dest); 2849 Value *tmp = getSSA(4); 2850 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000)); 2851 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp); 2852 break; 2853 } 2854 case nir_op_f2b32: 2855 case nir_op_i2b32: { 2856 DEFAULT_CHECKS; 2857 LValues &newDefs = convert(&insn->dest); 2858 Value *src1; 2859 if (typeSizeof(sTypes[0]) == 8) { 2860 src1 = loadImm(getSSA(8), 0.0); 2861 } else { 2862 src1 = zero; 2863 } 2864 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE; 2865 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1); 2866 break; 2867 } 2868 case nir_op_b2i32: { 2869 DEFAULT_CHECKS; 2870 LValues &newDefs = convert(&insn->dest); 2871 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1)); 2872 break; 2873 } 2874 case nir_op_b2i64: { 2875 DEFAULT_CHECKS; 2876 LValues &newDefs = convert(&insn->dest); 2877 LValue *def = getScratch(); 2878 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1)); 2879 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0)); 2880 break; 2881 } 2882 default: 2883 ERROR("unknown nir_op %s\n", info.name); 2884 assert(false); 2885 return false; 2886 } 2887 2888 if (!oldPos) { 2889 oldPos = this->bb->getEntry(); 2890 oldPos->precise = insn->exact; 2891 } 2892 2893 if (unlikely(!oldPos)) 2894 return true; 2895 2896 while (oldPos->next) { 2897 oldPos = oldPos->next; 2898 oldPos->precise = insn->exact; 2899 } 2900 oldPos->saturate = insn->dest.saturate; 2901 2902 return true; 2903} 2904#undef DEFAULT_CHECKS 2905 2906bool 2907Converter::visit(nir_ssa_undef_instr *insn) 2908{ 2909 LValues &newDefs = convert(&insn->def); 2910 for (uint8_t i = 0u; i < insn->def.num_components; ++i) { 2911 mkOp(OP_NOP, TYPE_NONE, newDefs[i]); 2912 } 2913 return true; 2914} 2915 2916#define CASE_SAMPLER(ty) \ 2917 case GLSL_SAMPLER_DIM_ ## ty : \ 2918 if (isArray && !isShadow) \ 2919 return TEX_TARGET_ ## ty ## _ARRAY; \ 2920 else if (!isArray && isShadow) \ 2921 return TEX_TARGET_## ty ## _SHADOW; \ 2922 else if (isArray && isShadow) \ 2923 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \ 2924 else \ 2925 return TEX_TARGET_ ## ty 2926 2927TexTarget 2928Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow) 2929{ 2930 switch (dim) { 2931 CASE_SAMPLER(1D); 2932 CASE_SAMPLER(2D); 2933 CASE_SAMPLER(CUBE); 2934 case GLSL_SAMPLER_DIM_3D: 2935 return TEX_TARGET_3D; 2936 case GLSL_SAMPLER_DIM_MS: 2937 if (isArray) 2938 return TEX_TARGET_2D_MS_ARRAY; 2939 return TEX_TARGET_2D_MS; 2940 case GLSL_SAMPLER_DIM_RECT: 2941 if (isShadow) 2942 return TEX_TARGET_RECT_SHADOW; 2943 return TEX_TARGET_RECT; 2944 case GLSL_SAMPLER_DIM_BUF: 2945 return TEX_TARGET_BUFFER; 2946 case GLSL_SAMPLER_DIM_EXTERNAL: 2947 return TEX_TARGET_2D; 2948 default: 2949 ERROR("unknown glsl_sampler_dim %u\n", dim); 2950 assert(false); 2951 return TEX_TARGET_COUNT; 2952 } 2953} 2954#undef CASE_SAMPLER 2955 2956unsigned int 2957Converter::getNIRArgCount(TexInstruction::Target& target) 2958{ 2959 unsigned int result = target.getArgCount(); 2960 if (target.isCube() && target.isArray()) 2961 result--; 2962 if (target.isMS()) 2963 result--; 2964 return result; 2965} 2966 2967CacheMode 2968Converter::convert(enum gl_access_qualifier access) 2969{ 2970 if (access & ACCESS_VOLATILE) 2971 return CACHE_CV; 2972 if (access & ACCESS_COHERENT) 2973 return CACHE_CG; 2974 return CACHE_CA; 2975} 2976 2977bool 2978Converter::visit(nir_tex_instr *insn) 2979{ 2980 switch (insn->op) { 2981 case nir_texop_lod: 2982 case nir_texop_query_levels: 2983 case nir_texop_tex: 2984 case nir_texop_texture_samples: 2985 case nir_texop_tg4: 2986 case nir_texop_txb: 2987 case nir_texop_txd: 2988 case nir_texop_txf: 2989 case nir_texop_txf_ms: 2990 case nir_texop_txl: 2991 case nir_texop_txs: { 2992 LValues &newDefs = convert(&insn->dest); 2993 std::vector<Value*> srcs; 2994 std::vector<Value*> defs; 2995 std::vector<nir_src*> offsets; 2996 uint8_t mask = 0; 2997 bool lz = false; 2998 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow); 2999 operation op = getOperation(insn->op); 3000 3001 int r, s; 3002 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias); 3003 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator); 3004 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord); 3005 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx); 3006 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy); 3007 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index); 3008 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod); 3009 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset); 3010 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset); 3011 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset); 3012 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle); 3013 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle); 3014 3015 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1; 3016 assert((sampHandleIdx != -1) == (texHandleIdx != -1)); 3017 3018 srcs.resize(insn->coord_components); 3019 for (uint8_t i = 0u; i < insn->coord_components; ++i) 3020 srcs[i] = getSrc(&insn->src[coordsIdx].src, i); 3021 3022 // sometimes we get less args than target.getArgCount, but codegen expects the latter 3023 if (insn->coord_components) { 3024 uint32_t argCount = target.getArgCount(); 3025 3026 if (target.isMS()) 3027 argCount -= 1; 3028 3029 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i) 3030 srcs.push_back(getSSA()); 3031 } 3032 3033 if (insn->op == nir_texop_texture_samples) 3034 srcs.push_back(zero); 3035 else if (!insn->num_srcs) 3036 srcs.push_back(loadImm(NULL, 0)); 3037 if (biasIdx != -1) 3038 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0)); 3039 if (lodIdx != -1) 3040 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0)); 3041 else if (op == OP_TXF) 3042 lz = true; 3043 if (msIdx != -1) 3044 srcs.push_back(getSrc(&insn->src[msIdx].src, 0)); 3045 if (offsetIdx != -1) 3046 offsets.push_back(&insn->src[offsetIdx].src); 3047 if (compIdx != -1) 3048 srcs.push_back(getSrc(&insn->src[compIdx].src, 0)); 3049 if (texOffIdx != -1) { 3050 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0)); 3051 texOffIdx = srcs.size() - 1; 3052 } 3053 if (sampOffIdx != -1) { 3054 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0)); 3055 sampOffIdx = srcs.size() - 1; 3056 } 3057 if (bindless) { 3058 // currently we use the lower bits 3059 Value *split[2]; 3060 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0); 3061 3062 mkSplit(split, 4, handle); 3063 3064 srcs.push_back(split[0]); 3065 texOffIdx = srcs.size() - 1; 3066 } 3067 3068 r = bindless ? 0xff : insn->texture_index; 3069 s = bindless ? 0x1f : insn->sampler_index; 3070 if (op == OP_TXF || op == OP_TXQ) 3071 s = 0; 3072 3073 defs.resize(newDefs.size()); 3074 for (uint8_t d = 0u; d < newDefs.size(); ++d) { 3075 defs[d] = newDefs[d]; 3076 mask |= 1 << d; 3077 } 3078 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT)) 3079 lz = true; 3080 3081 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs); 3082 texi->tex.levelZero = lz; 3083 texi->tex.mask = mask; 3084 texi->tex.bindless = bindless; 3085 3086 if (texOffIdx != -1) 3087 texi->tex.rIndirectSrc = texOffIdx; 3088 if (sampOffIdx != -1) 3089 texi->tex.sIndirectSrc = sampOffIdx; 3090 3091 switch (insn->op) { 3092 case nir_texop_tg4: 3093 if (!target.isShadow()) 3094 texi->tex.gatherComp = insn->component; 3095 break; 3096 case nir_texop_txs: 3097 texi->tex.query = TXQ_DIMS; 3098 break; 3099 case nir_texop_texture_samples: 3100 texi->tex.mask = 0x4; 3101 texi->tex.query = TXQ_TYPE; 3102 break; 3103 case nir_texop_query_levels: 3104 texi->tex.mask = 0x8; 3105 texi->tex.query = TXQ_DIMS; 3106 break; 3107 default: 3108 break; 3109 } 3110 3111 texi->tex.useOffsets = offsets.size(); 3112 if (texi->tex.useOffsets) { 3113 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) { 3114 for (uint32_t c = 0u; c < 3; ++c) { 3115 uint8_t s2 = std::min(c, target.getDim() - 1); 3116 texi->offset[s][c].set(getSrc(offsets[s], s2)); 3117 texi->offset[s][c].setInsn(texi); 3118 } 3119 } 3120 } 3121 3122 if (op == OP_TXG && offsetIdx == -1) { 3123 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) { 3124 texi->tex.useOffsets = 4; 3125 setPosition(texi, false); 3126 for (uint8_t i = 0; i < 4; ++i) { 3127 for (uint8_t j = 0; j < 2; ++j) { 3128 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j])); 3129 texi->offset[i][j].setInsn(texi); 3130 } 3131 } 3132 setPosition(texi, true); 3133 } 3134 } 3135 3136 if (ddxIdx != -1 && ddyIdx != -1) { 3137 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) { 3138 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c)); 3139 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c)); 3140 } 3141 } 3142 3143 break; 3144 } 3145 default: 3146 ERROR("unknown nir_texop %u\n", insn->op); 3147 return false; 3148 } 3149 return true; 3150} 3151 3152/* nouveau's RA doesn't track the liveness of exported registers in the fragment 3153 * shader, so we need all the store_outputs to appear at the end of the shader 3154 * with no other instructions that might generate a temp value in between them. 3155 */ 3156static void 3157nv_nir_move_stores_to_end(nir_shader *s) 3158{ 3159 nir_function_impl *impl = nir_shader_get_entrypoint(s); 3160 nir_block *block = nir_impl_last_block(impl); 3161 nir_instr *first_store = NULL; 3162 3163 nir_foreach_instr_safe(instr, block) { 3164 if (instr == first_store) 3165 break; 3166 if (instr->type != nir_instr_type_intrinsic) 3167 continue; 3168 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 3169 if (intrin->intrinsic == nir_intrinsic_store_output) { 3170 nir_instr_remove(instr); 3171 nir_instr_insert(nir_after_block(block), instr); 3172 3173 if (!first_store) 3174 first_store = instr; 3175 } 3176 } 3177 nir_metadata_preserve(impl, 3178 nir_metadata_block_index | 3179 nir_metadata_dominance); 3180} 3181 3182bool 3183Converter::run() 3184{ 3185 bool progress; 3186 3187 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 3188 nir_print_shader(nir, stderr); 3189 3190 struct nir_lower_subgroups_options subgroup_options = {}; 3191 subgroup_options.subgroup_size = 32; 3192 subgroup_options.ballot_bit_size = 32; 3193 subgroup_options.ballot_components = 1; 3194 subgroup_options.lower_elect = true; 3195 3196 /* prepare for IO lowering */ 3197 NIR_PASS_V(nir, nir_opt_deref); 3198 NIR_PASS_V(nir, nir_lower_regs_to_ssa); 3199 NIR_PASS_V(nir, nir_lower_vars_to_ssa); 3200 3201 /* codegen assumes vec4 alignment for memory */ 3202 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info); 3203 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset); 3204 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); 3205 3206 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 3207 type_size, (nir_lower_io_options)0); 3208 3209 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options); 3210 3211 struct nir_lower_tex_options tex_options = {}; 3212 tex_options.lower_txp = ~0; 3213 3214 NIR_PASS_V(nir, nir_lower_tex, &tex_options); 3215 3216 NIR_PASS_V(nir, nir_lower_load_const_to_scalar); 3217 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); 3218 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); 3219 3220 /*TODO: improve this lowering/optimisation loop so that we can use 3221 * nir_opt_idiv_const effectively before this. 3222 */ 3223 nir_lower_idiv_options idiv_options = { 3224 .imprecise_32bit_lowering = false, 3225 .allow_fp16 = true, 3226 }; 3227 NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options); 3228 3229 do { 3230 progress = false; 3231 NIR_PASS(progress, nir, nir_copy_prop); 3232 NIR_PASS(progress, nir, nir_opt_remove_phis); 3233 NIR_PASS(progress, nir, nir_opt_trivial_continues); 3234 NIR_PASS(progress, nir, nir_opt_cse); 3235 NIR_PASS(progress, nir, nir_opt_algebraic); 3236 NIR_PASS(progress, nir, nir_opt_constant_folding); 3237 NIR_PASS(progress, nir, nir_copy_prop); 3238 NIR_PASS(progress, nir, nir_opt_dce); 3239 NIR_PASS(progress, nir, nir_opt_dead_cf); 3240 NIR_PASS(progress, nir, nir_lower_64bit_phis); 3241 } while (progress); 3242 3243 nir_move_options move_options = 3244 (nir_move_options)(nir_move_const_undef | 3245 nir_move_load_ubo | 3246 nir_move_load_uniform | 3247 nir_move_load_input); 3248 NIR_PASS_V(nir, nir_opt_sink, move_options); 3249 NIR_PASS_V(nir, nir_opt_move, move_options); 3250 3251 if (nir->info.stage == MESA_SHADER_FRAGMENT) 3252 NIR_PASS_V(nir, nv_nir_move_stores_to_end); 3253 3254 NIR_PASS_V(nir, nir_lower_bool_to_int32); 3255 NIR_PASS_V(nir, nir_convert_from_ssa, true); 3256 3257 // Garbage collect dead instructions 3258 nir_sweep(nir); 3259 3260 if (!parseNIR()) { 3261 ERROR("Couldn't prase NIR!\n"); 3262 return false; 3263 } 3264 3265 if (!assignSlots()) { 3266 ERROR("Couldn't assign slots!\n"); 3267 return false; 3268 } 3269 3270 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 3271 nir_print_shader(nir, stderr); 3272 3273 nir_foreach_function(function, nir) { 3274 if (!visit(function)) 3275 return false; 3276 } 3277 3278 return true; 3279} 3280 3281} // unnamed namespace 3282 3283namespace nv50_ir { 3284 3285bool 3286Program::makeFromNIR(struct nv50_ir_prog_info *info, 3287 struct nv50_ir_prog_info_out *info_out) 3288{ 3289 nir_shader *nir = (nir_shader*)info->bin.source; 3290 Converter converter(this, nir, info, info_out); 3291 bool result = converter.run(); 3292 if (!result) 3293 return result; 3294 LoweringHelper lowering; 3295 lowering.run(this); 3296 tlsSize = info_out->bin.tlsSpace; 3297 return result; 3298} 3299 3300} // namespace nv50_ir 3301 3302static nir_shader_compiler_options 3303nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) 3304{ 3305 nir_shader_compiler_options op = {}; 3306 op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); 3307 op.lower_ffma16 = false; 3308 op.lower_ffma32 = false; 3309 op.lower_ffma64 = false; 3310 op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */ 3311 op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */ 3312 op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */ 3313 op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET); 3314 op.lower_flrp32 = true; 3315 op.lower_flrp64 = true; 3316 op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it 3317 op.lower_fsat = false; 3318 op.lower_fsqrt = false; // TODO: only before gm200 3319 op.lower_sincos = false; 3320 op.lower_fmod = true; 3321 op.lower_bitfield_extract = false; 3322 op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET); 3323 op.lower_bitfield_insert = false; 3324 op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET); 3325 op.lower_bitfield_insert_to_bitfield_select = false; 3326 op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET); 3327 op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET); 3328 op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET); 3329 op.lower_find_lsb = (chipset < NVISA_GF100_CHIPSET); 3330 op.lower_uadd_carry = true; // TODO 3331 op.lower_usub_borrow = true; // TODO 3332 op.lower_mul_high = false; 3333 op.lower_fneg = false; 3334 op.lower_ineg = false; 3335 op.lower_scmp = true; // TODO: not implemented yet 3336 op.lower_vector_cmp = false; 3337 op.lower_bitops = false; 3338 op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); 3339 op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); 3340 op.lower_fdph = false; 3341 op.lower_fdot = false; 3342 op.fdot_replicates = false; // TODO 3343 op.lower_ffloor = false; // TODO 3344 op.lower_ffract = true; 3345 op.lower_fceil = false; // TODO 3346 op.lower_ftrunc = false; 3347 op.lower_ldexp = true; 3348 op.lower_pack_half_2x16 = true; 3349 op.lower_pack_unorm_2x16 = true; 3350 op.lower_pack_snorm_2x16 = true; 3351 op.lower_pack_unorm_4x8 = true; 3352 op.lower_pack_snorm_4x8 = true; 3353 op.lower_unpack_half_2x16 = true; 3354 op.lower_unpack_unorm_2x16 = true; 3355 op.lower_unpack_snorm_2x16 = true; 3356 op.lower_unpack_unorm_4x8 = true; 3357 op.lower_unpack_snorm_4x8 = true; 3358 op.lower_pack_split = false; 3359 op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET); 3360 op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET); 3361 op.lower_insert_byte = true; 3362 op.lower_insert_word = true; 3363 op.lower_all_io_to_temps = false; 3364 op.lower_all_io_to_elements = false; 3365 op.vertex_id_zero_based = false; 3366 op.lower_base_vertex = false; 3367 op.lower_helper_invocation = false; 3368 op.optimize_sample_mask_in = false; 3369 op.lower_cs_local_index_to_id = true; 3370 op.lower_cs_local_id_to_index = false; 3371 op.lower_device_index_to_zero = false; // TODO 3372 op.lower_wpos_pntc = false; // TODO 3373 op.lower_hadd = true; // TODO 3374 op.lower_uadd_sat = true; // TODO 3375 op.lower_usub_sat = true; // TODO 3376 op.lower_iadd_sat = true; // TODO 3377 op.vectorize_io = false; 3378 op.lower_to_scalar = false; 3379 op.unify_interfaces = false; 3380 op.use_interpolated_input_intrinsics = true; 3381 op.lower_mul_2x32_64 = true; // TODO 3382 op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); 3383 op.has_imul24 = false; 3384 op.intel_vec4 = false; 3385 op.force_indirect_unrolling = (nir_variable_mode) ( 3386 ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) | 3387 /* HW doesn't support indirect addressing of fragment program inputs 3388 * on Volta. The binary driver generates a function to handle every 3389 * possible indirection, and indirectly calls the function to handle 3390 * this instead. 3391 */ 3392 ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0) 3393 ); 3394 op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET), 3395 op.max_unroll_iterations = 32; 3396 op.lower_int64_options = (nir_lower_int64_options) ( 3397 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | 3398 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | 3399 nir_lower_divmod64 | 3400 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | 3401 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | 3402 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | 3403 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | 3404 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) | 3405 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | 3406 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | 3407 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | 3408 nir_lower_imul_2x32_64 | 3409 ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | 3410 nir_lower_ufind_msb64 3411 ); 3412 op.lower_doubles_options = (nir_lower_doubles_options) ( 3413 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | 3414 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | 3415 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | 3416 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | 3417 nir_lower_dmod | 3418 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | 3419 ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) 3420 ); 3421 return op; 3422} 3423 3424static const nir_shader_compiler_options g80_nir_shader_compiler_options = 3425nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_TYPES); 3426static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options = 3427nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT); 3428static const nir_shader_compiler_options gf100_nir_shader_compiler_options = 3429nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES); 3430static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options = 3431nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT); 3432static const nir_shader_compiler_options gm107_nir_shader_compiler_options = 3433nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES); 3434static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options = 3435nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT); 3436static const nir_shader_compiler_options gv100_nir_shader_compiler_options = 3437nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES); 3438static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options = 3439nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT); 3440 3441const nir_shader_compiler_options * 3442nv50_ir_nir_shader_compiler_options(int chipset, uint8_t shader_type) 3443{ 3444 if (chipset >= NVISA_GV100_CHIPSET) { 3445 if (shader_type == PIPE_SHADER_FRAGMENT) 3446 return &gv100_fs_nir_shader_compiler_options; 3447 else 3448 return &gv100_nir_shader_compiler_options; 3449 } 3450 3451 if (chipset >= NVISA_GM107_CHIPSET) { 3452 if (shader_type == PIPE_SHADER_FRAGMENT) 3453 return &gm107_fs_nir_shader_compiler_options; 3454 else 3455 return &gm107_nir_shader_compiler_options; 3456 } 3457 3458 if (chipset >= NVISA_GF100_CHIPSET) { 3459 if (shader_type == PIPE_SHADER_FRAGMENT) 3460 return &gf100_fs_nir_shader_compiler_options; 3461 else 3462 return &gf100_nir_shader_compiler_options; 3463 } 3464 3465 if (shader_type == PIPE_SHADER_FRAGMENT) 3466 return &g80_fs_nir_shader_compiler_options; 3467 else 3468 return &g80_nir_shader_compiler_options; 3469} 3470