/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"


#include "ppir.h"

static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   ppir_node_target_assign(ps, child);
}
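
/* NIR opcodes without an entry in the table below are left zero-initialized.
 * For the ppir_op_unsupported check in ppir_emit_alu() to catch them, this
 * relies on ppir_op_unsupported being value 0 of the ppir_op enum -- a
 * property of ppir.h assumed here, not visible in this file.
 */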

static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op == ppir_op_unsupported) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}
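
/* discard_if is emitted as a conditional branch to a shared discard block,
 * which ppir_compile_nir() appends at the very end of the block list; the
 * branch condition and second source are filled in later by the lowering
 * pass.
 */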

static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output source is SSA, that node
       * can be directly marked as the output.
       * If discard is used or the source is not SSA, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      assert(nir_src_is_const(instr->src[1]) &&
             "lima doesn't support indirect outputs");

      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
      unsigned offset = nir_src_as_uint(instr->src[1]);
      unsigned slot = io.location + offset;
      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
      if (out_type == ppir_output_invalid) {
         ppir_debug("Unsupported output type: %d\n", slot);
         return false;
      }

      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default: {
            ppir_dest *dest = ppir_node_get_dest(node);
            dest->ssa.out_type = out_type;
            node->is_out = 1;
            return true;
         }
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);
      dest->ssa.out_type = out_type;

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_out = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}

static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}
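
/* Texture sampling on the PP takes its coordinates through a pipeline
 * register rather than a regular source: ppir_emit_tex() below either
 * promotes a varying load that feeds the ld_tex to a load_coords node,
 * or inserts a separate load_coords_reg node, and then routes it to the
 * ld_tex node via the pipeline register.
 */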

static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord ||
                   nts->src_type == nir_tex_src_backend1)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
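
/* ppir blocks are created up front by ppir_compile_nir() and keyed by
 * their originating nir_block pointer; this lookup is how emission and
 * control-flow handling find them.
 */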

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);

   return block;
}

static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("unsupported nir_jump_instr\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}

static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};
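
/* The emit table above is deliberately sized nir_instr_type_phi: phis (and
 * any later instruction types) are expected to have been lowered away
 * before emission, which ppir_emit_block() asserts below.
 */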

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}

static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node is not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}
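
/* var_nodes is allocated right behind the compiler struct: one slot per
 * SSA def in [0, num_ssa), then four slots (one per vec4 component) for
 * each NIR register starting at reg_base == num_ssa. For example, with
 * num_ssa = 10, component .y of register r2 lands at slot
 * 10 + (2 << 2) + 1 = 19.
 */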

static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;

   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and the is_end node as an
    * example. If we don't add a fake dependency of discard_if to is_end,
    * the scheduler may put is_end first, and since is_end terminates the
    * shader on Utgard PP, the rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
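
/* Top-level entry point. The pass order below: build the nir_block ->
 * ppir_block map, emit nodes from NIR, lower them, add the ordering and
 * write-after-read dependencies, pack nodes into PP instructions,
 * schedule, allocate registers, and finally generate code.
 */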

bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct util_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;
   comp->dual_source_blend = nir->info.fs.color_is_dual_source;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);

   /* -1 means reg is not written by the shader */
   for (int i = 0; i < ppir_output_num; i++)
      comp->out_type_to_reg[i] = -1;

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it at the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}