/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"
#include "nir/nir_builder.h"
#include "nir_control_flow.h"
#include "nir_search_helpers.h"

/*
 * Implements a small peephole optimization that looks for
 *
 * if (cond) {
 *    <then SSA defs>
 * } else {
 *    <else SSA defs>
 * }
 * phi
 * ...
 * phi
 *
 * and replaces it with:
 *
 * <then SSA defs>
 * <else SSA defs>
 * bcsel
 * ...
 * bcsel
 *
 * where the SSA defs are ALU operations or other cheap instructions (not
 * texturing, for example).
 *
 * If the number of ALU operations in the branches is greater than the limit
 * parameter, then the optimization is skipped.  In limit=0 mode, the SSA defs
 * must only be MOVs which we expect to get copy-propagated away once they're
 * out of the inner blocks.
 */

/* Returns true if every instruction in @block is cheap/safe enough to be
 * hoisted out of its branch (i.e. executed unconditionally).
 *
 * @count is an accumulator: it is incremented for each "real" ALU op seen
 * (fsat and move-like ops are not counted) and is only ever added to, so
 * the same counter can be threaded through multiple calls to total up both
 * sides of an if.  The caller compares it against @limit afterwards; this
 * function only uses @limit to pick a mode:
 *
 *  - limit == ~0: flatten-everything mode for hardware without control
 *    flow; almost anything reorderable is allowed, including texturing.
 *  - limit == 0:  only move-like SSA defs whose sole uses are phis in the
 *    successor block are allowed.
 *  - otherwise:   general ALU ops are allowed (expensive ones only when
 *    @expensive_alu_ok), and counted into @count.
 *
 * @indirect_load_ok permits indirect input/uniform/image loads, which are
 * otherwise rejected since the flow control may exist to guard them.
 */
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
                               unsigned limit, bool indirect_load_ok,
                               bool expensive_alu_ok)
{
   bool alu_ok = limit != 0;

   /* Used on non-control-flow HW to flatten all IFs. */
   if (limit == ~0) {
      nir_foreach_instr(instr, block) {
         switch (instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_deref:
         case nir_instr_type_load_const:
         case nir_instr_type_phi:
         case nir_instr_type_ssa_undef:
         case nir_instr_type_tex:
            break;

         case nir_instr_type_intrinsic:
            /* Reorderable intrinsics are side-effect free, so executing
             * them unconditionally is safe.
             */
            if (!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
               return false;
            break;

         case nir_instr_type_call:
         case nir_instr_type_jump:
         case nir_instr_type_parallel_copy:
            return false;
         }
      }
      return true;
   }

   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref: {
            nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);

            /* Only read-only variable modes are allowed here. */
            switch (deref->modes) {
            case nir_var_shader_in:
            case nir_var_uniform:
            case nir_var_image:
               /* Don't try to remove flow control around an indirect load
                * because that flow control may be trying to avoid invalid
                * loads.
                */
               if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
                  return false;

               break;

            default:
               return false;
            }
            break;
         }

         /* Cheap system-value-style loads; allowed only when counting ALU
          * ops (limit != 0), never in movs-only mode.
          */
         case nir_intrinsic_load_uniform:
         case nir_intrinsic_load_helper_invocation:
         case nir_intrinsic_is_helper_invocation:
         case nir_intrinsic_load_front_face:
         case nir_intrinsic_load_view_index:
         case nir_intrinsic_load_layer_id:
         case nir_intrinsic_load_frag_coord:
         case nir_intrinsic_load_sample_pos:
         case nir_intrinsic_load_sample_pos_or_center:
         case nir_intrinsic_load_sample_id:
         case nir_intrinsic_load_sample_mask_in:
         case nir_intrinsic_load_vertex_id_zero_base:
         case nir_intrinsic_load_first_vertex:
         case nir_intrinsic_load_base_instance:
         case nir_intrinsic_load_instance_id:
         case nir_intrinsic_load_draw_id:
         case nir_intrinsic_load_num_workgroups:
         case nir_intrinsic_load_workgroup_id:
         case nir_intrinsic_load_local_invocation_id:
         case nir_intrinsic_load_local_invocation_index:
         case nir_intrinsic_load_subgroup_id:
         case nir_intrinsic_load_subgroup_invocation:
         case nir_intrinsic_load_num_subgroups:
         case nir_intrinsic_load_frag_shading_rate:
         case nir_intrinsic_is_sparse_texels_resident:
         case nir_intrinsic_sparse_residency_code_and:
            if (!alu_ok)
               return false;
            break;

         default:
            return false;
         }

         break;
      }

      case nir_instr_type_deref:
      case nir_instr_type_load_const:
      case nir_instr_type_ssa_undef:
         break;

      case nir_instr_type_alu: {
         nir_alu_instr *mov = nir_instr_as_alu(instr);
         bool movelike = false;

         switch (mov->op) {
         /* Move-like ops: expected to be folded into other instructions as
          * modifiers/swizzles, so they are free in both modes.
          */
         case nir_op_mov:
         case nir_op_fneg:
         case nir_op_ineg:
         case nir_op_fabs:
         case nir_op_iabs:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_vec5:
         case nir_op_vec8:
         case nir_op_vec16:
            movelike = true;
            break;

         /* Expensive ALU ops (transcendentals, division, remainder): only
          * allowed when the caller explicitly opted in.
          */
         case nir_op_fcos:
         case nir_op_fdiv:
         case nir_op_fexp2:
         case nir_op_flog2:
         case nir_op_fmod:
         case nir_op_fpow:
         case nir_op_frcp:
         case nir_op_frem:
         case nir_op_frsq:
         case nir_op_fsin:
         case nir_op_idiv:
         case nir_op_irem:
         case nir_op_udiv:
            if (!alu_ok || !expensive_alu_ok)
               return false;

            break;

         default:
            if (!alu_ok) {
               /* It must be a move-like operation. */
               return false;
            }
            break;
         }

         /* It must be SSA */
         if (!mov->dest.dest.is_ssa)
            return false;

         if (alu_ok) {
            /* If the ALU operation is an fsat or a move-like operation, do
             * not count it.  The expectation is that it will eventually be
             * merged as a destination modifier or source modifier on some
             * other instruction.
             */
            if (mov->op != nir_op_fsat && !movelike)
               (*count)++;
         } else {
            /* Can't handle saturate */
            if (mov->dest.saturate)
               return false;

            /* It cannot have any if-uses */
            if (!list_is_empty(&mov->dest.dest.ssa.if_uses))
               return false;

            /* The only uses of this definition must be phis in the successor */
            nir_foreach_use(use, &mov->dest.dest.ssa) {
               if (use->parent_instr->type != nir_instr_type_phi ||
                   use->parent_instr->block != block->successors[0])
                  return false;
            }
         }
         break;
      }

      default:
         return false;
      }
   }

   return true;
}

/**
 * Try to collapse nested ifs:
 * This optimization turns
 *
 * if (cond1) {
 *    <allowed instruction>
 *    if (cond2) {
 *       <any code>
 *    } else {
 *    }
 * } else {
 * }
 *
 * into
 *
 * <allowed instruction>
 * if (cond1 && cond2) {
 *    <any code>
 * } else {
 * }
 *
 * Returns true (progress) if the transform was applied.  The emptied parent
 * if is intentionally left behind for later cleanup passes.
 */
static bool
nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit,
                    bool indirect_load_ok, bool expensive_alu_ok)
{
   /* the if has to be nested */
   if (if_stmt->cf_node.parent->type != nir_cf_node_if)
      return false;

   nir_if *parent_if = nir_cf_node_as_if(if_stmt->cf_node.parent);
   if (parent_if->control == nir_selection_control_dont_flatten)
      return false;

   /* check if the else block is empty */
   if (!nir_cf_list_is_empty_block(&if_stmt->else_list))
      return false;

   /* this opt doesn't make much sense if the branch is empty */
   if (nir_cf_list_is_empty_block(&if_stmt->then_list))
      return false;

   /* the nested if has to be the only cf_node:
    * i.e. <block> <if_stmt> <block> */
   if (exec_list_length(&parent_if->then_list) != 3)
      return false;

   /* check if the else block of the parent if is empty */
   if (!nir_cf_list_is_empty_block(&parent_if->else_list))
      return false;

   /* check if the block after the nested if is empty except for phis */
   nir_block *last = nir_if_last_then_block(parent_if);
   nir_instr *last_instr = nir_block_last_instr(last);
   if (last_instr && last_instr->type != nir_instr_type_phi)
      return false;

   /* check if all outer phis become trivial after merging the ifs: each
    * use of an inner phi must be an outer phi whose else-side source
    * matches the inner phi's else-side source, so the rewrite below can
    * forward the inner phi result directly.  A "flatten" hint skips this
    * check and forces the merge.
    */
   nir_foreach_instr(instr, last) {
      if (parent_if->control == nir_selection_control_flatten)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_phi_src *else_src =
         nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));

      nir_foreach_use (src, &phi->dest.ssa) {
         assert(src->parent_instr->type == nir_instr_type_phi);
         nir_phi_src *phi_src =
            nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr),
                                       nir_if_first_else_block(parent_if));
         if (phi_src->src.ssa != else_src->src.ssa)
            return false;
      }
   }

   if (parent_if->control == nir_selection_control_flatten) {
      /* Override driver defaults */
      indirect_load_ok = true;
      expensive_alu_ok = true;
   }

   /* check if the block before the nested if matches the requirements.
    * Note we pass (limit != 0), i.e. 0 or 1, not limit itself: the callee
    * only needs to know which mode to check in, and this keeps the ~0
    * flatten-everything path from triggering here; the real count limit is
    * enforced just below.
    */
   nir_block *first = nir_if_first_then_block(parent_if);
   unsigned count = 0;
   if (!block_check_for_allowed_instrs(first, &count, limit != 0,
                                       indirect_load_ok, expensive_alu_ok))
      return false;

   if (count > limit && parent_if->control != nir_selection_control_flatten)
      return false;

   /* trivialize succeeding phis: redirect matching outer-phi sources to
    * the inner phi's result so the outer phi becomes trivial.
    */
   nir_foreach_instr(instr, last) {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_phi_src *else_src =
         nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt));
      nir_foreach_use_safe(src, &phi->dest.ssa) {
         nir_phi_src *phi_src =
            nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr),
                                       nir_if_first_else_block(parent_if));
         if (phi_src->src.ssa == else_src->src.ssa)
            nir_instr_rewrite_src(src->parent_instr, &phi_src->src,
                                  nir_src_for_ssa(&phi->dest.ssa));
      }
   }

   /* combine the conditions */
   struct nir_builder b;
   nir_builder_init(&b, nir_cf_node_get_function(&if_stmt->cf_node)->function->impl);
   b.cursor = nir_before_cf_node(&if_stmt->cf_node);
   nir_ssa_def *cond = nir_iand(&b, if_stmt->condition.ssa,
                                parent_if->condition.ssa);
   nir_if_rewrite_condition(if_stmt, nir_src_for_ssa(cond));

   /* move the whole inner if before the parent if */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_block(first),
                  nir_after_block(last));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&parent_if->cf_node));

   /* The now empty parent if will be cleaned up by other passes */
   return true;
}

/* Core of the peephole: if @block is immediately preceded by a single-block
 * if/then/else containing only "allowed" instructions, hoist both branches
 * into the preceding block and convert each phi in @block into a bcsel.
 * Also attempts nir_opt_collapse_if() on the preceding if first.  Returns
 * true if any progress was made.
 */
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
                              unsigned limit, bool indirect_load_ok,
                              bool expensive_alu_ok)
{
   if (nir_cf_node_is_first(&block->cf_node))
      return false;

   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
   if (prev_node->type != nir_cf_node_if)
      return false;

   nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));

   /* If the last instruction before this if/else block is a jump, we can't
    * append stuff after it because it would break a bunch of assumption about
    * control flow (nir_validate expects the successor of a return/halt jump
    * to be the end of the function, which might not match the successor of
    * the if/else blocks).
    */
   if (nir_block_ends_in_return_or_halt(prev_block))
      return false;

   nir_if *if_stmt = nir_cf_node_as_if(prev_node);

   /* first, try to collapse the if */
   if (nir_opt_collapse_if(if_stmt, shader, limit,
                           indirect_load_ok, expensive_alu_ok))
      return true;

   if (if_stmt->control == nir_selection_control_dont_flatten)
      return false;

   nir_block *then_block = nir_if_first_then_block(if_stmt);
   nir_block *else_block = nir_if_first_else_block(if_stmt);

   /* We can only have one block in each side ... */
   if (nir_if_last_then_block(if_stmt) != then_block ||
       nir_if_last_else_block(if_stmt) != else_block)
      return false;

   if (if_stmt->control == nir_selection_control_flatten) {
      /* Override driver defaults */
      indirect_load_ok = true;
      expensive_alu_ok = true;
   }

   /* ... and those blocks must only contain "allowed" instructions.  Note
    * that @count accumulates across both calls, so the limit applies to the
    * two branches combined.
    */
   unsigned count = 0;
   if (!block_check_for_allowed_instrs(then_block, &count, limit,
                                       indirect_load_ok, expensive_alu_ok) ||
       !block_check_for_allowed_instrs(else_block, &count, limit,
                                       indirect_load_ok, expensive_alu_ok))
      return false;

   if (count > limit && if_stmt->control != nir_selection_control_flatten)
      return false;

   /* At this point, we know that the previous CFG node is an if-then
    * statement containing only moves to phi nodes in this block.  We can
    * just remove that entire CF node and replace all of the phi nodes with
    * selects.
    */

   /* First, we move the remaining instructions from the blocks to the
    * block before.  We have already guaranteed that this is safe by
    * calling block_check_for_allowed_instrs()
    */
   nir_foreach_instr_safe(instr, then_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
   }

   nir_foreach_instr_safe(instr, else_block) {
      exec_node_remove(&instr->node);
      instr->block = prev_block;
      exec_list_push_tail(&prev_block->instr_list, &instr->node);
   }

   /* Phis are always at the start of a block, so the loop can stop at the
    * first non-phi instruction.
    */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel);
      nir_src_copy(&sel->src[0].src, &if_stmt->condition);
      /* Splat the condition to all channels */
      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);

      assert(exec_list_length(&phi->srcs) == 2);
      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         assert(src->src.is_ssa);

         /* bcsel src[1] is selected when the condition is true (then side),
          * src[2] when it is false (else side).
          */
         unsigned idx = src->pred == then_block ? 1 : 2;
         nir_src_copy(&sel->src[idx].src, &src->src);
      }

      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
                        phi->dest.ssa.num_components,
                        phi->dest.ssa.bit_size, NULL);
      sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;

      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               &sel->dest.dest.ssa);

      nir_instr_insert_before(&phi->instr, &sel->instr);
      nir_instr_remove(&phi->instr);
   }

   nir_cf_node_remove(&if_stmt->cf_node);
   return true;
}

/* Runs the peephole over every block of @impl and updates metadata
 * accordingly (all metadata is invalidated on progress).
 */
static bool
nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok)
{
   nir_shader *shader = impl->function->shader;
   bool progress = false;

   nir_foreach_block_safe(block, impl) {
      progress |= nir_opt_peephole_select_block(block, shader, limit,
                                                indirect_load_ok,
                                                expensive_alu_ok);
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

/* Public entry point: applies the if/else -> bcsel peephole to every
 * function in @shader.
 *
 * @limit              maximum number of counted ALU ops allowed across both
 *                     branches (0 = moves only, ~0 = flatten everything)
 * @indirect_load_ok   allow hoisting indirect input/uniform/image loads
 * @expensive_alu_ok   allow hoisting expensive ALU ops (div, sin, rcp, ...)
 *
 * Returns true if any function made progress.
 */
bool
nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                        bool indirect_load_ok, bool expensive_alu_ok)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_opt_peephole_select_impl(function->impl, limit,
                                                  indirect_load_ok,
                                                  expensive_alu_ok);
   }

   return progress;
}