1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nir.h" 29bf215546Sopenharmony_ci#include "nir/nir_builder.h" 30bf215546Sopenharmony_ci#include "nir_control_flow.h" 31bf215546Sopenharmony_ci#include "nir_search_helpers.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci/* 34bf215546Sopenharmony_ci * Implements a small peephole optimization that looks for 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * if (cond) { 37bf215546Sopenharmony_ci * <then SSA defs> 38bf215546Sopenharmony_ci * } else { 39bf215546Sopenharmony_ci * <else SSA defs> 40bf215546Sopenharmony_ci * } 41bf215546Sopenharmony_ci * phi 42bf215546Sopenharmony_ci * ... 43bf215546Sopenharmony_ci * phi 44bf215546Sopenharmony_ci * 45bf215546Sopenharmony_ci * and replaces it with: 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * <then SSA defs> 48bf215546Sopenharmony_ci * <else SSA defs> 49bf215546Sopenharmony_ci * bcsel 50bf215546Sopenharmony_ci * ... 51bf215546Sopenharmony_ci * bcsel 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * where the SSA defs are ALU operations or other cheap instructions (not 54bf215546Sopenharmony_ci * texturing, for example). 55bf215546Sopenharmony_ci * 56bf215546Sopenharmony_ci * If the number of ALU operations in the branches is greater than the limit 57bf215546Sopenharmony_ci * parameter, then the optimization is skipped. In limit=0 mode, the SSA defs 58bf215546Sopenharmony_ci * must only be MOVs which we expect to get copy-propagated away once they're 59bf215546Sopenharmony_ci * out of the inner blocks. 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistatic bool 63bf215546Sopenharmony_ciblock_check_for_allowed_instrs(nir_block *block, unsigned *count, 64bf215546Sopenharmony_ci unsigned limit, bool indirect_load_ok, 65bf215546Sopenharmony_ci bool expensive_alu_ok) 66bf215546Sopenharmony_ci{ 67bf215546Sopenharmony_ci bool alu_ok = limit != 0; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci /* Used on non-control-flow HW to flatten all IFs. */ 70bf215546Sopenharmony_ci if (limit == ~0) { 71bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 72bf215546Sopenharmony_ci switch (instr->type) { 73bf215546Sopenharmony_ci case nir_instr_type_alu: 74bf215546Sopenharmony_ci case nir_instr_type_deref: 75bf215546Sopenharmony_ci case nir_instr_type_load_const: 76bf215546Sopenharmony_ci case nir_instr_type_phi: 77bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 78bf215546Sopenharmony_ci case nir_instr_type_tex: 79bf215546Sopenharmony_ci break; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 82bf215546Sopenharmony_ci if (!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr))) 83bf215546Sopenharmony_ci return false; 84bf215546Sopenharmony_ci break; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci case nir_instr_type_call: 87bf215546Sopenharmony_ci case nir_instr_type_jump: 88bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 89bf215546Sopenharmony_ci return false; 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci return true; 93bf215546Sopenharmony_ci } 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 96bf215546Sopenharmony_ci switch (instr->type) { 97bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 98bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci switch (intrin->intrinsic) { 101bf215546Sopenharmony_ci case nir_intrinsic_load_deref: { 102bf215546Sopenharmony_ci nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]); 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci switch (deref->modes) { 105bf215546Sopenharmony_ci case nir_var_shader_in: 106bf215546Sopenharmony_ci case nir_var_uniform: 107bf215546Sopenharmony_ci case nir_var_image: 108bf215546Sopenharmony_ci /* Don't try to remove flow control around an indirect load 109bf215546Sopenharmony_ci * because that flow control may be trying to avoid invalid 110bf215546Sopenharmony_ci * loads. 111bf215546Sopenharmony_ci */ 112bf215546Sopenharmony_ci if (!indirect_load_ok && nir_deref_instr_has_indirect(deref)) 113bf215546Sopenharmony_ci return false; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci break; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci default: 118bf215546Sopenharmony_ci return false; 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci break; 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 124bf215546Sopenharmony_ci case nir_intrinsic_load_helper_invocation: 125bf215546Sopenharmony_ci case nir_intrinsic_is_helper_invocation: 126bf215546Sopenharmony_ci case nir_intrinsic_load_front_face: 127bf215546Sopenharmony_ci case nir_intrinsic_load_view_index: 128bf215546Sopenharmony_ci case nir_intrinsic_load_layer_id: 129bf215546Sopenharmony_ci case nir_intrinsic_load_frag_coord: 130bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos: 131bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos_or_center: 132bf215546Sopenharmony_ci case nir_intrinsic_load_sample_id: 133bf215546Sopenharmony_ci case nir_intrinsic_load_sample_mask_in: 134bf215546Sopenharmony_ci case nir_intrinsic_load_vertex_id_zero_base: 135bf215546Sopenharmony_ci case nir_intrinsic_load_first_vertex: 136bf215546Sopenharmony_ci case nir_intrinsic_load_base_instance: 137bf215546Sopenharmony_ci case nir_intrinsic_load_instance_id: 138bf215546Sopenharmony_ci case nir_intrinsic_load_draw_id: 139bf215546Sopenharmony_ci case nir_intrinsic_load_num_workgroups: 140bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_id: 141bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_id: 142bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_index: 143bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_id: 144bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_invocation: 145bf215546Sopenharmony_ci case nir_intrinsic_load_num_subgroups: 146bf215546Sopenharmony_ci case nir_intrinsic_load_frag_shading_rate: 147bf215546Sopenharmony_ci case nir_intrinsic_is_sparse_texels_resident: 148bf215546Sopenharmony_ci case nir_intrinsic_sparse_residency_code_and: 149bf215546Sopenharmony_ci if (!alu_ok) 150bf215546Sopenharmony_ci return false; 151bf215546Sopenharmony_ci break; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci default: 154bf215546Sopenharmony_ci return false; 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci break; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci case nir_instr_type_deref: 161bf215546Sopenharmony_ci case nir_instr_type_load_const: 162bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 163bf215546Sopenharmony_ci break; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci case nir_instr_type_alu: { 166bf215546Sopenharmony_ci nir_alu_instr *mov = nir_instr_as_alu(instr); 167bf215546Sopenharmony_ci bool movelike = false; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci switch (mov->op) { 170bf215546Sopenharmony_ci case nir_op_mov: 171bf215546Sopenharmony_ci case nir_op_fneg: 172bf215546Sopenharmony_ci case nir_op_ineg: 173bf215546Sopenharmony_ci case nir_op_fabs: 174bf215546Sopenharmony_ci case nir_op_iabs: 175bf215546Sopenharmony_ci case nir_op_vec2: 176bf215546Sopenharmony_ci case nir_op_vec3: 177bf215546Sopenharmony_ci case nir_op_vec4: 178bf215546Sopenharmony_ci case nir_op_vec5: 179bf215546Sopenharmony_ci case nir_op_vec8: 180bf215546Sopenharmony_ci case nir_op_vec16: 181bf215546Sopenharmony_ci movelike = true; 182bf215546Sopenharmony_ci break; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci case nir_op_fcos: 185bf215546Sopenharmony_ci case nir_op_fdiv: 186bf215546Sopenharmony_ci case nir_op_fexp2: 187bf215546Sopenharmony_ci case nir_op_flog2: 188bf215546Sopenharmony_ci case nir_op_fmod: 189bf215546Sopenharmony_ci case nir_op_fpow: 190bf215546Sopenharmony_ci case nir_op_frcp: 191bf215546Sopenharmony_ci case nir_op_frem: 192bf215546Sopenharmony_ci case nir_op_frsq: 193bf215546Sopenharmony_ci case nir_op_fsin: 194bf215546Sopenharmony_ci case nir_op_idiv: 195bf215546Sopenharmony_ci case nir_op_irem: 196bf215546Sopenharmony_ci case nir_op_udiv: 197bf215546Sopenharmony_ci if (!alu_ok || !expensive_alu_ok) 198bf215546Sopenharmony_ci return false; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci break; 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci default: 203bf215546Sopenharmony_ci if (!alu_ok) { 204bf215546Sopenharmony_ci /* It must be a move-like operation. */ 205bf215546Sopenharmony_ci return false; 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci break; 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* It must be SSA */ 211bf215546Sopenharmony_ci if (!mov->dest.dest.is_ssa) 212bf215546Sopenharmony_ci return false; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci if (alu_ok) { 215bf215546Sopenharmony_ci /* If the ALU operation is an fsat or a move-like operation, do 216bf215546Sopenharmony_ci * not count it. The expectation is that it will eventually be 217bf215546Sopenharmony_ci * merged as a destination modifier or source modifier on some 218bf215546Sopenharmony_ci * other instruction. 219bf215546Sopenharmony_ci */ 220bf215546Sopenharmony_ci if (mov->op != nir_op_fsat && !movelike) 221bf215546Sopenharmony_ci (*count)++; 222bf215546Sopenharmony_ci } else { 223bf215546Sopenharmony_ci /* Can't handle saturate */ 224bf215546Sopenharmony_ci if (mov->dest.saturate) 225bf215546Sopenharmony_ci return false; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* It cannot have any if-uses */ 228bf215546Sopenharmony_ci if (!list_is_empty(&mov->dest.dest.ssa.if_uses)) 229bf215546Sopenharmony_ci return false; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci /* The only uses of this definition must be phis in the successor */ 232bf215546Sopenharmony_ci nir_foreach_use(use, &mov->dest.dest.ssa) { 233bf215546Sopenharmony_ci if (use->parent_instr->type != nir_instr_type_phi || 234bf215546Sopenharmony_ci use->parent_instr->block != block->successors[0]) 235bf215546Sopenharmony_ci return false; 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci break; 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci default: 242bf215546Sopenharmony_ci return false; 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci return true; 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci/** 250bf215546Sopenharmony_ci * Try to collapse nested ifs: 251bf215546Sopenharmony_ci * This optimization turns 252bf215546Sopenharmony_ci * 253bf215546Sopenharmony_ci * if (cond1) { 254bf215546Sopenharmony_ci * <allowed instruction> 255bf215546Sopenharmony_ci * if (cond2) { 256bf215546Sopenharmony_ci * <any code> 257bf215546Sopenharmony_ci * } else { 258bf215546Sopenharmony_ci * } 259bf215546Sopenharmony_ci * } else { 260bf215546Sopenharmony_ci * } 261bf215546Sopenharmony_ci * 262bf215546Sopenharmony_ci * into 263bf215546Sopenharmony_ci * 264bf215546Sopenharmony_ci * <allowed instruction> 265bf215546Sopenharmony_ci * if (cond1 && cond2) { 266bf215546Sopenharmony_ci * <any code> 267bf215546Sopenharmony_ci * } else { 268bf215546Sopenharmony_ci * } 269bf215546Sopenharmony_ci * 270bf215546Sopenharmony_ci */ 271bf215546Sopenharmony_cistatic bool 272bf215546Sopenharmony_cinir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit, 273bf215546Sopenharmony_ci bool indirect_load_ok, bool expensive_alu_ok) 274bf215546Sopenharmony_ci{ 275bf215546Sopenharmony_ci /* the if has to be nested */ 276bf215546Sopenharmony_ci if (if_stmt->cf_node.parent->type != nir_cf_node_if) 277bf215546Sopenharmony_ci return false; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci nir_if *parent_if = nir_cf_node_as_if(if_stmt->cf_node.parent); 280bf215546Sopenharmony_ci if (parent_if->control == nir_selection_control_dont_flatten) 281bf215546Sopenharmony_ci return false; 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci /* check if the else block is empty */ 284bf215546Sopenharmony_ci if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) 285bf215546Sopenharmony_ci return false; 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci /* this opt doesn't make much sense if the branch is empty */ 288bf215546Sopenharmony_ci if (nir_cf_list_is_empty_block(&if_stmt->then_list)) 289bf215546Sopenharmony_ci return false; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci /* the nested if has to be the only cf_node: 292bf215546Sopenharmony_ci * i.e. <block> <if_stmt> <block> */ 293bf215546Sopenharmony_ci if (exec_list_length(&parent_if->then_list) != 3) 294bf215546Sopenharmony_ci return false; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci /* check if the else block of the parent if is empty */ 297bf215546Sopenharmony_ci if (!nir_cf_list_is_empty_block(&parent_if->else_list)) 298bf215546Sopenharmony_ci return false; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci /* check if the block after the nested if is empty except for phis */ 301bf215546Sopenharmony_ci nir_block *last = nir_if_last_then_block(parent_if); 302bf215546Sopenharmony_ci nir_instr *last_instr = nir_block_last_instr(last); 303bf215546Sopenharmony_ci if (last_instr && last_instr->type != nir_instr_type_phi) 304bf215546Sopenharmony_ci return false; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* check if all outer phis become trivial after merging the ifs */ 307bf215546Sopenharmony_ci nir_foreach_instr(instr, last) { 308bf215546Sopenharmony_ci if (parent_if->control == nir_selection_control_flatten) 309bf215546Sopenharmony_ci break; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 312bf215546Sopenharmony_ci nir_phi_src *else_src = 313bf215546Sopenharmony_ci nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt)); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci nir_foreach_use (src, &phi->dest.ssa) { 316bf215546Sopenharmony_ci assert(src->parent_instr->type == nir_instr_type_phi); 317bf215546Sopenharmony_ci nir_phi_src *phi_src = 318bf215546Sopenharmony_ci nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr), 319bf215546Sopenharmony_ci nir_if_first_else_block(parent_if)); 320bf215546Sopenharmony_ci if (phi_src->src.ssa != else_src->src.ssa) 321bf215546Sopenharmony_ci return false; 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci if (parent_if->control == nir_selection_control_flatten) { 326bf215546Sopenharmony_ci /* Override driver defaults */ 327bf215546Sopenharmony_ci indirect_load_ok = true; 328bf215546Sopenharmony_ci expensive_alu_ok = true; 329bf215546Sopenharmony_ci } 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci /* check if the block before the nested if matches the requirements */ 332bf215546Sopenharmony_ci nir_block *first = nir_if_first_then_block(parent_if); 333bf215546Sopenharmony_ci unsigned count = 0; 334bf215546Sopenharmony_ci if (!block_check_for_allowed_instrs(first, &count, limit != 0, 335bf215546Sopenharmony_ci indirect_load_ok, expensive_alu_ok)) 336bf215546Sopenharmony_ci return false; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci if (count > limit && parent_if->control != nir_selection_control_flatten) 339bf215546Sopenharmony_ci return false; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci /* trivialize succeeding phis */ 342bf215546Sopenharmony_ci nir_foreach_instr(instr, last) { 343bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 344bf215546Sopenharmony_ci nir_phi_src *else_src = 345bf215546Sopenharmony_ci nir_phi_get_src_from_block(phi, nir_if_first_else_block(if_stmt)); 346bf215546Sopenharmony_ci nir_foreach_use_safe(src, &phi->dest.ssa) { 347bf215546Sopenharmony_ci nir_phi_src *phi_src = 348bf215546Sopenharmony_ci nir_phi_get_src_from_block(nir_instr_as_phi(src->parent_instr), 349bf215546Sopenharmony_ci nir_if_first_else_block(parent_if)); 350bf215546Sopenharmony_ci if (phi_src->src.ssa == else_src->src.ssa) 351bf215546Sopenharmony_ci nir_instr_rewrite_src(src->parent_instr, &phi_src->src, 352bf215546Sopenharmony_ci nir_src_for_ssa(&phi->dest.ssa)); 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci /* combine the conditions */ 357bf215546Sopenharmony_ci struct nir_builder b; 358bf215546Sopenharmony_ci nir_builder_init(&b, nir_cf_node_get_function(&if_stmt->cf_node)->function->impl); 359bf215546Sopenharmony_ci b.cursor = nir_before_cf_node(&if_stmt->cf_node); 360bf215546Sopenharmony_ci nir_ssa_def *cond = nir_iand(&b, if_stmt->condition.ssa, 361bf215546Sopenharmony_ci parent_if->condition.ssa); 362bf215546Sopenharmony_ci nir_if_rewrite_condition(if_stmt, nir_src_for_ssa(cond)); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci /* move the whole inner if before the parent if */ 365bf215546Sopenharmony_ci nir_cf_list tmp; 366bf215546Sopenharmony_ci nir_cf_extract(&tmp, nir_before_block(first), 367bf215546Sopenharmony_ci nir_after_block(last)); 368bf215546Sopenharmony_ci nir_cf_reinsert(&tmp, nir_before_cf_node(&parent_if->cf_node)); 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci /* The now empty parent if will be cleaned up by other passes */ 371bf215546Sopenharmony_ci return true; 372bf215546Sopenharmony_ci} 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_cistatic bool 375bf215546Sopenharmony_cinir_opt_peephole_select_block(nir_block *block, nir_shader *shader, 376bf215546Sopenharmony_ci unsigned limit, bool indirect_load_ok, 377bf215546Sopenharmony_ci bool expensive_alu_ok) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci if (nir_cf_node_is_first(&block->cf_node)) 380bf215546Sopenharmony_ci return false; 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); 383bf215546Sopenharmony_ci if (prev_node->type != nir_cf_node_if) 384bf215546Sopenharmony_ci return false; 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node)); 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci /* If the last instruction before this if/else block is a jump, we can't 389bf215546Sopenharmony_ci * append stuff after it because it would break a bunch of assumption about 390bf215546Sopenharmony_ci * control flow (nir_validate expects the successor of a return/halt jump 391bf215546Sopenharmony_ci * to be the end of the function, which might not match the successor of 392bf215546Sopenharmony_ci * the if/else blocks). 393bf215546Sopenharmony_ci */ 394bf215546Sopenharmony_ci if (nir_block_ends_in_return_or_halt(prev_block)) 395bf215546Sopenharmony_ci return false; 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(prev_node); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci /* first, try to collapse the if */ 400bf215546Sopenharmony_ci if (nir_opt_collapse_if(if_stmt, shader, limit, 401bf215546Sopenharmony_ci indirect_load_ok, expensive_alu_ok)) 402bf215546Sopenharmony_ci return true; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci if (if_stmt->control == nir_selection_control_dont_flatten) 405bf215546Sopenharmony_ci return false; 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci nir_block *then_block = nir_if_first_then_block(if_stmt); 408bf215546Sopenharmony_ci nir_block *else_block = nir_if_first_else_block(if_stmt); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci /* We can only have one block in each side ... */ 411bf215546Sopenharmony_ci if (nir_if_last_then_block(if_stmt) != then_block || 412bf215546Sopenharmony_ci nir_if_last_else_block(if_stmt) != else_block) 413bf215546Sopenharmony_ci return false; 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (if_stmt->control == nir_selection_control_flatten) { 416bf215546Sopenharmony_ci /* Override driver defaults */ 417bf215546Sopenharmony_ci indirect_load_ok = true; 418bf215546Sopenharmony_ci expensive_alu_ok = true; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci /* ... and those blocks must only contain "allowed" instructions. */ 422bf215546Sopenharmony_ci unsigned count = 0; 423bf215546Sopenharmony_ci if (!block_check_for_allowed_instrs(then_block, &count, limit, 424bf215546Sopenharmony_ci indirect_load_ok, expensive_alu_ok) || 425bf215546Sopenharmony_ci !block_check_for_allowed_instrs(else_block, &count, limit, 426bf215546Sopenharmony_ci indirect_load_ok, expensive_alu_ok)) 427bf215546Sopenharmony_ci return false; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci if (count > limit && if_stmt->control != nir_selection_control_flatten) 430bf215546Sopenharmony_ci return false; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci /* At this point, we know that the previous CFG node is an if-then 433bf215546Sopenharmony_ci * statement containing only moves to phi nodes in this block. We can 434bf215546Sopenharmony_ci * just remove that entire CF node and replace all of the phi nodes with 435bf215546Sopenharmony_ci * selects. 436bf215546Sopenharmony_ci */ 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci /* First, we move the remaining instructions from the blocks to the 439bf215546Sopenharmony_ci * block before. We have already guaranteed that this is safe by 440bf215546Sopenharmony_ci * calling block_check_for_allowed_instrs() 441bf215546Sopenharmony_ci */ 442bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, then_block) { 443bf215546Sopenharmony_ci exec_node_remove(&instr->node); 444bf215546Sopenharmony_ci instr->block = prev_block; 445bf215546Sopenharmony_ci exec_list_push_tail(&prev_block->instr_list, &instr->node); 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, else_block) { 449bf215546Sopenharmony_ci exec_node_remove(&instr->node); 450bf215546Sopenharmony_ci instr->block = prev_block; 451bf215546Sopenharmony_ci exec_list_push_tail(&prev_block->instr_list, &instr->node); 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 455bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 456bf215546Sopenharmony_ci break; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 459bf215546Sopenharmony_ci nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel); 460bf215546Sopenharmony_ci nir_src_copy(&sel->src[0].src, &if_stmt->condition); 461bf215546Sopenharmony_ci /* Splat the condition to all channels */ 462bf215546Sopenharmony_ci memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci assert(exec_list_length(&phi->srcs) == 2); 465bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 466bf215546Sopenharmony_ci assert(src->pred == then_block || src->pred == else_block); 467bf215546Sopenharmony_ci assert(src->src.is_ssa); 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci unsigned idx = src->pred == then_block ? 1 : 2; 470bf215546Sopenharmony_ci nir_src_copy(&sel->src[idx].src, &src->src); 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci nir_ssa_dest_init(&sel->instr, &sel->dest.dest, 474bf215546Sopenharmony_ci phi->dest.ssa.num_components, 475bf215546Sopenharmony_ci phi->dest.ssa.bit_size, NULL); 476bf215546Sopenharmony_ci sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&phi->dest.ssa, 479bf215546Sopenharmony_ci &sel->dest.dest.ssa); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci nir_instr_insert_before(&phi->instr, &sel->instr); 482bf215546Sopenharmony_ci nir_instr_remove(&phi->instr); 483bf215546Sopenharmony_ci } 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci nir_cf_node_remove(&if_stmt->cf_node); 486bf215546Sopenharmony_ci return true; 487bf215546Sopenharmony_ci} 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_cistatic bool 490bf215546Sopenharmony_cinir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit, 491bf215546Sopenharmony_ci bool indirect_load_ok, bool expensive_alu_ok) 492bf215546Sopenharmony_ci{ 493bf215546Sopenharmony_ci nir_shader *shader = impl->function->shader; 494bf215546Sopenharmony_ci bool progress = false; 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci nir_foreach_block_safe(block, impl) { 497bf215546Sopenharmony_ci progress |= nir_opt_peephole_select_block(block, shader, limit, 498bf215546Sopenharmony_ci indirect_load_ok, 499bf215546Sopenharmony_ci expensive_alu_ok); 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (progress) { 503bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 504bf215546Sopenharmony_ci } else { 505bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci return progress; 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_cibool 512bf215546Sopenharmony_cinir_opt_peephole_select(nir_shader *shader, unsigned limit, 513bf215546Sopenharmony_ci bool indirect_load_ok, bool expensive_alu_ok) 514bf215546Sopenharmony_ci{ 515bf215546Sopenharmony_ci bool progress = false; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 518bf215546Sopenharmony_ci if (function->impl) 519bf215546Sopenharmony_ci progress |= nir_opt_peephole_select_impl(function->impl, limit, 520bf215546Sopenharmony_ci indirect_load_ok, 521bf215546Sopenharmony_ci expensive_alu_ok); 522bf215546Sopenharmony_ci } 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci return progress; 525bf215546Sopenharmony_ci} 526