1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2020 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci#include "nir_phi_builder.h" 27bf215546Sopenharmony_ci#include "util/u_math.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_cistatic bool 30bf215546Sopenharmony_cimove_system_values_to_top(nir_shader *shader) 31bf215546Sopenharmony_ci{ 32bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci bool progress = false; 35bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 36bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 37bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 38bf215546Sopenharmony_ci continue; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci /* These intrinsics not only can't be re-materialized but aren't 41bf215546Sopenharmony_ci * preserved when moving to the continuation shader. We have to move 42bf215546Sopenharmony_ci * them to the top to ensure they get spilled as needed. 43bf215546Sopenharmony_ci */ 44bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 45bf215546Sopenharmony_ci switch (intrin->intrinsic) { 46bf215546Sopenharmony_ci case nir_intrinsic_load_shader_record_ptr: 47bf215546Sopenharmony_ci case nir_intrinsic_load_btd_local_arg_addr_intel: 48bf215546Sopenharmony_ci nir_instr_remove(instr); 49bf215546Sopenharmony_ci nir_instr_insert(nir_before_cf_list(&impl->body), instr); 50bf215546Sopenharmony_ci progress = true; 51bf215546Sopenharmony_ci break; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci default: 54bf215546Sopenharmony_ci break; 55bf215546Sopenharmony_ci } 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci } 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci if (progress) { 60bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 61bf215546Sopenharmony_ci nir_metadata_dominance); 62bf215546Sopenharmony_ci } else { 63bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci return progress; 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_cistatic bool 70bf215546Sopenharmony_ciinstr_is_shader_call(nir_instr *instr) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 73bf215546Sopenharmony_ci return false; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 76bf215546Sopenharmony_ci return intrin->intrinsic == nir_intrinsic_trace_ray || 77bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_report_ray_intersection || 78bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_execute_callable; 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci/* Previously named bitset, it had to be renamed as FreeBSD defines a struct 82bf215546Sopenharmony_ci * named bitset in sys/_bitset.h required by pthread_np.h which is included 83bf215546Sopenharmony_ci * from src/util/u_thread.h that is indirectly included by this file. 84bf215546Sopenharmony_ci */ 85bf215546Sopenharmony_cistruct brw_bitset { 86bf215546Sopenharmony_ci BITSET_WORD *set; 87bf215546Sopenharmony_ci unsigned size; 88bf215546Sopenharmony_ci}; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic struct brw_bitset 91bf215546Sopenharmony_cibitset_create(void *mem_ctx, unsigned size) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci return (struct brw_bitset) { 94bf215546Sopenharmony_ci .set = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(size)), 95bf215546Sopenharmony_ci .size = size, 96bf215546Sopenharmony_ci }; 97bf215546Sopenharmony_ci} 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_cistatic bool 100bf215546Sopenharmony_cisrc_is_in_bitset(nir_src *src, void *_set) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci struct brw_bitset *set = _set; 103bf215546Sopenharmony_ci assert(src->is_ssa); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci /* Any SSA values which were added after we generated liveness information 106bf215546Sopenharmony_ci * are things generated by this pass and, while most of it is arithmetic 107bf215546Sopenharmony_ci * which we could re-materialize, we don't need to because it's only used 108bf215546Sopenharmony_ci * for a single load/store and so shouldn't cross any shader calls. 109bf215546Sopenharmony_ci */ 110bf215546Sopenharmony_ci if (src->ssa->index >= set->size) 111bf215546Sopenharmony_ci return false; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci return BITSET_TEST(set->set, src->ssa->index); 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_cistatic void 117bf215546Sopenharmony_ciadd_ssa_def_to_bitset(nir_ssa_def *def, struct brw_bitset *set) 118bf215546Sopenharmony_ci{ 119bf215546Sopenharmony_ci if (def->index >= set->size) 120bf215546Sopenharmony_ci return; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci BITSET_SET(set->set, def->index); 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic bool 126bf215546Sopenharmony_cican_remat_instr(nir_instr *instr, struct brw_bitset *remat) 127bf215546Sopenharmony_ci{ 128bf215546Sopenharmony_ci /* Set of all values which are trivially re-materializable and we shouldn't 129bf215546Sopenharmony_ci * ever spill them. This includes: 130bf215546Sopenharmony_ci * 131bf215546Sopenharmony_ci * - Undef values 132bf215546Sopenharmony_ci * - Constants 133bf215546Sopenharmony_ci * - Uniforms (UBO or push constant) 134bf215546Sopenharmony_ci * - ALU combinations of any of the above 135bf215546Sopenharmony_ci * - Derefs which are either complete or casts of any of the above 136bf215546Sopenharmony_ci * 137bf215546Sopenharmony_ci * Because this pass rewrites things in-order and phis are always turned 138bf215546Sopenharmony_ci * into register writes, We can use "is it SSA?" to answer the question 139bf215546Sopenharmony_ci * "can my source be re-materialized?". 140bf215546Sopenharmony_ci */ 141bf215546Sopenharmony_ci switch (instr->type) { 142bf215546Sopenharmony_ci case nir_instr_type_alu: 143bf215546Sopenharmony_ci if (!nir_instr_as_alu(instr)->dest.dest.is_ssa) 144bf215546Sopenharmony_ci return false; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci return nir_foreach_src(instr, src_is_in_bitset, remat); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci case nir_instr_type_deref: 149bf215546Sopenharmony_ci return nir_foreach_src(instr, src_is_in_bitset, remat); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 152bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 153bf215546Sopenharmony_ci switch (intrin->intrinsic) { 154bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 155bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_index: 156bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_reindex: 157bf215546Sopenharmony_ci case nir_intrinsic_load_vulkan_descriptor: 158bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 159bf215546Sopenharmony_ci /* These intrinsics don't need to be spilled as long as they don't 160bf215546Sopenharmony_ci * depend on any spilled values. 161bf215546Sopenharmony_ci */ 162bf215546Sopenharmony_ci return nir_foreach_src(instr, src_is_in_bitset, remat); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci case nir_intrinsic_load_scratch_base_ptr: 165bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_id: 166bf215546Sopenharmony_ci case nir_intrinsic_load_topology_id_intel: 167bf215546Sopenharmony_ci case nir_intrinsic_load_btd_global_arg_addr_intel: 168bf215546Sopenharmony_ci case nir_intrinsic_load_btd_resume_sbt_addr_intel: 169bf215546Sopenharmony_ci case nir_intrinsic_load_ray_base_mem_addr_intel: 170bf215546Sopenharmony_ci case nir_intrinsic_load_ray_hw_stack_size_intel: 171bf215546Sopenharmony_ci case nir_intrinsic_load_ray_sw_stack_size_intel: 172bf215546Sopenharmony_ci case nir_intrinsic_load_ray_num_dss_rt_stacks_intel: 173bf215546Sopenharmony_ci case nir_intrinsic_load_ray_hit_sbt_addr_intel: 174bf215546Sopenharmony_ci case nir_intrinsic_load_ray_hit_sbt_stride_intel: 175bf215546Sopenharmony_ci case nir_intrinsic_load_ray_miss_sbt_addr_intel: 176bf215546Sopenharmony_ci case nir_intrinsic_load_ray_miss_sbt_stride_intel: 177bf215546Sopenharmony_ci case nir_intrinsic_load_callable_sbt_addr_intel: 178bf215546Sopenharmony_ci case nir_intrinsic_load_callable_sbt_stride_intel: 179bf215546Sopenharmony_ci case nir_intrinsic_load_reloc_const_intel: 180bf215546Sopenharmony_ci case nir_intrinsic_load_ray_query_global_intel: 181bf215546Sopenharmony_ci /* Notably missing from the above list is btd_local_arg_addr_intel. 182bf215546Sopenharmony_ci * This is because the resume shader will have a different local 183bf215546Sopenharmony_ci * argument pointer because it has a different BSR. Any access of 184bf215546Sopenharmony_ci * the original shader's local arguments needs to be preserved so 185bf215546Sopenharmony_ci * that pointer has to be saved on the stack. 186bf215546Sopenharmony_ci * 187bf215546Sopenharmony_ci * TODO: There may be some system values we want to avoid 188bf215546Sopenharmony_ci * re-materializing as well but we have to be very careful 189bf215546Sopenharmony_ci * to ensure that it's a system value which cannot change 190bf215546Sopenharmony_ci * across a shader call. 191bf215546Sopenharmony_ci */ 192bf215546Sopenharmony_ci return true; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci default: 195bf215546Sopenharmony_ci return false; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 200bf215546Sopenharmony_ci case nir_instr_type_load_const: 201bf215546Sopenharmony_ci return true; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci default: 204bf215546Sopenharmony_ci return false; 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic bool 209bf215546Sopenharmony_cican_remat_ssa_def(nir_ssa_def *def, struct brw_bitset *remat) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci return can_remat_instr(def->parent_instr, remat); 212bf215546Sopenharmony_ci} 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_cistatic nir_ssa_def * 215bf215546Sopenharmony_ciremat_ssa_def(nir_builder *b, nir_ssa_def *def) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci nir_instr *clone = nir_instr_clone(b->shader, def->parent_instr); 218bf215546Sopenharmony_ci nir_builder_instr_insert(b, clone); 219bf215546Sopenharmony_ci return nir_instr_ssa_def(clone); 220bf215546Sopenharmony_ci} 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_cistruct pbv_array { 223bf215546Sopenharmony_ci struct nir_phi_builder_value **arr; 224bf215546Sopenharmony_ci unsigned len; 225bf215546Sopenharmony_ci}; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_cistatic struct nir_phi_builder_value * 228bf215546Sopenharmony_ciget_phi_builder_value_for_def(nir_ssa_def *def, 229bf215546Sopenharmony_ci struct pbv_array *pbv_arr) 230bf215546Sopenharmony_ci{ 231bf215546Sopenharmony_ci if (def->index >= pbv_arr->len) 232bf215546Sopenharmony_ci return NULL; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci return pbv_arr->arr[def->index]; 235bf215546Sopenharmony_ci} 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_cistatic nir_ssa_def * 238bf215546Sopenharmony_ciget_phi_builder_def_for_src(nir_src *src, struct pbv_array *pbv_arr, 239bf215546Sopenharmony_ci nir_block *block) 240bf215546Sopenharmony_ci{ 241bf215546Sopenharmony_ci assert(src->is_ssa); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci struct nir_phi_builder_value *pbv = 244bf215546Sopenharmony_ci get_phi_builder_value_for_def(src->ssa, pbv_arr); 245bf215546Sopenharmony_ci if (pbv == NULL) 246bf215546Sopenharmony_ci return NULL; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci return nir_phi_builder_value_get_block_def(pbv, block); 249bf215546Sopenharmony_ci} 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_cistatic bool 252bf215546Sopenharmony_cirewrite_instr_src_from_phi_builder(nir_src *src, void *_pbv_arr) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci nir_block *block; 255bf215546Sopenharmony_ci if (src->parent_instr->type == nir_instr_type_phi) { 256bf215546Sopenharmony_ci nir_phi_src *phi_src = exec_node_data(nir_phi_src, src, src); 257bf215546Sopenharmony_ci block = phi_src->pred; 258bf215546Sopenharmony_ci } else { 259bf215546Sopenharmony_ci block = src->parent_instr->block; 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci nir_ssa_def *new_def = get_phi_builder_def_for_src(src, _pbv_arr, block); 263bf215546Sopenharmony_ci if (new_def != NULL) 264bf215546Sopenharmony_ci nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new_def)); 265bf215546Sopenharmony_ci return true; 266bf215546Sopenharmony_ci} 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_cistatic nir_ssa_def * 269bf215546Sopenharmony_cispill_fill(nir_builder *before, nir_builder *after, nir_ssa_def *def, unsigned offset, 270bf215546Sopenharmony_ci nir_address_format address_format, unsigned stack_alignment) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci const unsigned comp_size = def->bit_size / 8; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci switch(address_format) { 275bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 276bf215546Sopenharmony_ci nir_store_scratch(before, def, nir_imm_int(before, offset), 277bf215546Sopenharmony_ci .align_mul = MIN2(comp_size, stack_alignment), 278bf215546Sopenharmony_ci .write_mask = BITFIELD_MASK(def->num_components)); 279bf215546Sopenharmony_ci def = nir_load_scratch(after, def->num_components, def->bit_size, 280bf215546Sopenharmony_ci nir_imm_int(after, offset), .align_mul = MIN2(comp_size, stack_alignment)); 281bf215546Sopenharmony_ci break; 282bf215546Sopenharmony_ci case nir_address_format_64bit_global: { 283bf215546Sopenharmony_ci nir_ssa_def *addr = nir_iadd_imm(before, nir_load_scratch_base_ptr(before, 1, 64, 1), offset); 284bf215546Sopenharmony_ci nir_store_global(before, addr, MIN2(comp_size, stack_alignment), def, ~0); 285bf215546Sopenharmony_ci addr = nir_iadd_imm(after, nir_load_scratch_base_ptr(after, 1, 64, 1), offset); 286bf215546Sopenharmony_ci def = nir_load_global(after, addr, MIN2(comp_size, stack_alignment), 287bf215546Sopenharmony_ci def->num_components, def->bit_size); 288bf215546Sopenharmony_ci break; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci default: 291bf215546Sopenharmony_ci unreachable("Unimplemented address format"); 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci return def; 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic void 297bf215546Sopenharmony_cispill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, 298bf215546Sopenharmony_ci nir_address_format address_format, 299bf215546Sopenharmony_ci unsigned stack_alignment) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci /* TODO: If a SSA def is filled more than once, we probably want to just 302bf215546Sopenharmony_ci * spill it at the LCM of the fill sites so we avoid unnecessary 303bf215546Sopenharmony_ci * extra spills 304bf215546Sopenharmony_ci * 305bf215546Sopenharmony_ci * TODO: If a SSA def is defined outside a loop but live through some call 306bf215546Sopenharmony_ci * inside the loop, we probably want to spill outside the loop. We 307bf215546Sopenharmony_ci * may also want to fill outside the loop if it's not used in the 308bf215546Sopenharmony_ci * loop. 309bf215546Sopenharmony_ci * 310bf215546Sopenharmony_ci * TODO: Right now, we only re-materialize things if their immediate 311bf215546Sopenharmony_ci * sources are things which we filled. We probably want to expand 312bf215546Sopenharmony_ci * that to re-materialize things whose sources are things we can 313bf215546Sopenharmony_ci * re-materialize from things we filled. We may want some DAG depth 314bf215546Sopenharmony_ci * heuristic on this. 315bf215546Sopenharmony_ci */ 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci /* This happens per-shader rather than per-impl because we mess with 318bf215546Sopenharmony_ci * nir_shader::scratch_size. 319bf215546Sopenharmony_ci */ 320bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_live_ssa_defs | 323bf215546Sopenharmony_ci nir_metadata_dominance | 324bf215546Sopenharmony_ci nir_metadata_block_index); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(shader); 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci const unsigned num_ssa_defs = impl->ssa_alloc; 329bf215546Sopenharmony_ci const unsigned live_words = BITSET_WORDS(num_ssa_defs); 330bf215546Sopenharmony_ci struct brw_bitset trivial_remat = bitset_create(mem_ctx, num_ssa_defs); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci /* Array of all live SSA defs which are spill candidates */ 333bf215546Sopenharmony_ci nir_ssa_def **spill_defs = 334bf215546Sopenharmony_ci rzalloc_array(mem_ctx, nir_ssa_def *, num_ssa_defs); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci /* For each spill candidate, an array of every time it's defined by a fill, 337bf215546Sopenharmony_ci * indexed by call instruction index. 338bf215546Sopenharmony_ci */ 339bf215546Sopenharmony_ci nir_ssa_def ***fill_defs = 340bf215546Sopenharmony_ci rzalloc_array(mem_ctx, nir_ssa_def **, num_ssa_defs); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci /* For each call instruction, the liveness set at the call */ 343bf215546Sopenharmony_ci const BITSET_WORD **call_live = 344bf215546Sopenharmony_ci rzalloc_array(mem_ctx, const BITSET_WORD *, num_calls); 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci /* For each call instruction, the block index of the block it lives in */ 347bf215546Sopenharmony_ci uint32_t *call_block_indices = rzalloc_array(mem_ctx, uint32_t, num_calls); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Walk the call instructions and fetch the liveness set and block index 350bf215546Sopenharmony_ci * for each one. We need to do this before we start modifying the shader 351bf215546Sopenharmony_ci * so that liveness doesn't complain that it's been invalidated. Don't 352bf215546Sopenharmony_ci * worry, we'll be very careful with our live sets. :-) 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_ci unsigned call_idx = 0; 355bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 356bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 357bf215546Sopenharmony_ci if (!instr_is_shader_call(instr)) 358bf215546Sopenharmony_ci continue; 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci call_block_indices[call_idx] = block->index; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci /* The objective here is to preserve values around shader call 363bf215546Sopenharmony_ci * instructions. Therefore, we use the live set after the 364bf215546Sopenharmony_ci * instruction as the set of things we want to preserve. Because 365bf215546Sopenharmony_ci * none of our shader call intrinsics return anything, we don't have 366bf215546Sopenharmony_ci * to worry about spilling over a return value. 367bf215546Sopenharmony_ci * 368bf215546Sopenharmony_ci * TODO: This isn't quite true for report_intersection. 369bf215546Sopenharmony_ci */ 370bf215546Sopenharmony_ci call_live[call_idx] = 371bf215546Sopenharmony_ci nir_get_live_ssa_defs(nir_after_instr(instr), mem_ctx); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci call_idx++; 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci nir_builder before, after; 378bf215546Sopenharmony_ci nir_builder_init(&before, impl); 379bf215546Sopenharmony_ci nir_builder_init(&after, impl); 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci call_idx = 0; 382bf215546Sopenharmony_ci unsigned max_scratch_size = shader->scratch_size; 383bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 384bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 385bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 386bf215546Sopenharmony_ci if (def != NULL) { 387bf215546Sopenharmony_ci if (can_remat_ssa_def(def, &trivial_remat)) { 388bf215546Sopenharmony_ci add_ssa_def_to_bitset(def, &trivial_remat); 389bf215546Sopenharmony_ci } else { 390bf215546Sopenharmony_ci spill_defs[def->index] = def; 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci if (!instr_is_shader_call(instr)) 395bf215546Sopenharmony_ci continue; 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci const BITSET_WORD *live = call_live[call_idx]; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci /* Make a copy of trivial_remat that we'll update as we crawl through 400bf215546Sopenharmony_ci * the live SSA defs and unspill them. 401bf215546Sopenharmony_ci */ 402bf215546Sopenharmony_ci struct brw_bitset remat = bitset_create(mem_ctx, num_ssa_defs); 403bf215546Sopenharmony_ci memcpy(remat.set, trivial_remat.set, live_words * sizeof(BITSET_WORD)); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci /* Before the two builders are always separated by the call 406bf215546Sopenharmony_ci * instruction, it won't break anything to have two of them. 407bf215546Sopenharmony_ci */ 408bf215546Sopenharmony_ci before.cursor = nir_before_instr(instr); 409bf215546Sopenharmony_ci after.cursor = nir_after_instr(instr); 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci unsigned offset = shader->scratch_size; 412bf215546Sopenharmony_ci for (unsigned w = 0; w < live_words; w++) { 413bf215546Sopenharmony_ci BITSET_WORD spill_mask = live[w] & ~trivial_remat.set[w]; 414bf215546Sopenharmony_ci while (spill_mask) { 415bf215546Sopenharmony_ci int i = u_bit_scan(&spill_mask); 416bf215546Sopenharmony_ci assert(i >= 0); 417bf215546Sopenharmony_ci unsigned index = w * BITSET_WORDBITS + i; 418bf215546Sopenharmony_ci assert(index < num_ssa_defs); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci nir_ssa_def *def = spill_defs[index]; 421bf215546Sopenharmony_ci if (can_remat_ssa_def(def, &remat)) { 422bf215546Sopenharmony_ci /* If this SSA def is re-materializable or based on other 423bf215546Sopenharmony_ci * things we've already spilled, re-materialize it rather 424bf215546Sopenharmony_ci * than spilling and filling. Anything which is trivially 425bf215546Sopenharmony_ci * re-materializable won't even get here because we take 426bf215546Sopenharmony_ci * those into account in spill_mask above. 427bf215546Sopenharmony_ci */ 428bf215546Sopenharmony_ci def = remat_ssa_def(&after, def); 429bf215546Sopenharmony_ci } else { 430bf215546Sopenharmony_ci bool is_bool = def->bit_size == 1; 431bf215546Sopenharmony_ci if (is_bool) 432bf215546Sopenharmony_ci def = nir_b2b32(&before, def); 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci const unsigned comp_size = def->bit_size / 8; 435bf215546Sopenharmony_ci offset = ALIGN(offset, comp_size); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci def = spill_fill(&before, &after, def, offset, 438bf215546Sopenharmony_ci address_format,stack_alignment); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci if (is_bool) 441bf215546Sopenharmony_ci def = nir_b2b1(&after, def); 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci offset += def->num_components * comp_size; 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci /* Mark this SSA def as available in the remat set so that, if 447bf215546Sopenharmony_ci * some other SSA def we need is computed based on it, we can 448bf215546Sopenharmony_ci * just re-compute instead of fetching from memory. 449bf215546Sopenharmony_ci */ 450bf215546Sopenharmony_ci BITSET_SET(remat.set, index); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci /* For now, we just make a note of this new SSA def. We'll 453bf215546Sopenharmony_ci * fix things up with the phi builder as a second pass. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci if (fill_defs[index] == NULL) { 456bf215546Sopenharmony_ci fill_defs[index] = 457bf215546Sopenharmony_ci rzalloc_array(mem_ctx, nir_ssa_def *, num_calls); 458bf215546Sopenharmony_ci } 459bf215546Sopenharmony_ci fill_defs[index][call_idx] = def; 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci } 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci nir_builder *b = &before; 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci offset = ALIGN(offset, stack_alignment); 466bf215546Sopenharmony_ci max_scratch_size = MAX2(max_scratch_size, offset); 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci /* First thing on the called shader's stack is the resume address 469bf215546Sopenharmony_ci * followed by a pointer to the payload. 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_ci nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci /* Lower to generic intrinsics with information about the stack & resume shader. */ 474bf215546Sopenharmony_ci switch (call->intrinsic) { 475bf215546Sopenharmony_ci case nir_intrinsic_trace_ray: { 476bf215546Sopenharmony_ci nir_rt_trace_ray(b, call->src[0].ssa, call->src[1].ssa, 477bf215546Sopenharmony_ci call->src[2].ssa, call->src[3].ssa, 478bf215546Sopenharmony_ci call->src[4].ssa, call->src[5].ssa, 479bf215546Sopenharmony_ci call->src[6].ssa, call->src[7].ssa, 480bf215546Sopenharmony_ci call->src[8].ssa, call->src[9].ssa, 481bf215546Sopenharmony_ci call->src[10].ssa, 482bf215546Sopenharmony_ci .call_idx = call_idx, .stack_size = offset); 483bf215546Sopenharmony_ci break; 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci case nir_intrinsic_report_ray_intersection: 487bf215546Sopenharmony_ci unreachable("Any-hit shaders must be inlined"); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci case nir_intrinsic_execute_callable: { 490bf215546Sopenharmony_ci nir_rt_execute_callable(b, call->src[0].ssa, call->src[1].ssa, .call_idx = call_idx, .stack_size = offset); 491bf215546Sopenharmony_ci break; 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci default: 495bf215546Sopenharmony_ci unreachable("Invalid shader call instruction"); 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci nir_rt_resume(b, .call_idx = call_idx, .stack_size = offset); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci nir_instr_remove(&call->instr); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci call_idx++; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci assert(call_idx == num_calls); 506bf215546Sopenharmony_ci shader->scratch_size = max_scratch_size; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci struct nir_phi_builder *pb = nir_phi_builder_create(impl); 509bf215546Sopenharmony_ci struct pbv_array pbv_arr = { 510bf215546Sopenharmony_ci .arr = rzalloc_array(mem_ctx, struct nir_phi_builder_value *, 511bf215546Sopenharmony_ci num_ssa_defs), 512bf215546Sopenharmony_ci .len = num_ssa_defs, 513bf215546Sopenharmony_ci }; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci const unsigned block_words = BITSET_WORDS(impl->num_blocks); 516bf215546Sopenharmony_ci BITSET_WORD *def_blocks = ralloc_array(mem_ctx, BITSET_WORD, block_words); 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci /* Go through and set up phi builder values for each spillable value which 519bf215546Sopenharmony_ci * we ever needed to spill at any point. 520bf215546Sopenharmony_ci */ 521bf215546Sopenharmony_ci for (unsigned index = 0; index < num_ssa_defs; index++) { 522bf215546Sopenharmony_ci if (fill_defs[index] == NULL) 523bf215546Sopenharmony_ci continue; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci nir_ssa_def *def = spill_defs[index]; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci memset(def_blocks, 0, block_words * sizeof(BITSET_WORD)); 528bf215546Sopenharmony_ci BITSET_SET(def_blocks, def->parent_instr->block->index); 529bf215546Sopenharmony_ci for (unsigned call_idx = 0; call_idx < num_calls; call_idx++) { 530bf215546Sopenharmony_ci if (fill_defs[index][call_idx] != NULL) 531bf215546Sopenharmony_ci BITSET_SET(def_blocks, call_block_indices[call_idx]); 532bf215546Sopenharmony_ci } 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci pbv_arr.arr[index] = nir_phi_builder_add_value(pb, def->num_components, 535bf215546Sopenharmony_ci def->bit_size, def_blocks); 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci /* Walk the shader one more time and rewrite SSA defs as needed using the 539bf215546Sopenharmony_ci * phi builder. 540bf215546Sopenharmony_ci */ 541bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 542bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 543bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 544bf215546Sopenharmony_ci if (def != NULL) { 545bf215546Sopenharmony_ci struct nir_phi_builder_value *pbv = 546bf215546Sopenharmony_ci get_phi_builder_value_for_def(def, &pbv_arr); 547bf215546Sopenharmony_ci if (pbv != NULL) 548bf215546Sopenharmony_ci nir_phi_builder_value_set_block_def(pbv, block, def); 549bf215546Sopenharmony_ci } 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) 552bf215546Sopenharmony_ci continue; 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci nir_foreach_src(instr, rewrite_instr_src_from_phi_builder, &pbv_arr); 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 557bf215546Sopenharmony_ci continue; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr); 560bf215546Sopenharmony_ci if (resume->intrinsic != nir_intrinsic_rt_resume) 561bf215546Sopenharmony_ci continue; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci call_idx = nir_intrinsic_call_idx(resume); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci /* Technically, this is the wrong place to add the fill defs to the 566bf215546Sopenharmony_ci * phi builder values because we haven't seen any of the load_scratch 567bf215546Sopenharmony_ci * instructions for this call yet. However, we know based on how we 568bf215546Sopenharmony_ci * emitted them that no value ever gets used until after the load 569bf215546Sopenharmony_ci * instruction has been emitted so this should be safe. If we ever 570bf215546Sopenharmony_ci * fail validation due this it likely means a bug in our spilling 571bf215546Sopenharmony_ci * code and not the phi re-construction code here. 572bf215546Sopenharmony_ci */ 573bf215546Sopenharmony_ci for (unsigned index = 0; index < num_ssa_defs; index++) { 574bf215546Sopenharmony_ci if (fill_defs[index] && fill_defs[index][call_idx]) { 575bf215546Sopenharmony_ci nir_phi_builder_value_set_block_def(pbv_arr.arr[index], block, 576bf215546Sopenharmony_ci fill_defs[index][call_idx]); 577bf215546Sopenharmony_ci } 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci nir_if *following_if = nir_block_get_following_if(block); 582bf215546Sopenharmony_ci if (following_if) { 583bf215546Sopenharmony_ci nir_ssa_def *new_def = 584bf215546Sopenharmony_ci get_phi_builder_def_for_src(&following_if->condition, 585bf215546Sopenharmony_ci &pbv_arr, block); 586bf215546Sopenharmony_ci if (new_def != NULL) 587bf215546Sopenharmony_ci nir_if_rewrite_condition(following_if, nir_src_for_ssa(new_def)); 588bf215546Sopenharmony_ci } 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci /* Handle phi sources that source from this block. We have to do this 591bf215546Sopenharmony_ci * as a separate pass because the phi builder assumes that uses and 592bf215546Sopenharmony_ci * defs are processed in an order that respects dominance. When we have 593bf215546Sopenharmony_ci * loops, a phi source may be a back-edge so we have to handle it as if 594bf215546Sopenharmony_ci * it were one of the last instructions in the predecessor block. 595bf215546Sopenharmony_ci */ 596bf215546Sopenharmony_ci nir_foreach_phi_src_leaving_block(block, 597bf215546Sopenharmony_ci rewrite_instr_src_from_phi_builder, 598bf215546Sopenharmony_ci &pbv_arr); 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci nir_phi_builder_finish(pb); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci ralloc_free(mem_ctx); 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 606bf215546Sopenharmony_ci nir_metadata_dominance); 607bf215546Sopenharmony_ci} 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_cistatic nir_instr * 610bf215546Sopenharmony_cifind_resume_instr(nir_function_impl *impl, unsigned call_idx) 611bf215546Sopenharmony_ci{ 612bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 613bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 614bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 615bf215546Sopenharmony_ci continue; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr); 618bf215546Sopenharmony_ci if (resume->intrinsic != nir_intrinsic_rt_resume) 619bf215546Sopenharmony_ci continue; 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci if (nir_intrinsic_call_idx(resume) == call_idx) 622bf215546Sopenharmony_ci return &resume->instr; 623bf215546Sopenharmony_ci } 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci unreachable("Couldn't find resume instruction"); 626bf215546Sopenharmony_ci} 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci/* Walk the CF tree and duplicate the contents of every loop, one half runs on 629bf215546Sopenharmony_ci * resume and the other half is for any post-resume loop iterations. We are 630bf215546Sopenharmony_ci * careful in our duplication to ensure that resume_instr is in the resume 631bf215546Sopenharmony_ci * half of the loop though a copy of resume_instr will remain in the other 632bf215546Sopenharmony_ci * half as well in case the same shader call happens twice. 633bf215546Sopenharmony_ci */ 634bf215546Sopenharmony_cistatic bool 635bf215546Sopenharmony_ciduplicate_loop_bodies(nir_function_impl *impl, nir_instr *resume_instr) 636bf215546Sopenharmony_ci{ 637bf215546Sopenharmony_ci nir_register *resume_reg = NULL; 638bf215546Sopenharmony_ci for (nir_cf_node *node = resume_instr->block->cf_node.parent; 639bf215546Sopenharmony_ci node->type != nir_cf_node_function; node = node->parent) { 640bf215546Sopenharmony_ci if (node->type != nir_cf_node_loop) 641bf215546Sopenharmony_ci continue; 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(node); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci if (resume_reg == NULL) { 646bf215546Sopenharmony_ci /* We only create resume_reg if we encounter a loop. This way we can 647bf215546Sopenharmony_ci * avoid re-validating the shader and calling ssa_to_regs in the case 648bf215546Sopenharmony_ci * where it's just if-ladders. 649bf215546Sopenharmony_ci */ 650bf215546Sopenharmony_ci resume_reg = nir_local_reg_create(impl); 651bf215546Sopenharmony_ci resume_reg->num_components = 1; 652bf215546Sopenharmony_ci resume_reg->bit_size = 1; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci nir_builder b; 655bf215546Sopenharmony_ci nir_builder_init(&b, impl); 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci /* Initialize resume to true */ 658bf215546Sopenharmony_ci b.cursor = nir_before_cf_list(&impl->body); 659bf215546Sopenharmony_ci nir_store_reg(&b, resume_reg, nir_imm_true(&b), 1); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci /* Set resume to false right after the resume instruction */ 662bf215546Sopenharmony_ci b.cursor = nir_after_instr(resume_instr); 663bf215546Sopenharmony_ci nir_store_reg(&b, resume_reg, nir_imm_false(&b), 1); 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci /* Before we go any further, make sure that everything which exits the 667bf215546Sopenharmony_ci * loop or continues around to the top of the loop does so through 668bf215546Sopenharmony_ci * registers. We're about to duplicate the loop body and we'll have 669bf215546Sopenharmony_ci * serious trouble if we don't do this. 670bf215546Sopenharmony_ci */ 671bf215546Sopenharmony_ci nir_convert_loop_to_lcssa(loop); 672bf215546Sopenharmony_ci nir_lower_phis_to_regs_block(nir_loop_first_block(loop)); 673bf215546Sopenharmony_ci nir_lower_phis_to_regs_block( 674bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci nir_cf_list cf_list; 677bf215546Sopenharmony_ci nir_cf_list_extract(&cf_list, &loop->body); 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci nir_if *_if = nir_if_create(impl->function->shader); 680bf215546Sopenharmony_ci _if->condition = nir_src_for_reg(resume_reg); 681bf215546Sopenharmony_ci nir_cf_node_insert(nir_after_cf_list(&loop->body), &_if->cf_node); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci nir_cf_list clone; 684bf215546Sopenharmony_ci nir_cf_list_clone(&clone, &cf_list, &loop->cf_node, NULL); 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci /* Insert the clone in the else and the original in the then so that 687bf215546Sopenharmony_ci * the resume_instr remains valid even after the duplication. 688bf215546Sopenharmony_ci */ 689bf215546Sopenharmony_ci nir_cf_reinsert(&cf_list, nir_before_cf_list(&_if->then_list)); 690bf215546Sopenharmony_ci nir_cf_reinsert(&clone, nir_before_cf_list(&_if->else_list)); 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (resume_reg != NULL) 694bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci return resume_reg != NULL; 697bf215546Sopenharmony_ci} 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_cistatic bool 700bf215546Sopenharmony_cicf_node_contains_block(nir_cf_node *node, nir_block *block) 701bf215546Sopenharmony_ci{ 702bf215546Sopenharmony_ci for (nir_cf_node *n = &block->cf_node; n != NULL; n = n->parent) { 703bf215546Sopenharmony_ci if (n == node) 704bf215546Sopenharmony_ci return true; 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci return false; 708bf215546Sopenharmony_ci} 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_cistatic void 711bf215546Sopenharmony_cirewrite_phis_to_pred(nir_block *block, nir_block *pred) 712bf215546Sopenharmony_ci{ 713bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 714bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 715bf215546Sopenharmony_ci break; 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci ASSERTED bool found = false; 720bf215546Sopenharmony_ci nir_foreach_phi_src(phi_src, phi) { 721bf215546Sopenharmony_ci if (phi_src->pred == pred) { 722bf215546Sopenharmony_ci found = true; 723bf215546Sopenharmony_ci assert(phi_src->src.is_ssa); 724bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src.ssa); 725bf215546Sopenharmony_ci break; 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci } 728bf215546Sopenharmony_ci assert(found); 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci} 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_cistatic bool 733bf215546Sopenharmony_cicursor_is_after_jump(nir_cursor cursor) 734bf215546Sopenharmony_ci{ 735bf215546Sopenharmony_ci switch (cursor.option) { 736bf215546Sopenharmony_ci case nir_cursor_before_instr: 737bf215546Sopenharmony_ci case nir_cursor_before_block: 738bf215546Sopenharmony_ci return false; 739bf215546Sopenharmony_ci case nir_cursor_after_instr: 740bf215546Sopenharmony_ci return cursor.instr->type == nir_instr_type_jump; 741bf215546Sopenharmony_ci case nir_cursor_after_block: 742bf215546Sopenharmony_ci return nir_block_ends_in_jump(cursor.block);; 743bf215546Sopenharmony_ci } 744bf215546Sopenharmony_ci unreachable("Invalid cursor option"); 745bf215546Sopenharmony_ci} 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci/** Flattens if ladders leading up to a resume 748bf215546Sopenharmony_ci * 749bf215546Sopenharmony_ci * Given a resume_instr, this function flattens any if ladders leading to the 750bf215546Sopenharmony_ci * resume instruction and deletes any code that cannot be encountered on a 751bf215546Sopenharmony_ci * direct path to the resume instruction. This way we get, for the most part, 752bf215546Sopenharmony_ci * straight-line control-flow up to the resume instruction. 753bf215546Sopenharmony_ci * 754bf215546Sopenharmony_ci * While we do this flattening, we also move any code which is in the remat 755bf215546Sopenharmony_ci * set up to the top of the function or to the top of the resume portion of 756bf215546Sopenharmony_ci * the current loop. We don't worry about control-flow as we do this because 757bf215546Sopenharmony_ci * phis will never be in the remat set (see can_remat_instr) and so nothing 758bf215546Sopenharmony_ci * control-dependent will ever need to be re-materialized. It is possible 759bf215546Sopenharmony_ci * that this algorithm will preserve too many instructions by moving them to 760bf215546Sopenharmony_ci * the top but we leave that for DCE to clean up. Any code not in the remat 761bf215546Sopenharmony_ci * set is deleted because it's either unused in the continuation or else 762bf215546Sopenharmony_ci * unspilled from a previous continuation and the unspill code is after the 763bf215546Sopenharmony_ci * resume instruction. 764bf215546Sopenharmony_ci * 765bf215546Sopenharmony_ci * If, for instance, we have something like this: 766bf215546Sopenharmony_ci * 767bf215546Sopenharmony_ci * // block 0 768bf215546Sopenharmony_ci * if (cond1) { 769bf215546Sopenharmony_ci * // block 1 770bf215546Sopenharmony_ci * } else { 771bf215546Sopenharmony_ci * // block 2 772bf215546Sopenharmony_ci * if (cond2) { 773bf215546Sopenharmony_ci * // block 3 774bf215546Sopenharmony_ci * resume; 775bf215546Sopenharmony_ci * if (cond3) { 776bf215546Sopenharmony_ci * // block 4 777bf215546Sopenharmony_ci * } 778bf215546Sopenharmony_ci * } else { 779bf215546Sopenharmony_ci * // block 5 780bf215546Sopenharmony_ci * } 781bf215546Sopenharmony_ci * } 782bf215546Sopenharmony_ci * 783bf215546Sopenharmony_ci * then we know, because we know the resume instruction had to be encoutered, 784bf215546Sopenharmony_ci * that cond1 = false and cond2 = true and we lower as follows: 785bf215546Sopenharmony_ci * 786bf215546Sopenharmony_ci * // block 0 787bf215546Sopenharmony_ci * // block 2 788bf215546Sopenharmony_ci * // block 3 789bf215546Sopenharmony_ci * resume; 790bf215546Sopenharmony_ci * if (cond3) { 791bf215546Sopenharmony_ci * // block 4 792bf215546Sopenharmony_ci * } 793bf215546Sopenharmony_ci * 794bf215546Sopenharmony_ci * As you can see, the code in blocks 1 and 5 was removed because there is no 795bf215546Sopenharmony_ci * path from the start of the shader to the resume instruction which execute 796bf215546Sopenharmony_ci * blocks 1 or 5. Any remat code from blocks 0, 2, and 3 is preserved and 797bf215546Sopenharmony_ci * moved to the top. If the resume instruction is inside a loop then we know 798bf215546Sopenharmony_ci * a priori that it is of the form 799bf215546Sopenharmony_ci * 800bf215546Sopenharmony_ci * loop { 801bf215546Sopenharmony_ci * if (resume) { 802bf215546Sopenharmony_ci * // Contents containing resume_instr 803bf215546Sopenharmony_ci * } else { 804bf215546Sopenharmony_ci * // Second copy of contents 805bf215546Sopenharmony_ci * } 806bf215546Sopenharmony_ci * } 807bf215546Sopenharmony_ci * 808bf215546Sopenharmony_ci * In this case, we only descend into the first half of the loop. The second 809bf215546Sopenharmony_ci * half is left alone as that portion is only ever executed after the resume 810bf215546Sopenharmony_ci * instruction. 811bf215546Sopenharmony_ci */ 812bf215546Sopenharmony_cistatic bool 813bf215546Sopenharmony_ciflatten_resume_if_ladder(nir_builder *b, 814bf215546Sopenharmony_ci nir_cf_node *parent_node, 815bf215546Sopenharmony_ci struct exec_list *child_list, 816bf215546Sopenharmony_ci bool child_list_contains_cursor, 817bf215546Sopenharmony_ci nir_instr *resume_instr, 818bf215546Sopenharmony_ci struct brw_bitset *remat) 819bf215546Sopenharmony_ci{ 820bf215546Sopenharmony_ci nir_cf_list cf_list; 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci /* If our child list contains the cursor instruction then we start out 823bf215546Sopenharmony_ci * before the cursor instruction. We need to know this so that we can skip 824bf215546Sopenharmony_ci * moving instructions which are already before the cursor. 825bf215546Sopenharmony_ci */ 826bf215546Sopenharmony_ci bool before_cursor = child_list_contains_cursor; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci nir_cf_node *resume_node = NULL; 829bf215546Sopenharmony_ci foreach_list_typed_safe(nir_cf_node, child, node, child_list) { 830bf215546Sopenharmony_ci switch (child->type) { 831bf215546Sopenharmony_ci case nir_cf_node_block: { 832bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(child); 833bf215546Sopenharmony_ci if (b->cursor.option == nir_cursor_before_block && 834bf215546Sopenharmony_ci b->cursor.block == block) { 835bf215546Sopenharmony_ci assert(before_cursor); 836bf215546Sopenharmony_ci before_cursor = false; 837bf215546Sopenharmony_ci } 838bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 839bf215546Sopenharmony_ci if ((b->cursor.option == nir_cursor_before_instr || 840bf215546Sopenharmony_ci b->cursor.option == nir_cursor_after_instr) && 841bf215546Sopenharmony_ci b->cursor.instr == instr) { 842bf215546Sopenharmony_ci assert(nir_cf_node_is_first(&block->cf_node)); 843bf215546Sopenharmony_ci assert(before_cursor); 844bf215546Sopenharmony_ci before_cursor = false; 845bf215546Sopenharmony_ci continue; 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci if (instr == resume_instr) 849bf215546Sopenharmony_ci goto found_resume; 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci if (!before_cursor && can_remat_instr(instr, remat)) { 852bf215546Sopenharmony_ci nir_instr_remove(instr); 853bf215546Sopenharmony_ci nir_instr_insert(b->cursor, instr); 854bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 857bf215546Sopenharmony_ci BITSET_SET(remat->set, def->index); 858bf215546Sopenharmony_ci } 859bf215546Sopenharmony_ci } 860bf215546Sopenharmony_ci if (b->cursor.option == nir_cursor_after_block && 861bf215546Sopenharmony_ci b->cursor.block == block) { 862bf215546Sopenharmony_ci assert(before_cursor); 863bf215546Sopenharmony_ci before_cursor = false; 864bf215546Sopenharmony_ci } 865bf215546Sopenharmony_ci break; 866bf215546Sopenharmony_ci } 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci case nir_cf_node_if: { 869bf215546Sopenharmony_ci nir_if *_if = nir_cf_node_as_if(child); 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci /* Because of the dummy blocks inserted in the first if block of the 872bf215546Sopenharmony_ci * loops, it's possible we find an empty if block that contains our 873bf215546Sopenharmony_ci * cursor. At this point, the block should still be empty and we can 874bf215546Sopenharmony_ci * just skip it and consider we're after the cursor. 875bf215546Sopenharmony_ci */ 876bf215546Sopenharmony_ci if (cf_node_contains_block(&_if->cf_node, 877bf215546Sopenharmony_ci nir_cursor_current_block(b->cursor))) { 878bf215546Sopenharmony_ci /* Some sanity checks to verify this is actually a dummy block */ 879bf215546Sopenharmony_ci assert(nir_src_as_bool(_if->condition) == true); 880bf215546Sopenharmony_ci assert(nir_cf_list_is_empty_block(&_if->then_list)); 881bf215546Sopenharmony_ci assert(nir_cf_list_is_empty_block(&_if->else_list)); 882bf215546Sopenharmony_ci before_cursor = false; 883bf215546Sopenharmony_ci break; 884bf215546Sopenharmony_ci } 885bf215546Sopenharmony_ci assert(!before_cursor); 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci if (flatten_resume_if_ladder(b, &_if->cf_node, &_if->then_list, 888bf215546Sopenharmony_ci false, resume_instr, remat)) { 889bf215546Sopenharmony_ci resume_node = child; 890bf215546Sopenharmony_ci rewrite_phis_to_pred(nir_cf_node_as_block(nir_cf_node_next(child)), 891bf215546Sopenharmony_ci nir_if_last_then_block(_if)); 892bf215546Sopenharmony_ci goto found_resume; 893bf215546Sopenharmony_ci } 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci if (flatten_resume_if_ladder(b, &_if->cf_node, &_if->else_list, 896bf215546Sopenharmony_ci false, resume_instr, remat)) { 897bf215546Sopenharmony_ci resume_node = child; 898bf215546Sopenharmony_ci rewrite_phis_to_pred(nir_cf_node_as_block(nir_cf_node_next(child)), 899bf215546Sopenharmony_ci nir_if_last_else_block(_if)); 900bf215546Sopenharmony_ci goto found_resume; 901bf215546Sopenharmony_ci } 902bf215546Sopenharmony_ci break; 903bf215546Sopenharmony_ci } 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci case nir_cf_node_loop: { 906bf215546Sopenharmony_ci assert(!before_cursor); 907bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(child); 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci if (cf_node_contains_block(&loop->cf_node, resume_instr->block)) { 910bf215546Sopenharmony_ci /* Thanks to our loop body duplication pass, every level of loop 911bf215546Sopenharmony_ci * containing the resume instruction contains exactly three nodes: 912bf215546Sopenharmony_ci * two blocks and an if. We don't want to lower away this if 913bf215546Sopenharmony_ci * because it's the resume selection if. The resume half is 914bf215546Sopenharmony_ci * always the then_list so that's what we want to flatten. 915bf215546Sopenharmony_ci */ 916bf215546Sopenharmony_ci nir_block *header = nir_loop_first_block(loop); 917bf215546Sopenharmony_ci nir_if *_if = nir_cf_node_as_if(nir_cf_node_next(&header->cf_node)); 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci nir_builder bl; 920bf215546Sopenharmony_ci nir_builder_init(&bl, b->impl); 921bf215546Sopenharmony_ci bl.cursor = nir_before_cf_list(&_if->then_list); 922bf215546Sopenharmony_ci /* We want to place anything re-materialized from inside the loop 923bf215546Sopenharmony_ci * at the top of the resume half of the loop. 924bf215546Sopenharmony_ci * 925bf215546Sopenharmony_ci * Because we're inside a loop, we might run into a break/continue 926bf215546Sopenharmony_ci * instructions. We can't place those within a block of 927bf215546Sopenharmony_ci * instructions, they need to be at the end of a block. So we 928bf215546Sopenharmony_ci * build our own dummy block to place them. 929bf215546Sopenharmony_ci */ 930bf215546Sopenharmony_ci nir_push_if(&bl, nir_imm_true(&bl)); 931bf215546Sopenharmony_ci { 932bf215546Sopenharmony_ci ASSERTED bool found = 933bf215546Sopenharmony_ci flatten_resume_if_ladder(&bl, &_if->cf_node, &_if->then_list, 934bf215546Sopenharmony_ci true, resume_instr, remat); 935bf215546Sopenharmony_ci assert(found); 936bf215546Sopenharmony_ci } 937bf215546Sopenharmony_ci nir_pop_if(&bl, NULL); 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci resume_node = child; 940bf215546Sopenharmony_ci goto found_resume; 941bf215546Sopenharmony_ci } else { 942bf215546Sopenharmony_ci ASSERTED bool found = 943bf215546Sopenharmony_ci flatten_resume_if_ladder(b, &loop->cf_node, &loop->body, 944bf215546Sopenharmony_ci false, resume_instr, remat); 945bf215546Sopenharmony_ci assert(!found); 946bf215546Sopenharmony_ci } 947bf215546Sopenharmony_ci break; 948bf215546Sopenharmony_ci } 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci case nir_cf_node_function: 951bf215546Sopenharmony_ci unreachable("Unsupported CF node type"); 952bf215546Sopenharmony_ci } 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci assert(!before_cursor); 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_ci /* If we got here, we didn't find the resume node or instruction. */ 957bf215546Sopenharmony_ci return false; 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_cifound_resume: 960bf215546Sopenharmony_ci /* If we got here then we found either the resume node or the resume 961bf215546Sopenharmony_ci * instruction in this CF list. 962bf215546Sopenharmony_ci */ 963bf215546Sopenharmony_ci if (resume_node) { 964bf215546Sopenharmony_ci /* If the resume instruction is buried in side one of our children CF 965bf215546Sopenharmony_ci * nodes, resume_node now points to that child. 966bf215546Sopenharmony_ci */ 967bf215546Sopenharmony_ci if (resume_node->type == nir_cf_node_if) { 968bf215546Sopenharmony_ci /* Thanks to the recursive call, all of the interesting contents of 969bf215546Sopenharmony_ci * resume_node have been copied before the cursor. We just need to 970bf215546Sopenharmony_ci * copy the stuff after resume_node. 971bf215546Sopenharmony_ci */ 972bf215546Sopenharmony_ci nir_cf_extract(&cf_list, nir_after_cf_node(resume_node), 973bf215546Sopenharmony_ci nir_after_cf_list(child_list)); 974bf215546Sopenharmony_ci } else { 975bf215546Sopenharmony_ci /* The loop contains its own cursor and still has useful stuff in it. 976bf215546Sopenharmony_ci * We want to move everything after and including the loop to before 977bf215546Sopenharmony_ci * the cursor. 978bf215546Sopenharmony_ci */ 979bf215546Sopenharmony_ci assert(resume_node->type == nir_cf_node_loop); 980bf215546Sopenharmony_ci nir_cf_extract(&cf_list, nir_before_cf_node(resume_node), 981bf215546Sopenharmony_ci nir_after_cf_list(child_list)); 982bf215546Sopenharmony_ci } 983bf215546Sopenharmony_ci } else { 984bf215546Sopenharmony_ci /* If we found the resume instruction in one of our blocks, grab 985bf215546Sopenharmony_ci * everything after it in the entire list (not just the one block), and 986bf215546Sopenharmony_ci * place it before the cursor instr. 987bf215546Sopenharmony_ci */ 988bf215546Sopenharmony_ci nir_cf_extract(&cf_list, nir_after_instr(resume_instr), 989bf215546Sopenharmony_ci nir_after_cf_list(child_list)); 990bf215546Sopenharmony_ci } 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci if (cursor_is_after_jump(b->cursor)) { 993bf215546Sopenharmony_ci /* If the resume instruction is in a loop, it's possible cf_list ends 994bf215546Sopenharmony_ci * in a break or continue instruction, in which case we don't want to 995bf215546Sopenharmony_ci * insert anything. It's also possible we have an early return if 996bf215546Sopenharmony_ci * someone hasn't lowered those yet. In either case, nothing after that 997bf215546Sopenharmony_ci * point executes in this context so we can delete it. 998bf215546Sopenharmony_ci */ 999bf215546Sopenharmony_ci nir_cf_delete(&cf_list); 1000bf215546Sopenharmony_ci } else { 1001bf215546Sopenharmony_ci b->cursor = nir_cf_reinsert(&cf_list, b->cursor); 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (!resume_node) { 1005bf215546Sopenharmony_ci /* We want the resume to be the first "interesting" instruction */ 1006bf215546Sopenharmony_ci nir_instr_remove(resume_instr); 1007bf215546Sopenharmony_ci nir_instr_insert(nir_before_cf_list(&b->impl->body), resume_instr); 1008bf215546Sopenharmony_ci } 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci /* We've copied everything interesting out of this CF list to before the 1011bf215546Sopenharmony_ci * cursor. Delete everything else. 1012bf215546Sopenharmony_ci */ 1013bf215546Sopenharmony_ci if (child_list_contains_cursor) { 1014bf215546Sopenharmony_ci /* If the cursor is in child_list, then we're either a loop or function 1015bf215546Sopenharmony_ci * that contains the cursor. Cursors are always placed in a wrapper if 1016bf215546Sopenharmony_ci * (true) to deal with break/continue and early returns. We've already 1017bf215546Sopenharmony_ci * moved everything interesting inside the wrapper if and we want to 1018bf215546Sopenharmony_ci * remove whatever is left after it. 1019bf215546Sopenharmony_ci */ 1020bf215546Sopenharmony_ci nir_block *cursor_block = nir_cursor_current_block(b->cursor); 1021bf215546Sopenharmony_ci nir_if *wrapper_if = nir_cf_node_as_if(cursor_block->cf_node.parent); 1022bf215546Sopenharmony_ci assert(wrapper_if->cf_node.parent == parent_node); 1023bf215546Sopenharmony_ci /* The wrapper if blocks are either put into the body of the main 1024bf215546Sopenharmony_ci * function, or within the resume if block of the loops. 1025bf215546Sopenharmony_ci */ 1026bf215546Sopenharmony_ci assert(parent_node->type == nir_cf_node_function || 1027bf215546Sopenharmony_ci (parent_node->type == nir_cf_node_if && 1028bf215546Sopenharmony_ci parent_node->parent->type == nir_cf_node_loop)); 1029bf215546Sopenharmony_ci nir_cf_extract(&cf_list, nir_after_cf_node(&wrapper_if->cf_node), 1030bf215546Sopenharmony_ci nir_after_cf_list(child_list)); 1031bf215546Sopenharmony_ci } else { 1032bf215546Sopenharmony_ci nir_cf_list_extract(&cf_list, child_list); 1033bf215546Sopenharmony_ci } 1034bf215546Sopenharmony_ci nir_cf_delete(&cf_list); 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci return true; 1037bf215546Sopenharmony_ci} 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_cistatic nir_instr * 1040bf215546Sopenharmony_cilower_resume(nir_shader *shader, int call_idx) 1041bf215546Sopenharmony_ci{ 1042bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci nir_instr *resume_instr = find_resume_instr(impl, call_idx); 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_ci if (duplicate_loop_bodies(impl, resume_instr)) { 1047bf215546Sopenharmony_ci nir_validate_shader(shader, "after duplicate_loop_bodies in " 1048bf215546Sopenharmony_ci "brw_nir_lower_shader_calls"); 1049bf215546Sopenharmony_ci /* If we duplicated the bodies of any loops, run regs_to_ssa to get rid 1050bf215546Sopenharmony_ci * of all those pesky registers we just added. 1051bf215546Sopenharmony_ci */ 1052bf215546Sopenharmony_ci NIR_PASS_V(shader, nir_lower_regs_to_ssa); 1053bf215546Sopenharmony_ci } 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci /* Re-index nir_ssa_def::index. We don't care about actual liveness in 1056bf215546Sopenharmony_ci * this pass but, so we can use the same helpers as the spilling pass, we 1057bf215546Sopenharmony_ci * need to make sure that live_index is something sane. It's used 1058bf215546Sopenharmony_ci * constantly for determining if an SSA value has been added since the 1059bf215546Sopenharmony_ci * start of the pass. 1060bf215546Sopenharmony_ci */ 1061bf215546Sopenharmony_ci nir_index_ssa_defs(impl); 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(shader); 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci /* Used to track which things may have been assumed to be re-materialized 1066bf215546Sopenharmony_ci * by the spilling pass and which we shouldn't delete. 1067bf215546Sopenharmony_ci */ 1068bf215546Sopenharmony_ci struct brw_bitset remat = bitset_create(mem_ctx, impl->ssa_alloc); 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci /* Create a nop instruction to use as a cursor as we extract and re-insert 1071bf215546Sopenharmony_ci * stuff into the CFG. 1072bf215546Sopenharmony_ci */ 1073bf215546Sopenharmony_ci nir_builder b; 1074bf215546Sopenharmony_ci nir_builder_init(&b, impl); 1075bf215546Sopenharmony_ci b.cursor = nir_before_cf_list(&impl->body); 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci nir_push_if(&b, nir_imm_true(&b)); 1078bf215546Sopenharmony_ci { 1079bf215546Sopenharmony_ci ASSERTED bool found = 1080bf215546Sopenharmony_ci flatten_resume_if_ladder(&b, &impl->cf_node, &impl->body, 1081bf215546Sopenharmony_ci true, resume_instr, &remat); 1082bf215546Sopenharmony_ci assert(found); 1083bf215546Sopenharmony_ci } 1084bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci nir_validate_shader(shader, "after flatten_resume_if_ladder in " 1089bf215546Sopenharmony_ci "brw_nir_lower_shader_calls"); 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_ci return resume_instr; 1094bf215546Sopenharmony_ci} 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_cistatic void 1097bf215546Sopenharmony_cireplace_resume_with_halt(nir_shader *shader, nir_instr *keep) 1098bf215546Sopenharmony_ci{ 1099bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci nir_builder b; 1102bf215546Sopenharmony_ci nir_builder_init(&b, impl); 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci nir_foreach_block_safe(block, impl) { 1105bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1106bf215546Sopenharmony_ci if (instr == keep) 1107bf215546Sopenharmony_ci continue; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1110bf215546Sopenharmony_ci continue; 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci nir_intrinsic_instr *resume = nir_instr_as_intrinsic(instr); 1113bf215546Sopenharmony_ci if (resume->intrinsic != nir_intrinsic_rt_resume) 1114bf215546Sopenharmony_ci continue; 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci /* If this is some other resume, then we've kicked off a ray or 1117bf215546Sopenharmony_ci * bindless thread and we don't want to go any further in this 1118bf215546Sopenharmony_ci * shader. Insert a halt so that NIR will delete any instructions 1119bf215546Sopenharmony_ci * dominated by this call instruction including the scratch_load 1120bf215546Sopenharmony_ci * instructions we inserted. 1121bf215546Sopenharmony_ci */ 1122bf215546Sopenharmony_ci nir_cf_list cf_list; 1123bf215546Sopenharmony_ci nir_cf_extract(&cf_list, nir_after_instr(&resume->instr), 1124bf215546Sopenharmony_ci nir_after_block(block)); 1125bf215546Sopenharmony_ci nir_cf_delete(&cf_list); 1126bf215546Sopenharmony_ci b.cursor = nir_instr_remove(&resume->instr); 1127bf215546Sopenharmony_ci nir_jump(&b, nir_jump_halt); 1128bf215546Sopenharmony_ci break; 1129bf215546Sopenharmony_ci } 1130bf215546Sopenharmony_ci } 1131bf215546Sopenharmony_ci} 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci/** Lower shader call instructions to split shaders. 1134bf215546Sopenharmony_ci * 1135bf215546Sopenharmony_ci * Shader calls can be split into an initial shader and a series of "resume" 1136bf215546Sopenharmony_ci * shaders. When the shader is first invoked, it is the initial shader which 1137bf215546Sopenharmony_ci * is executed. At any point in the initial shader or any one of the resume 1138bf215546Sopenharmony_ci * shaders, a shader call operation may be performed. The possible shader call 1139bf215546Sopenharmony_ci * operations are: 1140bf215546Sopenharmony_ci * 1141bf215546Sopenharmony_ci * - trace_ray 1142bf215546Sopenharmony_ci * - report_ray_intersection 1143bf215546Sopenharmony_ci * - execute_callable 1144bf215546Sopenharmony_ci * 1145bf215546Sopenharmony_ci * When a shader call operation is performed, we push all live values to the 1146bf215546Sopenharmony_ci * stack,call rt_trace_ray/rt_execute_callable and then kill the shader. Once 1147bf215546Sopenharmony_ci * the operation we invoked is complete, a callee shader will return execution 1148bf215546Sopenharmony_ci * to the respective resume shader. The resume shader pops the contents off 1149bf215546Sopenharmony_ci * the stack and picks up where the calling shader left off. 1150bf215546Sopenharmony_ci * 1151bf215546Sopenharmony_ci * Stack management is assumed to be done after this pass. Call 1152bf215546Sopenharmony_ci * instructions and their resumes get annotated with stack information that 1153bf215546Sopenharmony_ci * should be enough for the backend to implement proper stack management. 1154bf215546Sopenharmony_ci */ 1155bf215546Sopenharmony_cibool 1156bf215546Sopenharmony_cinir_lower_shader_calls(nir_shader *shader, 1157bf215546Sopenharmony_ci nir_address_format address_format, 1158bf215546Sopenharmony_ci unsigned stack_alignment, 1159bf215546Sopenharmony_ci nir_shader ***resume_shaders_out, 1160bf215546Sopenharmony_ci uint32_t *num_resume_shaders_out, 1161bf215546Sopenharmony_ci void *mem_ctx) 1162bf215546Sopenharmony_ci{ 1163bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci nir_builder b; 1166bf215546Sopenharmony_ci nir_builder_init(&b, impl); 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci int num_calls = 0; 1169bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 1170bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1171bf215546Sopenharmony_ci if (instr_is_shader_call(instr)) 1172bf215546Sopenharmony_ci num_calls++; 1173bf215546Sopenharmony_ci } 1174bf215546Sopenharmony_ci } 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci if (num_calls == 0) { 1177bf215546Sopenharmony_ci nir_shader_preserve_all_metadata(shader); 1178bf215546Sopenharmony_ci *num_resume_shaders_out = 0; 1179bf215546Sopenharmony_ci return false; 1180bf215546Sopenharmony_ci } 1181bf215546Sopenharmony_ci 1182bf215546Sopenharmony_ci /* Some intrinsics not only can't be re-materialized but aren't preserved 1183bf215546Sopenharmony_ci * when moving to the continuation shader. We have to move them to the top 1184bf215546Sopenharmony_ci * to ensure they get spilled as needed. 1185bf215546Sopenharmony_ci */ 1186bf215546Sopenharmony_ci { 1187bf215546Sopenharmony_ci bool progress = false; 1188bf215546Sopenharmony_ci NIR_PASS(progress, shader, move_system_values_to_top); 1189bf215546Sopenharmony_ci if (progress) 1190bf215546Sopenharmony_ci NIR_PASS(progress, shader, nir_opt_cse); 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci NIR_PASS_V(shader, spill_ssa_defs_and_lower_shader_calls, 1194bf215546Sopenharmony_ci num_calls, address_format, stack_alignment); 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci nir_opt_remove_phis(shader); 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci /* Make N copies of our shader */ 1199bf215546Sopenharmony_ci nir_shader **resume_shaders = ralloc_array(mem_ctx, nir_shader *, num_calls); 1200bf215546Sopenharmony_ci for (unsigned i = 0; i < num_calls; i++) { 1201bf215546Sopenharmony_ci resume_shaders[i] = nir_shader_clone(mem_ctx, shader); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci /* Give them a recognizable name */ 1204bf215546Sopenharmony_ci resume_shaders[i]->info.name = 1205bf215546Sopenharmony_ci ralloc_asprintf(mem_ctx, "%s%sresume_%u", 1206bf215546Sopenharmony_ci shader->info.name ? shader->info.name : "", 1207bf215546Sopenharmony_ci shader->info.name ? "-" : "", 1208bf215546Sopenharmony_ci i); 1209bf215546Sopenharmony_ci } 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci replace_resume_with_halt(shader, NULL); 1212bf215546Sopenharmony_ci for (unsigned i = 0; i < num_calls; i++) { 1213bf215546Sopenharmony_ci nir_instr *resume_instr = lower_resume(resume_shaders[i], i); 1214bf215546Sopenharmony_ci replace_resume_with_halt(resume_shaders[i], resume_instr); 1215bf215546Sopenharmony_ci nir_opt_remove_phis(resume_shaders[i]); 1216bf215546Sopenharmony_ci /* Remove the dummy blocks added by flatten_resume_if_ladder() */ 1217bf215546Sopenharmony_ci nir_opt_if(resume_shaders[i], nir_opt_if_optimize_phi_true_false); 1218bf215546Sopenharmony_ci } 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci *resume_shaders_out = resume_shaders; 1221bf215546Sopenharmony_ci *num_resume_shaders_out = num_calls; 1222bf215546Sopenharmony_ci 1223bf215546Sopenharmony_ci return true; 1224bf215546Sopenharmony_ci} 1225