1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nir.h" 29bf215546Sopenharmony_ci#include "nir_instr_set.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci/* 32bf215546Sopenharmony_ci * Implements Global Code Motion. A description of GCM can be found in 33bf215546Sopenharmony_ci * "Global Code Motion; Global Value Numbering" by Cliff Click. 34bf215546Sopenharmony_ci * Unfortunately, the algorithm presented in the paper is broken in a 35bf215546Sopenharmony_ci * number of ways. The algorithm used here differs substantially from the 36bf215546Sopenharmony_ci * one in the paper but it is, in my opinion, much easier to read and 37bf215546Sopenharmony_ci * verify correcness. 38bf215546Sopenharmony_ci */ 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci/* This is used to stop GCM moving instruction out of a loop if the loop 41bf215546Sopenharmony_ci * contains too many instructions and moving them would create excess spilling. 42bf215546Sopenharmony_ci * 43bf215546Sopenharmony_ci * TODO: Figure out a better way to decide if we should remove instructions from 44bf215546Sopenharmony_ci * a loop. 45bf215546Sopenharmony_ci */ 46bf215546Sopenharmony_ci#define MAX_LOOP_INSTRUCTIONS 100 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistruct gcm_block_info { 49bf215546Sopenharmony_ci /* Number of loops this block is inside */ 50bf215546Sopenharmony_ci unsigned loop_depth; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci /* Number of ifs this block is inside */ 53bf215546Sopenharmony_ci unsigned if_depth; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci unsigned loop_instr_count; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* The loop the block is nested inside or NULL */ 58bf215546Sopenharmony_ci nir_loop *loop; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci /* The last instruction inserted into this block. This is used as we 61bf215546Sopenharmony_ci * traverse the instructions and insert them back into the program to 62bf215546Sopenharmony_ci * put them in the right order. 63bf215546Sopenharmony_ci */ 64bf215546Sopenharmony_ci nir_instr *last_instr; 65bf215546Sopenharmony_ci}; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistruct gcm_instr_info { 68bf215546Sopenharmony_ci nir_block *early_block; 69bf215546Sopenharmony_ci}; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci/* Flags used in the instr->pass_flags field for various instruction states */ 72bf215546Sopenharmony_cienum { 73bf215546Sopenharmony_ci GCM_INSTR_PINNED = (1 << 0), 74bf215546Sopenharmony_ci GCM_INSTR_SCHEDULE_EARLIER_ONLY = (1 << 1), 75bf215546Sopenharmony_ci GCM_INSTR_SCHEDULED_EARLY = (1 << 2), 76bf215546Sopenharmony_ci GCM_INSTR_SCHEDULED_LATE = (1 << 3), 77bf215546Sopenharmony_ci GCM_INSTR_PLACED = (1 << 4), 78bf215546Sopenharmony_ci}; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistruct gcm_state { 81bf215546Sopenharmony_ci nir_function_impl *impl; 82bf215546Sopenharmony_ci nir_instr *instr; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci bool progress; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* The list of non-pinned instructions. As we do the late scheduling, 87bf215546Sopenharmony_ci * we pull non-pinned instructions out of their blocks and place them in 88bf215546Sopenharmony_ci * this list. This saves us from having linked-list problems when we go 89bf215546Sopenharmony_ci * to put instructions back in their blocks. 90bf215546Sopenharmony_ci */ 91bf215546Sopenharmony_ci struct exec_list instrs; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci struct gcm_block_info *blocks; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci unsigned num_instrs; 96bf215546Sopenharmony_ci struct gcm_instr_info *instr_infos; 97bf215546Sopenharmony_ci}; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_cistatic unsigned 100bf215546Sopenharmony_ciget_loop_instr_count(struct exec_list *cf_list) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci unsigned loop_instr_count = 0; 103bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, cf_list) { 104bf215546Sopenharmony_ci switch (node->type) { 105bf215546Sopenharmony_ci case nir_cf_node_block: { 106bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(node); 107bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 108bf215546Sopenharmony_ci loop_instr_count++; 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci break; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci case nir_cf_node_if: { 113bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(node); 114bf215546Sopenharmony_ci loop_instr_count += get_loop_instr_count(&if_stmt->then_list); 115bf215546Sopenharmony_ci loop_instr_count += get_loop_instr_count(&if_stmt->else_list); 116bf215546Sopenharmony_ci break; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci case nir_cf_node_loop: { 119bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(node); 120bf215546Sopenharmony_ci loop_instr_count += get_loop_instr_count(&loop->body); 121bf215546Sopenharmony_ci break; 122bf215546Sopenharmony_ci } 123bf215546Sopenharmony_ci default: 124bf215546Sopenharmony_ci unreachable("Invalid CF node type"); 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci return loop_instr_count; 129bf215546Sopenharmony_ci} 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci/* Recursively walks the CFG and builds the block_info structure */ 132bf215546Sopenharmony_cistatic void 133bf215546Sopenharmony_cigcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, 134bf215546Sopenharmony_ci nir_loop *loop, unsigned loop_depth, unsigned if_depth, 135bf215546Sopenharmony_ci unsigned loop_instr_count) 136bf215546Sopenharmony_ci{ 137bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, cf_list) { 138bf215546Sopenharmony_ci switch (node->type) { 139bf215546Sopenharmony_ci case nir_cf_node_block: { 140bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(node); 141bf215546Sopenharmony_ci state->blocks[block->index].if_depth = if_depth; 142bf215546Sopenharmony_ci state->blocks[block->index].loop_depth = loop_depth; 143bf215546Sopenharmony_ci state->blocks[block->index].loop_instr_count = loop_instr_count; 144bf215546Sopenharmony_ci state->blocks[block->index].loop = loop; 145bf215546Sopenharmony_ci break; 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci case nir_cf_node_if: { 148bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(node); 149bf215546Sopenharmony_ci gcm_build_block_info(&if_stmt->then_list, state, loop, loop_depth, 150bf215546Sopenharmony_ci if_depth + 1, ~0u); 151bf215546Sopenharmony_ci gcm_build_block_info(&if_stmt->else_list, state, loop, loop_depth, 152bf215546Sopenharmony_ci if_depth + 1, ~0u); 153bf215546Sopenharmony_ci break; 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci case nir_cf_node_loop: { 156bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(node); 157bf215546Sopenharmony_ci gcm_build_block_info(&loop->body, state, loop, loop_depth + 1, if_depth, 158bf215546Sopenharmony_ci get_loop_instr_count(&loop->body)); 159bf215546Sopenharmony_ci break; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci default: 162bf215546Sopenharmony_ci unreachable("Invalid CF node type"); 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci} 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_cistatic bool 168bf215546Sopenharmony_ciis_src_scalarizable(nir_src *src) 169bf215546Sopenharmony_ci{ 170bf215546Sopenharmony_ci assert(src->is_ssa); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci nir_instr *src_instr = src->ssa->parent_instr; 173bf215546Sopenharmony_ci switch (src_instr->type) { 174bf215546Sopenharmony_ci case nir_instr_type_alu: { 175bf215546Sopenharmony_ci nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* ALU operations with output_size == 0 should be scalarized. We 178bf215546Sopenharmony_ci * will also see a bunch of vecN operations from scalarizing ALU 179bf215546Sopenharmony_ci * operations and, since they can easily be copy-propagated, they 180bf215546Sopenharmony_ci * are ok too. 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_ci return nir_op_infos[src_alu->op].output_size == 0 || 183bf215546Sopenharmony_ci src_alu->op == nir_op_vec2 || 184bf215546Sopenharmony_ci src_alu->op == nir_op_vec3 || 185bf215546Sopenharmony_ci src_alu->op == nir_op_vec4; 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci case nir_instr_type_load_const: 189bf215546Sopenharmony_ci /* These are trivially scalarizable */ 190bf215546Sopenharmony_ci return true; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 193bf215546Sopenharmony_ci return true; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 196bf215546Sopenharmony_ci nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr); 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci switch (src_intrin->intrinsic) { 199bf215546Sopenharmony_ci case nir_intrinsic_load_deref: { 200bf215546Sopenharmony_ci /* Don't scalarize if we see a load of a local variable because it 201bf215546Sopenharmony_ci * might turn into one of the things we can't scalarize. 202bf215546Sopenharmony_ci */ 203bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]); 204bf215546Sopenharmony_ci return !nir_deref_mode_may_be(deref, (nir_var_function_temp | 205bf215546Sopenharmony_ci nir_var_shader_temp)); 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 209bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 210bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 211bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 212bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 213bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 214bf215546Sopenharmony_ci case nir_intrinsic_load_global: 215bf215546Sopenharmony_ci case nir_intrinsic_load_global_constant: 216bf215546Sopenharmony_ci case nir_intrinsic_load_input: 217bf215546Sopenharmony_ci return true; 218bf215546Sopenharmony_ci default: 219bf215546Sopenharmony_ci break; 220bf215546Sopenharmony_ci } 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci return false; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci default: 226bf215546Sopenharmony_ci /* We can't scalarize this type of instruction */ 227bf215546Sopenharmony_ci return false; 228bf215546Sopenharmony_ci } 229bf215546Sopenharmony_ci} 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_cistatic bool 232bf215546Sopenharmony_ciis_binding_uniform(nir_src src) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci nir_binding binding = nir_chase_binding(src); 235bf215546Sopenharmony_ci if (!binding.success) 236bf215546Sopenharmony_ci return false; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci for (unsigned i = 0; i < binding.num_indices; i++) { 239bf215546Sopenharmony_ci if (!nir_src_is_always_uniform(binding.indices[i])) 240bf215546Sopenharmony_ci return false; 241bf215546Sopenharmony_ci } 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci return true; 244bf215546Sopenharmony_ci} 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_cistatic void 247bf215546Sopenharmony_cipin_intrinsic(nir_intrinsic_instr *intrin) 248bf215546Sopenharmony_ci{ 249bf215546Sopenharmony_ci nir_instr *instr = &intrin->instr; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if (!nir_intrinsic_can_reorder(intrin)) { 252bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 253bf215546Sopenharmony_ci return; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci instr->pass_flags = 0; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci /* If the intrinsic requires a uniform source, we can't safely move it across non-uniform 259bf215546Sopenharmony_ci * control flow if it's not uniform at the point it's defined. 260bf215546Sopenharmony_ci * Stores and atomics can never be re-ordered, so we don't have to consider them here. 261bf215546Sopenharmony_ci */ 262bf215546Sopenharmony_ci bool non_uniform = nir_intrinsic_has_access(intrin) && 263bf215546Sopenharmony_ci (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM); 264bf215546Sopenharmony_ci if (!non_uniform && 265bf215546Sopenharmony_ci (intrin->intrinsic == nir_intrinsic_load_ubo || 266bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_ssbo || 267bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_get_ubo_size || 268bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_get_ssbo_size || 269bf215546Sopenharmony_ci nir_intrinsic_has_image_dim(intrin) || 270bf215546Sopenharmony_ci ((intrin->intrinsic == nir_intrinsic_load_deref || 271bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_deref_buffer_array_length) && 272bf215546Sopenharmony_ci nir_deref_mode_may_be(nir_src_as_deref(intrin->src[0]), 273bf215546Sopenharmony_ci nir_var_mem_ubo | nir_var_mem_ssbo)))) { 274bf215546Sopenharmony_ci if (!is_binding_uniform(intrin->src[0])) 275bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 276bf215546Sopenharmony_ci } else if (intrin->intrinsic == nir_intrinsic_load_push_constant) { 277bf215546Sopenharmony_ci if (!nir_src_is_always_uniform(intrin->src[0])) 278bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 279bf215546Sopenharmony_ci } else if (intrin->intrinsic == nir_intrinsic_load_deref && 280bf215546Sopenharmony_ci nir_deref_mode_is(nir_src_as_deref(intrin->src[0]), 281bf215546Sopenharmony_ci nir_var_mem_push_const)) { 282bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 283bf215546Sopenharmony_ci while (deref->deref_type != nir_deref_type_var) { 284bf215546Sopenharmony_ci if ((deref->deref_type == nir_deref_type_array || 285bf215546Sopenharmony_ci deref->deref_type == nir_deref_type_ptr_as_array) && 286bf215546Sopenharmony_ci !nir_src_is_always_uniform(deref->arr.index)) { 287bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 288bf215546Sopenharmony_ci return; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci deref = nir_deref_instr_parent(deref); 291bf215546Sopenharmony_ci if (!deref) { 292bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 293bf215546Sopenharmony_ci return; 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci/* Walks the instruction list and marks immovable instructions as pinned or 300bf215546Sopenharmony_ci * placed. 301bf215546Sopenharmony_ci * 302bf215546Sopenharmony_ci * This function also serves to initialize the instr->pass_flags field. 303bf215546Sopenharmony_ci * After this is completed, all instructions' pass_flags fields will be set 304bf215546Sopenharmony_ci * to either GCM_INSTR_PINNED, GCM_INSTR_PLACED or 0. 305bf215546Sopenharmony_ci */ 306bf215546Sopenharmony_cistatic void 307bf215546Sopenharmony_cigcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci state->num_instrs = 0; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 312bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 313bf215546Sopenharmony_ci /* Index the instructions for use in gcm_state::instrs */ 314bf215546Sopenharmony_ci instr->index = state->num_instrs++; 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci switch (instr->type) { 317bf215546Sopenharmony_ci case nir_instr_type_alu: 318bf215546Sopenharmony_ci switch (nir_instr_as_alu(instr)->op) { 319bf215546Sopenharmony_ci case nir_op_fddx: 320bf215546Sopenharmony_ci case nir_op_fddy: 321bf215546Sopenharmony_ci case nir_op_fddx_fine: 322bf215546Sopenharmony_ci case nir_op_fddy_fine: 323bf215546Sopenharmony_ci case nir_op_fddx_coarse: 324bf215546Sopenharmony_ci case nir_op_fddy_coarse: 325bf215546Sopenharmony_ci /* These can only go in uniform control flow */ 326bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY; 327bf215546Sopenharmony_ci break; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci case nir_op_mov: 330bf215546Sopenharmony_ci if (!is_src_scalarizable(&(nir_instr_as_alu(instr)->src[0].src))) { 331bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 332bf215546Sopenharmony_ci break; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci FALLTHROUGH; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci default: 337bf215546Sopenharmony_ci instr->pass_flags = 0; 338bf215546Sopenharmony_ci break; 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci break; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci case nir_instr_type_tex: { 343bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 344bf215546Sopenharmony_ci if (nir_tex_instr_has_implicit_derivative(tex)) 345bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 348bf215546Sopenharmony_ci nir_tex_src *src = &tex->src[i]; 349bf215546Sopenharmony_ci switch (src->src_type) { 350bf215546Sopenharmony_ci case nir_tex_src_texture_deref: 351bf215546Sopenharmony_ci if (!tex->texture_non_uniform && !is_binding_uniform(src->src)) 352bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 353bf215546Sopenharmony_ci break; 354bf215546Sopenharmony_ci case nir_tex_src_sampler_deref: 355bf215546Sopenharmony_ci if (!tex->sampler_non_uniform && !is_binding_uniform(src->src)) 356bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 357bf215546Sopenharmony_ci break; 358bf215546Sopenharmony_ci case nir_tex_src_texture_offset: 359bf215546Sopenharmony_ci case nir_tex_src_texture_handle: 360bf215546Sopenharmony_ci if (!tex->texture_non_uniform && !nir_src_is_always_uniform(src->src)) 361bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 362bf215546Sopenharmony_ci break; 363bf215546Sopenharmony_ci case nir_tex_src_sampler_offset: 364bf215546Sopenharmony_ci case nir_tex_src_sampler_handle: 365bf215546Sopenharmony_ci if (!tex->sampler_non_uniform && !nir_src_is_always_uniform(src->src)) 366bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 367bf215546Sopenharmony_ci break; 368bf215546Sopenharmony_ci default: 369bf215546Sopenharmony_ci break; 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci break; 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci case nir_instr_type_deref: 376bf215546Sopenharmony_ci case nir_instr_type_load_const: 377bf215546Sopenharmony_ci instr->pass_flags = 0; 378bf215546Sopenharmony_ci break; 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 381bf215546Sopenharmony_ci pin_intrinsic(nir_instr_as_intrinsic(instr)); 382bf215546Sopenharmony_ci break; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci case nir_instr_type_call: 385bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PINNED; 386bf215546Sopenharmony_ci break; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci case nir_instr_type_jump: 389bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 390bf215546Sopenharmony_ci case nir_instr_type_phi: 391bf215546Sopenharmony_ci instr->pass_flags = GCM_INSTR_PLACED; 392bf215546Sopenharmony_ci break; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci default: 395bf215546Sopenharmony_ci unreachable("Invalid instruction type in GCM"); 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci if (!(instr->pass_flags & GCM_INSTR_PLACED)) { 399bf215546Sopenharmony_ci /* If this is an unplaced instruction, go ahead and pull it out of 400bf215546Sopenharmony_ci * the program and put it on the instrs list. This has a couple 401bf215546Sopenharmony_ci * of benifits. First, it makes the scheduling algorithm more 402bf215546Sopenharmony_ci * efficient because we can avoid walking over basic blocks. 403bf215546Sopenharmony_ci * Second, it keeps us from causing linked list confusion when 404bf215546Sopenharmony_ci * we're trying to put everything in its proper place at the end 405bf215546Sopenharmony_ci * of the pass. 406bf215546Sopenharmony_ci * 407bf215546Sopenharmony_ci * Note that we don't use nir_instr_remove here because that also 408bf215546Sopenharmony_ci * cleans up uses and defs and we want to keep that information. 409bf215546Sopenharmony_ci */ 410bf215546Sopenharmony_ci exec_node_remove(&instr->node); 411bf215546Sopenharmony_ci exec_list_push_tail(&state->instrs, &instr->node); 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci } 415bf215546Sopenharmony_ci} 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_cistatic void 418bf215546Sopenharmony_cigcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci/** Update an instructions schedule for the given source 421bf215546Sopenharmony_ci * 422bf215546Sopenharmony_ci * This function is called iteratively as we walk the sources of an 423bf215546Sopenharmony_ci * instruction. It ensures that the given source instruction has been 424bf215546Sopenharmony_ci * scheduled and then update this instruction's block if the source 425bf215546Sopenharmony_ci * instruction is lower down the tree. 426bf215546Sopenharmony_ci */ 427bf215546Sopenharmony_cistatic bool 428bf215546Sopenharmony_cigcm_schedule_early_src(nir_src *src, void *void_state) 429bf215546Sopenharmony_ci{ 430bf215546Sopenharmony_ci struct gcm_state *state = void_state; 431bf215546Sopenharmony_ci nir_instr *instr = state->instr; 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci assert(src->is_ssa); 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci gcm_schedule_early_instr(src->ssa->parent_instr, void_state); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci /* While the index isn't a proper dominance depth, it does have the 438bf215546Sopenharmony_ci * property that if A dominates B then A->index <= B->index. Since we 439bf215546Sopenharmony_ci * know that this instruction must have been dominated by all of its 440bf215546Sopenharmony_ci * sources at some point (even if it's gone through value-numbering), 441bf215546Sopenharmony_ci * all of the sources must lie on the same branch of the dominance tree. 442bf215546Sopenharmony_ci * Therefore, we can just go ahead and just compare indices. 443bf215546Sopenharmony_ci */ 444bf215546Sopenharmony_ci struct gcm_instr_info *src_info = 445bf215546Sopenharmony_ci &state->instr_infos[src->ssa->parent_instr->index]; 446bf215546Sopenharmony_ci struct gcm_instr_info *info = &state->instr_infos[instr->index]; 447bf215546Sopenharmony_ci if (info->early_block->index < src_info->early_block->index) 448bf215546Sopenharmony_ci info->early_block = src_info->early_block; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* We need to restore the state instruction because it may have been 451bf215546Sopenharmony_ci * changed through the gcm_schedule_early_instr call above. Since we 452bf215546Sopenharmony_ci * may still be iterating through sources and future calls to 453bf215546Sopenharmony_ci * gcm_schedule_early_src for the same instruction will still need it. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci state->instr = instr; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci return true; 458bf215546Sopenharmony_ci} 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci/** Schedules an instruction early 461bf215546Sopenharmony_ci * 462bf215546Sopenharmony_ci * This function performs a recursive depth-first search starting at the 463bf215546Sopenharmony_ci * given instruction and proceeding through the sources to schedule 464bf215546Sopenharmony_ci * instructions as early as they can possibly go in the dominance tree. 465bf215546Sopenharmony_ci * The instructions are "scheduled" by updating the early_block field of 466bf215546Sopenharmony_ci * the corresponding gcm_instr_state entry. 467bf215546Sopenharmony_ci */ 468bf215546Sopenharmony_cistatic void 469bf215546Sopenharmony_cigcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state) 470bf215546Sopenharmony_ci{ 471bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY) 472bf215546Sopenharmony_ci return; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci /* Pinned/placed instructions always get scheduled in their original block so 477bf215546Sopenharmony_ci * we don't need to do anything. Also, bailing here keeps us from ever 478bf215546Sopenharmony_ci * following the sources of phi nodes which can be back-edges. 479bf215546Sopenharmony_ci */ 480bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_PINNED || 481bf215546Sopenharmony_ci instr->pass_flags & GCM_INSTR_PLACED) { 482bf215546Sopenharmony_ci state->instr_infos[instr->index].early_block = instr->block; 483bf215546Sopenharmony_ci return; 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci /* Start with the instruction at the top. As we iterate over the 487bf215546Sopenharmony_ci * sources, it will get moved down as needed. 488bf215546Sopenharmony_ci */ 489bf215546Sopenharmony_ci state->instr_infos[instr->index].early_block = nir_start_block(state->impl); 490bf215546Sopenharmony_ci state->instr = instr; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci nir_foreach_src(instr, gcm_schedule_early_src, state); 493bf215546Sopenharmony_ci} 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_cistatic bool 496bf215546Sopenharmony_ciset_block_for_loop_instr(struct gcm_state *state, nir_instr *instr, 497bf215546Sopenharmony_ci nir_block *block) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci /* If the instruction wasn't in a loop to begin with we don't want to push 500bf215546Sopenharmony_ci * it down into one. 501bf215546Sopenharmony_ci */ 502bf215546Sopenharmony_ci nir_loop *loop = state->blocks[instr->block->index].loop; 503bf215546Sopenharmony_ci if (loop == NULL) 504bf215546Sopenharmony_ci return true; 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci if (nir_block_dominates(instr->block, block)) 507bf215546Sopenharmony_ci return true; 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_ci /* If the loop only executes a single time i.e its wrapped in a: 510bf215546Sopenharmony_ci * do{ ... break; } while(true) 511bf215546Sopenharmony_ci * Don't move the instruction as it will not help anything. 512bf215546Sopenharmony_ci */ 513bf215546Sopenharmony_ci if (loop->info->limiting_terminator == NULL && !loop->info->complex_loop && 514bf215546Sopenharmony_ci nir_block_ends_in_break(nir_loop_last_block(loop))) 515bf215546Sopenharmony_ci return false; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci /* Being too aggressive with how we pull instructions out of loops can 518bf215546Sopenharmony_ci * result in extra register pressure and spilling. For example its fairly 519bf215546Sopenharmony_ci * common for loops in compute shaders to calculate SSBO offsets using 520bf215546Sopenharmony_ci * the workgroup id, subgroup id and subgroup invocation, pulling all 521bf215546Sopenharmony_ci * these calculations outside the loop causes register pressure. 522bf215546Sopenharmony_ci * 523bf215546Sopenharmony_ci * To work around these issues for now we only allow constant and texture 524bf215546Sopenharmony_ci * instructions to be moved outside their original loops, or instructions 525bf215546Sopenharmony_ci * where the total loop instruction count is less than 526bf215546Sopenharmony_ci * MAX_LOOP_INSTRUCTIONS. 527bf215546Sopenharmony_ci * 528bf215546Sopenharmony_ci * TODO: figure out some more heuristics to allow more to be moved out of 529bf215546Sopenharmony_ci * loops. 530bf215546Sopenharmony_ci */ 531bf215546Sopenharmony_ci if (state->blocks[instr->block->index].loop_instr_count < MAX_LOOP_INSTRUCTIONS) 532bf215546Sopenharmony_ci return true; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci if (instr->type == nir_instr_type_load_const || 535bf215546Sopenharmony_ci instr->type == nir_instr_type_tex) 536bf215546Sopenharmony_ci return true; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci return false; 539bf215546Sopenharmony_ci} 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_cistatic bool 542bf215546Sopenharmony_ciset_block_to_if_block(struct gcm_state *state, nir_instr *instr, 543bf215546Sopenharmony_ci nir_block *block) 544bf215546Sopenharmony_ci{ 545bf215546Sopenharmony_ci if (instr->type == nir_instr_type_load_const) 546bf215546Sopenharmony_ci return true; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci /* TODO: Figure out some more heuristics to allow more to be moved into 549bf215546Sopenharmony_ci * if-statements. 550bf215546Sopenharmony_ci */ 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci return false; 553bf215546Sopenharmony_ci} 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_cistatic nir_block * 556bf215546Sopenharmony_cigcm_choose_block_for_instr(nir_instr *instr, nir_block *early_block, 557bf215546Sopenharmony_ci nir_block *late_block, struct gcm_state *state) 558bf215546Sopenharmony_ci{ 559bf215546Sopenharmony_ci assert(nir_block_dominates(early_block, late_block)); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci bool block_set = false; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* First see if we can push the instruction down into an if-statements block */ 564bf215546Sopenharmony_ci nir_block *best = late_block; 565bf215546Sopenharmony_ci for (nir_block *block = late_block; block != NULL; block = block->imm_dom) { 566bf215546Sopenharmony_ci if (state->blocks[block->index].loop_depth > 567bf215546Sopenharmony_ci state->blocks[instr->block->index].loop_depth) 568bf215546Sopenharmony_ci continue; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci if (state->blocks[block->index].if_depth >= 571bf215546Sopenharmony_ci state->blocks[best->index].if_depth && 572bf215546Sopenharmony_ci set_block_to_if_block(state, instr, block)) { 573bf215546Sopenharmony_ci /* If we are pushing the instruction into an if we want it to be 574bf215546Sopenharmony_ci * in the earliest block not the latest to avoid creating register 575bf215546Sopenharmony_ci * pressure issues. So we don't break unless we come across the 576bf215546Sopenharmony_ci * block the instruction was originally in. 577bf215546Sopenharmony_ci */ 578bf215546Sopenharmony_ci best = block; 579bf215546Sopenharmony_ci block_set = true; 580bf215546Sopenharmony_ci if (block == instr->block) 581bf215546Sopenharmony_ci break; 582bf215546Sopenharmony_ci } else if (block == instr->block) { 583bf215546Sopenharmony_ci /* If we couldn't push the instruction later just put is back where it 584bf215546Sopenharmony_ci * was previously. 585bf215546Sopenharmony_ci */ 586bf215546Sopenharmony_ci if (!block_set) 587bf215546Sopenharmony_ci best = block; 588bf215546Sopenharmony_ci break; 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (block == early_block) 592bf215546Sopenharmony_ci break; 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* Now see if we can evict the instruction from a loop */ 596bf215546Sopenharmony_ci for (nir_block *block = late_block; block != NULL; block = block->imm_dom) { 597bf215546Sopenharmony_ci if (state->blocks[block->index].loop_depth < 598bf215546Sopenharmony_ci state->blocks[best->index].loop_depth) { 599bf215546Sopenharmony_ci if (set_block_for_loop_instr(state, instr, block)) { 600bf215546Sopenharmony_ci best = block; 601bf215546Sopenharmony_ci } else if (block == instr->block) { 602bf215546Sopenharmony_ci if (!block_set) 603bf215546Sopenharmony_ci best = block; 604bf215546Sopenharmony_ci break; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci } 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci if (block == early_block) 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci return best; 613bf215546Sopenharmony_ci} 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_cistatic void 616bf215546Sopenharmony_cigcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci/** Schedules the instruction associated with the given SSA def late 619bf215546Sopenharmony_ci * 620bf215546Sopenharmony_ci * This function works by first walking all of the uses of the given SSA 621bf215546Sopenharmony_ci * definition, ensuring that they are scheduled, and then computing the LCA 622bf215546Sopenharmony_ci * (least common ancestor) of its uses. It then schedules this instruction 623bf215546Sopenharmony_ci * as close to the LCA as possible while trying to stay out of loops. 624bf215546Sopenharmony_ci */ 625bf215546Sopenharmony_cistatic bool 626bf215546Sopenharmony_cigcm_schedule_late_def(nir_ssa_def *def, void *void_state) 627bf215546Sopenharmony_ci{ 628bf215546Sopenharmony_ci struct gcm_state *state = void_state; 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci nir_block *lca = NULL; 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci nir_foreach_use(use_src, def) { 633bf215546Sopenharmony_ci nir_instr *use_instr = use_src->parent_instr; 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci gcm_schedule_late_instr(use_instr, state); 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci /* Phi instructions are a bit special. SSA definitions don't have to 638bf215546Sopenharmony_ci * dominate the sources of the phi nodes that use them; instead, they 639bf215546Sopenharmony_ci * have to dominate the predecessor block corresponding to the phi 640bf215546Sopenharmony_ci * source. We handle this by looking through the sources, finding 641bf215546Sopenharmony_ci * any that are usingg this SSA def, and using those blocks instead 642bf215546Sopenharmony_ci * of the one the phi lives in. 643bf215546Sopenharmony_ci */ 644bf215546Sopenharmony_ci if (use_instr->type == nir_instr_type_phi) { 645bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(use_instr); 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci nir_foreach_phi_src(phi_src, phi) { 648bf215546Sopenharmony_ci if (phi_src->src.ssa == def) 649bf215546Sopenharmony_ci lca = nir_dominance_lca(lca, phi_src->pred); 650bf215546Sopenharmony_ci } 651bf215546Sopenharmony_ci } else { 652bf215546Sopenharmony_ci lca = nir_dominance_lca(lca, use_instr->block); 653bf215546Sopenharmony_ci } 654bf215546Sopenharmony_ci } 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci nir_foreach_if_use(use_src, def) { 657bf215546Sopenharmony_ci nir_if *if_stmt = use_src->parent_if; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci /* For if statements, we consider the block to be the one immediately 660bf215546Sopenharmony_ci * preceding the if CF node. 661bf215546Sopenharmony_ci */ 662bf215546Sopenharmony_ci nir_block *pred_block = 663bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node)); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci lca = nir_dominance_lca(lca, pred_block); 666bf215546Sopenharmony_ci } 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci nir_block *early_block = 669bf215546Sopenharmony_ci state->instr_infos[def->parent_instr->index].early_block; 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci /* Some instructions may never be used. Flag them and the instruction 672bf215546Sopenharmony_ci * placement code will get rid of them for us. 673bf215546Sopenharmony_ci */ 674bf215546Sopenharmony_ci if (lca == NULL) { 675bf215546Sopenharmony_ci def->parent_instr->block = NULL; 676bf215546Sopenharmony_ci return true; 677bf215546Sopenharmony_ci } 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci if (def->parent_instr->pass_flags & GCM_INSTR_SCHEDULE_EARLIER_ONLY && 680bf215546Sopenharmony_ci lca != def->parent_instr->block && 681bf215546Sopenharmony_ci nir_block_dominates(def->parent_instr->block, lca)) { 682bf215546Sopenharmony_ci lca = def->parent_instr->block; 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci /* We now have the LCA of all of the uses. If our invariants hold, 686bf215546Sopenharmony_ci * this is dominated by the block that we chose when scheduling early. 687bf215546Sopenharmony_ci * We now walk up the dominance tree and pick the lowest block that is 688bf215546Sopenharmony_ci * as far outside loops as we can get. 689bf215546Sopenharmony_ci */ 690bf215546Sopenharmony_ci nir_block *best_block = 691bf215546Sopenharmony_ci gcm_choose_block_for_instr(def->parent_instr, early_block, lca, state); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (def->parent_instr->block != best_block) 694bf215546Sopenharmony_ci state->progress = true; 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci def->parent_instr->block = best_block; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci return true; 699bf215546Sopenharmony_ci} 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci/** Schedules an instruction late 702bf215546Sopenharmony_ci * 703bf215546Sopenharmony_ci * This function performs a depth-first search starting at the given 704bf215546Sopenharmony_ci * instruction and proceeding through its uses to schedule instructions as 705bf215546Sopenharmony_ci * late as they can reasonably go in the dominance tree. The instructions 706bf215546Sopenharmony_ci * are "scheduled" by updating their instr->block field. 707bf215546Sopenharmony_ci * 708bf215546Sopenharmony_ci * The name of this function is actually a bit of a misnomer as it doesn't 709bf215546Sopenharmony_ci * schedule them "as late as possible" as the paper implies. Instead, it 710bf215546Sopenharmony_ci * first finds the lates possible place it can schedule the instruction and 711bf215546Sopenharmony_ci * then possibly schedules it earlier than that. The actual location is as 712bf215546Sopenharmony_ci * far down the tree as we can go while trying to stay out of loops. 713bf215546Sopenharmony_ci */ 714bf215546Sopenharmony_cistatic void 715bf215546Sopenharmony_cigcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state) 716bf215546Sopenharmony_ci{ 717bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE) 718bf215546Sopenharmony_ci return; 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci /* Pinned/placed instructions are already scheduled so we don't need to do 723bf215546Sopenharmony_ci * anything. Also, bailing here keeps us from ever following phi nodes 724bf215546Sopenharmony_ci * which can be back-edges. 725bf215546Sopenharmony_ci */ 726bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_PLACED || 727bf215546Sopenharmony_ci instr->pass_flags & GCM_INSTR_PINNED) 728bf215546Sopenharmony_ci return; 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, gcm_schedule_late_def, state); 731bf215546Sopenharmony_ci} 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_cistatic bool 734bf215546Sopenharmony_cigcm_replace_def_with_undef(nir_ssa_def *def, void *void_state) 735bf215546Sopenharmony_ci{ 736bf215546Sopenharmony_ci struct gcm_state *state = void_state; 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci if (nir_ssa_def_is_unused(def)) 739bf215546Sopenharmony_ci return true; 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci nir_ssa_undef_instr *undef = 742bf215546Sopenharmony_ci nir_ssa_undef_instr_create(state->impl->function->shader, 743bf215546Sopenharmony_ci def->num_components, def->bit_size); 744bf215546Sopenharmony_ci nir_instr_insert(nir_before_cf_list(&state->impl->body), &undef->instr); 745bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(def, &undef->def); 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci return true; 748bf215546Sopenharmony_ci} 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci/** Places an instrution back into the program 751bf215546Sopenharmony_ci * 752bf215546Sopenharmony_ci * The earlier passes of GCM simply choose blocks for each instruction and 753bf215546Sopenharmony_ci * otherwise leave them alone. This pass actually places the instructions 754bf215546Sopenharmony_ci * into their chosen blocks. 755bf215546Sopenharmony_ci * 756bf215546Sopenharmony_ci * To do so, we simply insert instructions in the reverse order they were 757bf215546Sopenharmony_ci * extracted. This will simply place instructions that were scheduled earlier 758bf215546Sopenharmony_ci * onto the end of their new block and instructions that were scheduled later to 759bf215546Sopenharmony_ci * the start of their new block. 760bf215546Sopenharmony_ci */ 761bf215546Sopenharmony_cistatic void 762bf215546Sopenharmony_cigcm_place_instr(nir_instr *instr, struct gcm_state *state) 763bf215546Sopenharmony_ci{ 764bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_PLACED) 765bf215546Sopenharmony_ci return; 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci instr->pass_flags |= GCM_INSTR_PLACED; 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci if (instr->block == NULL) { 770bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, gcm_replace_def_with_undef, state); 771bf215546Sopenharmony_ci nir_instr_remove(instr); 772bf215546Sopenharmony_ci return; 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci struct gcm_block_info *block_info = &state->blocks[instr->block->index]; 776bf215546Sopenharmony_ci exec_node_remove(&instr->node); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci if (block_info->last_instr) { 779bf215546Sopenharmony_ci exec_node_insert_node_before(&block_info->last_instr->node, 780bf215546Sopenharmony_ci &instr->node); 781bf215546Sopenharmony_ci } else { 782bf215546Sopenharmony_ci /* Schedule it at the end of the block */ 783bf215546Sopenharmony_ci nir_instr *jump_instr = nir_block_last_instr(instr->block); 784bf215546Sopenharmony_ci if (jump_instr && jump_instr->type == nir_instr_type_jump) { 785bf215546Sopenharmony_ci exec_node_insert_node_before(&jump_instr->node, &instr->node); 786bf215546Sopenharmony_ci } else { 787bf215546Sopenharmony_ci exec_list_push_tail(&instr->block->instr_list, &instr->node); 788bf215546Sopenharmony_ci } 789bf215546Sopenharmony_ci } 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci block_info->last_instr = instr; 792bf215546Sopenharmony_ci} 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_cistatic bool 795bf215546Sopenharmony_ciopt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number) 796bf215546Sopenharmony_ci{ 797bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_block_index | 798bf215546Sopenharmony_ci nir_metadata_dominance); 799bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_loop_analysis, 800bf215546Sopenharmony_ci shader->options->force_indirect_unrolling, 801bf215546Sopenharmony_ci shader->options->force_indirect_unrolling_sampler); 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci /* A previous pass may have left pass_flags dirty, so clear it all out. */ 804bf215546Sopenharmony_ci nir_foreach_block(block, impl) 805bf215546Sopenharmony_ci nir_foreach_instr(instr, block) 806bf215546Sopenharmony_ci instr->pass_flags = 0; 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci struct gcm_state state; 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_ci state.impl = impl; 811bf215546Sopenharmony_ci state.instr = NULL; 812bf215546Sopenharmony_ci state.progress = false; 813bf215546Sopenharmony_ci exec_list_make_empty(&state.instrs); 814bf215546Sopenharmony_ci state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks); 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci gcm_build_block_info(&impl->body, &state, NULL, 0, 0, ~0u); 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci gcm_pin_instructions(impl, &state); 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci state.instr_infos = 821bf215546Sopenharmony_ci rzalloc_array(NULL, struct gcm_instr_info, state.num_instrs); 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci if (value_number) { 824bf215546Sopenharmony_ci struct set *gvn_set = nir_instr_set_create(NULL); 825bf215546Sopenharmony_ci foreach_list_typed_safe(nir_instr, instr, node, &state.instrs) { 826bf215546Sopenharmony_ci if (instr->pass_flags & GCM_INSTR_PINNED) 827bf215546Sopenharmony_ci continue; 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci if (nir_instr_set_add_or_rewrite(gvn_set, instr, NULL)) 830bf215546Sopenharmony_ci state.progress = true; 831bf215546Sopenharmony_ci } 832bf215546Sopenharmony_ci nir_instr_set_destroy(gvn_set); 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci foreach_list_typed(nir_instr, instr, node, &state.instrs) 836bf215546Sopenharmony_ci gcm_schedule_early_instr(instr, &state); 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci foreach_list_typed(nir_instr, instr, node, &state.instrs) 839bf215546Sopenharmony_ci gcm_schedule_late_instr(instr, &state); 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci while (!exec_list_is_empty(&state.instrs)) { 842bf215546Sopenharmony_ci nir_instr *instr = exec_node_data(nir_instr, 843bf215546Sopenharmony_ci state.instrs.tail_sentinel.prev, node); 844bf215546Sopenharmony_ci gcm_place_instr(instr, &state); 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci ralloc_free(state.blocks); 848bf215546Sopenharmony_ci ralloc_free(state.instr_infos); 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 851bf215546Sopenharmony_ci nir_metadata_dominance | 852bf215546Sopenharmony_ci nir_metadata_loop_analysis); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci return state.progress; 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_cibool 858bf215546Sopenharmony_cinir_opt_gcm(nir_shader *shader, bool value_number) 859bf215546Sopenharmony_ci{ 860bf215546Sopenharmony_ci bool progress = false; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 863bf215546Sopenharmony_ci if (function->impl) 864bf215546Sopenharmony_ci progress |= opt_gcm_impl(shader, function->impl, value_number); 865bf215546Sopenharmony_ci } 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci return progress; 868bf215546Sopenharmony_ci} 869