1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/* This pass provides a way to move computations that are always the same for 28bf215546Sopenharmony_ci * an entire draw/compute dispatch into a "preamble" that runs before the main 29bf215546Sopenharmony_ci * entrypoint. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * We also expose a separate API to get or construct the preamble of a shader 32bf215546Sopenharmony_ci * in case backends want to insert their own code. 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cinir_function_impl * 37bf215546Sopenharmony_cinir_shader_get_preamble(nir_shader *shader) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader); 40bf215546Sopenharmony_ci if (entrypoint->preamble) { 41bf215546Sopenharmony_ci return entrypoint->preamble->impl; 42bf215546Sopenharmony_ci } else { 43bf215546Sopenharmony_ci nir_function *preamble = nir_function_create(shader, "@preamble"); 44bf215546Sopenharmony_ci preamble->is_preamble = true; 45bf215546Sopenharmony_ci nir_function_impl *impl = nir_function_impl_create(preamble); 46bf215546Sopenharmony_ci entrypoint->preamble = preamble; 47bf215546Sopenharmony_ci return impl; 48bf215546Sopenharmony_ci } 49bf215546Sopenharmony_ci} 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_citypedef struct { 52bf215546Sopenharmony_ci bool can_move; 53bf215546Sopenharmony_ci bool candidate; 54bf215546Sopenharmony_ci bool must_stay; 55bf215546Sopenharmony_ci bool replace; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci unsigned can_move_users; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci unsigned size, align; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci unsigned offset; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci /* Average the cost of a value among its users, to try to account for 64bf215546Sopenharmony_ci * values that have multiple can_move uses. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci float value; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci /* Overall benefit, i.e. the value minus any cost to inserting 69bf215546Sopenharmony_ci * load_preamble. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci float benefit; 72bf215546Sopenharmony_ci} def_state; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_citypedef struct { 75bf215546Sopenharmony_ci /* Per-definition array of states */ 76bf215546Sopenharmony_ci def_state *states; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci nir_ssa_def *def; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci const nir_opt_preamble_options *options; 81bf215546Sopenharmony_ci} opt_preamble_ctx; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic float 84bf215546Sopenharmony_ciget_instr_cost(nir_instr *instr, const nir_opt_preamble_options *options) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci /* No backend will want to hoist load_const or undef by itself, so handle 87bf215546Sopenharmony_ci * this for them. 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci if (instr->type == nir_instr_type_load_const || 90bf215546Sopenharmony_ci instr->type == nir_instr_type_ssa_undef) 91bf215546Sopenharmony_ci return 0; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci return options->instr_cost_cb(instr, options->cb_data); 94bf215546Sopenharmony_ci} 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistatic bool 97bf215546Sopenharmony_cican_move_src(nir_src *src, void *state) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci opt_preamble_ctx *ctx = state; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci assert(src->is_ssa); 102bf215546Sopenharmony_ci return ctx->states[src->ssa->index].can_move; 103bf215546Sopenharmony_ci} 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_cistatic bool 106bf215546Sopenharmony_cican_move_srcs(nir_instr *instr, opt_preamble_ctx *ctx) 107bf215546Sopenharmony_ci{ 108bf215546Sopenharmony_ci return nir_foreach_src(instr, can_move_src, ctx); 109bf215546Sopenharmony_ci} 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_cistatic bool 112bf215546Sopenharmony_cican_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci switch (instr->intrinsic) { 115bf215546Sopenharmony_ci /* Intrinsics which can always be moved */ 116bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 117bf215546Sopenharmony_ci case nir_intrinsic_load_work_dim: 118bf215546Sopenharmony_ci case nir_intrinsic_load_num_workgroups: 119bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_size: 120bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size: 121bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size_addr_amd: 122bf215546Sopenharmony_ci case nir_intrinsic_load_sbt_base_amd: 123bf215546Sopenharmony_ci case nir_intrinsic_load_is_indexed_draw: 124bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_scale: 125bf215546Sopenharmony_ci case nir_intrinsic_load_user_clip_plane: 126bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_scale: 127bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_scale: 128bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_z_scale: 129bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_offset: 130bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_offset: 131bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_offset: 132bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_z_offset: 133bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_a_float: 134bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_b_float: 135bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_g_float: 136bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_r_float: 137bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_rgba: 138bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_aaaa8888_unorm: 139bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_rgba8888_unorm: 140bf215546Sopenharmony_ci case nir_intrinsic_load_line_width: 141bf215546Sopenharmony_ci case nir_intrinsic_load_aa_line_width: 142bf215546Sopenharmony_ci case nir_intrinsic_load_fb_layers_v3d: 143bf215546Sopenharmony_ci case nir_intrinsic_load_tcs_num_patches_amd: 144bf215546Sopenharmony_ci case nir_intrinsic_load_sample_positions_pan: 145bf215546Sopenharmony_ci case nir_intrinsic_load_shader_query_enabled_amd: 146bf215546Sopenharmony_ci case nir_intrinsic_load_cull_front_face_enabled_amd: 147bf215546Sopenharmony_ci case nir_intrinsic_load_cull_back_face_enabled_amd: 148bf215546Sopenharmony_ci case nir_intrinsic_load_cull_ccw_amd: 149bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_primitives_enabled_amd: 150bf215546Sopenharmony_ci case nir_intrinsic_load_cull_any_enabled_amd: 151bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_prim_precision_amd: 152bf215546Sopenharmony_ci return true; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci /* Intrinsics which can be moved depending on hardware */ 155bf215546Sopenharmony_ci case nir_intrinsic_load_base_instance: 156bf215546Sopenharmony_ci case nir_intrinsic_load_base_vertex: 157bf215546Sopenharmony_ci case nir_intrinsic_load_first_vertex: 158bf215546Sopenharmony_ci case nir_intrinsic_load_draw_id: 159bf215546Sopenharmony_ci return ctx->options->drawid_uniform; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_size: 162bf215546Sopenharmony_ci case nir_intrinsic_load_num_subgroups: 163bf215546Sopenharmony_ci return ctx->options->subgroup_size_uniform; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci /* Intrinsics which can be moved if the sources can */ 166bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 167bf215546Sopenharmony_ci case nir_intrinsic_load_ubo_vec4: 168bf215546Sopenharmony_ci case nir_intrinsic_get_ubo_size: 169bf215546Sopenharmony_ci case nir_intrinsic_get_ssbo_size: 170bf215546Sopenharmony_ci case nir_intrinsic_ballot_bitfield_extract: 171bf215546Sopenharmony_ci case nir_intrinsic_ballot_find_lsb: 172bf215546Sopenharmony_ci case nir_intrinsic_ballot_find_msb: 173bf215546Sopenharmony_ci case nir_intrinsic_ballot_bit_count_reduce: 174bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 175bf215546Sopenharmony_ci case nir_intrinsic_load_global_constant: 176bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 177bf215546Sopenharmony_ci case nir_intrinsic_load_constant: 178bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos_from_id: 179bf215546Sopenharmony_ci case nir_intrinsic_load_kernel_input: 180bf215546Sopenharmony_ci case nir_intrinsic_load_buffer_amd: 181bf215546Sopenharmony_ci case nir_intrinsic_image_samples: 182bf215546Sopenharmony_ci case nir_intrinsic_image_deref_samples: 183bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_samples: 184bf215546Sopenharmony_ci case nir_intrinsic_image_size: 185bf215546Sopenharmony_ci case nir_intrinsic_image_deref_size: 186bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_size: 187bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_index: 188bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_reindex: 189bf215546Sopenharmony_ci case nir_intrinsic_load_vulkan_descriptor: 190bf215546Sopenharmony_ci case nir_intrinsic_quad_swizzle_amd: 191bf215546Sopenharmony_ci case nir_intrinsic_masked_swizzle_amd: 192bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo_address: 193bf215546Sopenharmony_ci case nir_intrinsic_bindless_resource_ir3: 194bf215546Sopenharmony_ci return can_move_srcs(&instr->instr, ctx); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Image/SSBO loads can be moved if they are CAN_REORDER and their 197bf215546Sopenharmony_ci * sources can be moved. 198bf215546Sopenharmony_ci */ 199bf215546Sopenharmony_ci case nir_intrinsic_image_load: 200bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_load: 201bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 202bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo_ir3: 203bf215546Sopenharmony_ci return (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) && 204bf215546Sopenharmony_ci can_move_srcs(&instr->instr, ctx); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci default: 207bf215546Sopenharmony_ci return false; 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci} 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_cistatic bool 212bf215546Sopenharmony_cican_move_instr(nir_instr *instr, opt_preamble_ctx *ctx) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci switch (instr->type) { 215bf215546Sopenharmony_ci case nir_instr_type_tex: { 216bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 217bf215546Sopenharmony_ci /* See note below about derivatives. We have special code to convert tex 218bf215546Sopenharmony_ci * to txd, though, because it's a common case. 219bf215546Sopenharmony_ci */ 220bf215546Sopenharmony_ci if (nir_tex_instr_has_implicit_derivative(tex) && 221bf215546Sopenharmony_ci tex->op != nir_texop_tex) { 222bf215546Sopenharmony_ci return false; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci return can_move_srcs(instr, ctx); 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci case nir_instr_type_alu: { 227bf215546Sopenharmony_ci /* The preamble is presumably run with only one thread, so we can't run 228bf215546Sopenharmony_ci * derivatives in it. 229bf215546Sopenharmony_ci * TODO: Replace derivatives with 0 instead, if real apps hit this. 230bf215546Sopenharmony_ci */ 231bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 232bf215546Sopenharmony_ci switch (alu->op) { 233bf215546Sopenharmony_ci case nir_op_fddx: 234bf215546Sopenharmony_ci case nir_op_fddy: 235bf215546Sopenharmony_ci case nir_op_fddx_fine: 236bf215546Sopenharmony_ci case nir_op_fddy_fine: 237bf215546Sopenharmony_ci case nir_op_fddx_coarse: 238bf215546Sopenharmony_ci case nir_op_fddy_coarse: 239bf215546Sopenharmony_ci return false; 240bf215546Sopenharmony_ci default: 241bf215546Sopenharmony_ci return can_move_srcs(instr, ctx); 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 245bf215546Sopenharmony_ci return can_move_intrinsic(nir_instr_as_intrinsic(instr), ctx); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci case nir_instr_type_load_const: 248bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 249bf215546Sopenharmony_ci return true; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci case nir_instr_type_deref: { 252bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 253bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) { 254bf215546Sopenharmony_ci switch (deref->modes) { 255bf215546Sopenharmony_ci case nir_var_uniform: 256bf215546Sopenharmony_ci case nir_var_mem_ubo: 257bf215546Sopenharmony_ci return true; 258bf215546Sopenharmony_ci default: 259bf215546Sopenharmony_ci return false; 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci } else { 262bf215546Sopenharmony_ci return can_move_srcs(instr, ctx); 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci case nir_instr_type_phi: 267bf215546Sopenharmony_ci /* TODO: we could move an if-statement if everything inside it is 268bf215546Sopenharmony_ci * moveable. 269bf215546Sopenharmony_ci */ 270bf215546Sopenharmony_ci return false; 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci default: 273bf215546Sopenharmony_ci return false; 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci} 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci/* True if we should avoid making this a candidate. This is only called on 278bf215546Sopenharmony_ci * instructions we already determined we can move, this just makes it so that 279bf215546Sopenharmony_ci * uses of this instruction cannot be rewritten. Typically this happens 280bf215546Sopenharmony_ci * because of static constraints on the IR, for example some deref chains 281bf215546Sopenharmony_ci * cannot be broken. 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_cistatic bool 284bf215546Sopenharmony_ciavoid_instr(nir_instr *instr, const nir_opt_preamble_options *options) 285bf215546Sopenharmony_ci{ 286bf215546Sopenharmony_ci if (instr->type == nir_instr_type_deref) 287bf215546Sopenharmony_ci return true; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci return options->avoid_instr_cb(instr, options->cb_data); 290bf215546Sopenharmony_ci} 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_cistatic bool 293bf215546Sopenharmony_ciupdate_src_value(nir_src *src, void *data) 294bf215546Sopenharmony_ci{ 295bf215546Sopenharmony_ci opt_preamble_ctx *ctx = data; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci def_state *state = &ctx->states[ctx->def->index]; 298bf215546Sopenharmony_ci def_state *src_state = &ctx->states[src->ssa->index]; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci assert(src_state->can_move); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci /* If an instruction has can_move and non-can_move users, it becomes a 303bf215546Sopenharmony_ci * candidate and its value shouldn't propagate downwards. For example, 304bf215546Sopenharmony_ci * imagine a chain like this: 305bf215546Sopenharmony_ci * 306bf215546Sopenharmony_ci * -- F (cannot move) 307bf215546Sopenharmony_ci * / 308bf215546Sopenharmony_ci * A <-- B <-- C <-- D <-- E (cannot move) 309bf215546Sopenharmony_ci * 310bf215546Sopenharmony_ci * B and D are marked candidates. Picking B removes A and B, picking D 311bf215546Sopenharmony_ci * removes C and D, and picking both removes all 4. Therefore B and D are 312bf215546Sopenharmony_ci * independent and B's value shouldn't flow into D. 313bf215546Sopenharmony_ci * 314bf215546Sopenharmony_ci * A similar argument holds for must_stay values. 315bf215546Sopenharmony_ci */ 316bf215546Sopenharmony_ci if (!src_state->must_stay && !src_state->candidate) 317bf215546Sopenharmony_ci state->value += src_state->value; 318bf215546Sopenharmony_ci return true; 319bf215546Sopenharmony_ci} 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_cistatic int 322bf215546Sopenharmony_cicandidate_sort(const void *data1, const void *data2) 323bf215546Sopenharmony_ci{ 324bf215546Sopenharmony_ci const def_state *state1 = *(def_state **)data1; 325bf215546Sopenharmony_ci const def_state *state2 = *(def_state **)data2; 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci float value1 = state1->value / state1->size; 328bf215546Sopenharmony_ci float value2 = state2->value / state2->size; 329bf215546Sopenharmony_ci if (value1 < value2) 330bf215546Sopenharmony_ci return 1; 331bf215546Sopenharmony_ci else if (value1 > value2) 332bf215546Sopenharmony_ci return -1; 333bf215546Sopenharmony_ci else 334bf215546Sopenharmony_ci return 0; 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cibool 338bf215546Sopenharmony_cinir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options, 339bf215546Sopenharmony_ci unsigned *size) 340bf215546Sopenharmony_ci{ 341bf215546Sopenharmony_ci opt_preamble_ctx ctx = { 342bf215546Sopenharmony_ci .options = options, 343bf215546Sopenharmony_ci }; 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 346bf215546Sopenharmony_ci ctx.states = calloc(impl->ssa_alloc, sizeof(*ctx.states)); 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci /* Step 1: Calculate can_move */ 349bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 350bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 351bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 352bf215546Sopenharmony_ci if (!def) 353bf215546Sopenharmony_ci continue; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci def_state *state = &ctx.states[def->index]; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci state->can_move = can_move_instr(instr, &ctx); 358bf215546Sopenharmony_ci } 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci /* Step 2: Calculate is_candidate. This is complicated by the presence of 362bf215546Sopenharmony_ci * non-candidate instructions like derefs whose users cannot be rewritten. 363bf215546Sopenharmony_ci * If a deref chain is used at all by a non-can_move thing, then any offset 364bf215546Sopenharmony_ci * sources anywhere along the chain should be considered candidates because 365bf215546Sopenharmony_ci * the entire deref chain will never be deleted, but if it's only used by 366bf215546Sopenharmony_ci * can_move things then it becomes subsumed by its users and none of the 367bf215546Sopenharmony_ci * offset sources should be considered candidates as they will be removed 368bf215546Sopenharmony_ci * when the users of the deref chain are moved. We need to replace "are 369bf215546Sopenharmony_ci * there any non-can_move users" with "are there any non-can_move users, 370bf215546Sopenharmony_ci * *recursing through non-candidate users*". We do this by walking backward 371bf215546Sopenharmony_ci * and marking when a non-candidate instruction must stay in the final 372bf215546Sopenharmony_ci * program because it has a non-can_move user, including recursively. 373bf215546Sopenharmony_ci */ 374bf215546Sopenharmony_ci unsigned num_candidates = 0; 375bf215546Sopenharmony_ci nir_foreach_block_reverse (block, impl) { 376bf215546Sopenharmony_ci nir_foreach_instr_reverse (instr, block) { 377bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 378bf215546Sopenharmony_ci if (!def) 379bf215546Sopenharmony_ci continue; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci def_state *state = &ctx.states[def->index]; 382bf215546Sopenharmony_ci if (!state->can_move) 383bf215546Sopenharmony_ci continue; 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci state->value = get_instr_cost(instr, options); 386bf215546Sopenharmony_ci bool is_candidate = !avoid_instr(instr, options); 387bf215546Sopenharmony_ci state->candidate = false; 388bf215546Sopenharmony_ci state->must_stay = false; 389bf215546Sopenharmony_ci nir_foreach_use (use, def) { 390bf215546Sopenharmony_ci nir_ssa_def *use_def = nir_instr_ssa_def(use->parent_instr); 391bf215546Sopenharmony_ci if (!use_def || !ctx.states[use_def->index].can_move || 392bf215546Sopenharmony_ci ctx.states[use_def->index].must_stay) { 393bf215546Sopenharmony_ci if (is_candidate) 394bf215546Sopenharmony_ci state->candidate = true; 395bf215546Sopenharmony_ci else 396bf215546Sopenharmony_ci state->must_stay = true; 397bf215546Sopenharmony_ci } else { 398bf215546Sopenharmony_ci state->can_move_users++; 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci nir_foreach_if_use (use, def) { 403bf215546Sopenharmony_ci if (is_candidate) 404bf215546Sopenharmony_ci state->candidate = true; 405bf215546Sopenharmony_ci else 406bf215546Sopenharmony_ci state->must_stay = true; 407bf215546Sopenharmony_ci break; 408bf215546Sopenharmony_ci } 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci if (state->candidate) 411bf215546Sopenharmony_ci num_candidates++; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (num_candidates == 0) { 416bf215546Sopenharmony_ci *size = 0; 417bf215546Sopenharmony_ci free(ctx.states); 418bf215546Sopenharmony_ci return false; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci def_state **candidates = malloc(sizeof(*candidates) * num_candidates); 422bf215546Sopenharmony_ci unsigned candidate_idx = 0; 423bf215546Sopenharmony_ci unsigned total_size = 0; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* Step 3: Calculate value of candidates by propagating downwards. We try 426bf215546Sopenharmony_ci * to share the value amongst can_move uses, in case there are multiple. 427bf215546Sopenharmony_ci * This won't always find the most optimal solution, but is hopefully a 428bf215546Sopenharmony_ci * good heuristic. 429bf215546Sopenharmony_ci * 430bf215546Sopenharmony_ci * Note that we use the can_move adjusted in the last pass, because if a 431bf215546Sopenharmony_ci * can_move instruction cannot be moved because it's not a candidate and it 432bf215546Sopenharmony_ci * has a non-can_move source then we don't want to count it as a use. 433bf215546Sopenharmony_ci * 434bf215546Sopenharmony_ci * While we're here, also collect an array of candidates. 435bf215546Sopenharmony_ci */ 436bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 437bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 438bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 439bf215546Sopenharmony_ci if (!def) 440bf215546Sopenharmony_ci continue; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci def_state *state = &ctx.states[def->index]; 443bf215546Sopenharmony_ci if (!state->can_move || state->must_stay) 444bf215546Sopenharmony_ci continue; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci ctx.def = def; 447bf215546Sopenharmony_ci nir_foreach_src(instr, update_src_value, &ctx); 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* If this instruction is a candidate, its value shouldn't be 450bf215546Sopenharmony_ci * propagated so we skip dividing it. 451bf215546Sopenharmony_ci * 452bf215546Sopenharmony_ci * Note: if it's can_move but not a candidate, then all its users 453bf215546Sopenharmony_ci * must be can_move, so if there are no users then it must be dead. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci if (!state->candidate && !state->must_stay) { 456bf215546Sopenharmony_ci if (state->can_move_users > 0) 457bf215546Sopenharmony_ci state->value /= state->can_move_users; 458bf215546Sopenharmony_ci else 459bf215546Sopenharmony_ci state->value = 0; 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci if (state->candidate) { 463bf215546Sopenharmony_ci state->benefit = state->value - 464bf215546Sopenharmony_ci options->rewrite_cost_cb(def, options->cb_data); 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci if (state->benefit > 0) { 467bf215546Sopenharmony_ci options->def_size(def, &state->size, &state->align); 468bf215546Sopenharmony_ci total_size = ALIGN_POT(total_size, state->align); 469bf215546Sopenharmony_ci total_size += state->size; 470bf215546Sopenharmony_ci candidates[candidate_idx++] = state; 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci } 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci assert(candidate_idx <= num_candidates); 477bf215546Sopenharmony_ci num_candidates = candidate_idx; 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci if (num_candidates == 0) { 480bf215546Sopenharmony_ci *size = 0; 481bf215546Sopenharmony_ci free(ctx.states); 482bf215546Sopenharmony_ci free(candidates); 483bf215546Sopenharmony_ci return false; 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci /* Step 4: Figure out which candidates we're going to replace and assign an 487bf215546Sopenharmony_ci * offset. Assuming there is no expression sharing, this is similar to the 488bf215546Sopenharmony_ci * 0-1 knapsack problem, except when there is a gap introduced by 489bf215546Sopenharmony_ci * alignment. We use a well-known greedy approximation, sorting by value 490bf215546Sopenharmony_ci * divided by size. 491bf215546Sopenharmony_ci */ 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci if (total_size > options->preamble_storage_size) { 494bf215546Sopenharmony_ci qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort); 495bf215546Sopenharmony_ci } 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci unsigned offset = 0; 498bf215546Sopenharmony_ci for (unsigned i = 0; i < num_candidates; i++) { 499bf215546Sopenharmony_ci def_state *state = candidates[i]; 500bf215546Sopenharmony_ci offset = ALIGN_POT(offset, state->align); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (offset + state->size > options->preamble_storage_size) 503bf215546Sopenharmony_ci break; 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci state->replace = true; 506bf215546Sopenharmony_ci state->offset = offset; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci offset += state->size; 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci *size = offset; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci free(candidates); 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci /* Step 5: Actually do the replacement. */ 516bf215546Sopenharmony_ci struct hash_table *remap_table = 517bf215546Sopenharmony_ci _mesa_pointer_hash_table_create(NULL); 518bf215546Sopenharmony_ci nir_function_impl *preamble = 519bf215546Sopenharmony_ci nir_shader_get_preamble(impl->function->shader); 520bf215546Sopenharmony_ci nir_builder _b; 521bf215546Sopenharmony_ci nir_builder *b = &_b; 522bf215546Sopenharmony_ci nir_builder_init(b, preamble); 523bf215546Sopenharmony_ci b->cursor = nir_before_cf_list(&preamble->body); 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 526bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 527bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 528bf215546Sopenharmony_ci if (!def) 529bf215546Sopenharmony_ci continue; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci def_state *state = &ctx.states[def->index]; 532bf215546Sopenharmony_ci if (!state->can_move) 533bf215546Sopenharmony_ci continue; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci nir_instr *clone = nir_instr_clone_deep(impl->function->shader, 536bf215546Sopenharmony_ci instr, remap_table); 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci nir_builder_instr_insert(b, clone); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci if (clone->type == nir_instr_type_tex) { 541bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(clone); 542bf215546Sopenharmony_ci if (tex->op == nir_texop_tex) { 543bf215546Sopenharmony_ci /* For maximum compatibility, replace normal textures with 544bf215546Sopenharmony_ci * textureGrad with a gradient of 0. 545bf215546Sopenharmony_ci * TODO: Handle txb somehow. 546bf215546Sopenharmony_ci */ 547bf215546Sopenharmony_ci b->cursor = nir_before_instr(clone); 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci nir_ssa_def *zero = 550bf215546Sopenharmony_ci nir_imm_zero(b, tex->coord_components - tex->is_array, 32); 551bf215546Sopenharmony_ci nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(zero)); 552bf215546Sopenharmony_ci nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(zero)); 553bf215546Sopenharmony_ci tex->op = nir_texop_txd; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci b->cursor = nir_after_instr(clone); 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci if (state->replace) { 560bf215546Sopenharmony_ci nir_ssa_def *clone_def = nir_instr_ssa_def(clone); 561bf215546Sopenharmony_ci nir_store_preamble(b, clone_def, .base = state->offset); 562bf215546Sopenharmony_ci } 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci nir_builder_init(b, impl); 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 569bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 570bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 571bf215546Sopenharmony_ci if (!def) 572bf215546Sopenharmony_ci continue; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci def_state *state = &ctx.states[def->index]; 575bf215546Sopenharmony_ci if (!state->replace) 576bf215546Sopenharmony_ci continue; 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci nir_ssa_def *new_def = 581bf215546Sopenharmony_ci nir_load_preamble(b, def->num_components, def->bit_size, 582bf215546Sopenharmony_ci .base = state->offset); 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(def, new_def); 586bf215546Sopenharmony_ci nir_instr_free_and_dce(instr); 587bf215546Sopenharmony_ci } 588bf215546Sopenharmony_ci } 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci nir_metadata_preserve(impl, 591bf215546Sopenharmony_ci nir_metadata_block_index | 592bf215546Sopenharmony_ci nir_metadata_dominance); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci ralloc_free(remap_table); 595bf215546Sopenharmony_ci free(ctx.states); 596bf215546Sopenharmony_ci return true; 597bf215546Sopenharmony_ci} 598