1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "nir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/* This pass computes for each ssa definition if it is uniform. 28bf215546Sopenharmony_ci * That is, the variable has the same value for all invocations 29bf215546Sopenharmony_ci * of the group. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * This divergence analysis pass expects the shader to be in LCSSA-form. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * This algorithm implements "The Simple Divergence Analysis" from 34bf215546Sopenharmony_ci * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira. 35bf215546Sopenharmony_ci * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS), 36bf215546Sopenharmony_ci * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2> 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistruct divergence_state { 40bf215546Sopenharmony_ci const gl_shader_stage stage; 41bf215546Sopenharmony_ci nir_shader *shader; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci /** current control flow state */ 44bf215546Sopenharmony_ci /* True if some loop-active invocations might take a different control-flow path. 45bf215546Sopenharmony_ci * A divergent break does not cause subsequent control-flow to be considered 46bf215546Sopenharmony_ci * divergent because those invocations are no longer active in the loop. 47bf215546Sopenharmony_ci * For a divergent if, both sides are considered divergent flow because 48bf215546Sopenharmony_ci * the other side is still loop-active. */ 49bf215546Sopenharmony_ci bool divergent_loop_cf; 50bf215546Sopenharmony_ci /* True if a divergent continue happened since the loop header */ 51bf215546Sopenharmony_ci bool divergent_loop_continue; 52bf215546Sopenharmony_ci /* True if a divergent break happened since the loop header */ 53bf215546Sopenharmony_ci bool divergent_loop_break; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci /* True if we visit the block for the fist time */ 56bf215546Sopenharmony_ci bool first_visit; 57bf215546Sopenharmony_ci}; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_cistatic bool 60bf215546Sopenharmony_civisit_cf_list(struct exec_list *list, struct divergence_state *state); 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistatic bool 63bf215546Sopenharmony_civisit_alu(nir_alu_instr *instr) 64bf215546Sopenharmony_ci{ 65bf215546Sopenharmony_ci if (instr->dest.dest.ssa.divergent) 66bf215546Sopenharmony_ci return false; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci unsigned num_src = nir_op_infos[instr->op].num_inputs; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci for (unsigned i = 0; i < num_src; i++) { 71bf215546Sopenharmony_ci if (instr->src[i].src.ssa->divergent) { 72bf215546Sopenharmony_ci instr->dest.dest.ssa.divergent = true; 73bf215546Sopenharmony_ci return true; 74bf215546Sopenharmony_ci } 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci return false; 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistatic bool 81bf215546Sopenharmony_civisit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) 82bf215546Sopenharmony_ci{ 83bf215546Sopenharmony_ci if (!nir_intrinsic_infos[instr->intrinsic].has_dest) 84bf215546Sopenharmony_ci return false; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci if (instr->dest.ssa.divergent) 87bf215546Sopenharmony_ci return false; 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci nir_divergence_options options = shader->options->divergence_analysis_options; 90bf215546Sopenharmony_ci gl_shader_stage stage = shader->info.stage; 91bf215546Sopenharmony_ci bool is_divergent = false; 92bf215546Sopenharmony_ci switch (instr->intrinsic) { 93bf215546Sopenharmony_ci /* Intrinsics which are always uniform */ 94bf215546Sopenharmony_ci case nir_intrinsic_shader_clock: 95bf215546Sopenharmony_ci case nir_intrinsic_ballot: 96bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 97bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 98bf215546Sopenharmony_ci case nir_intrinsic_vote_any: 99bf215546Sopenharmony_ci case nir_intrinsic_vote_all: 100bf215546Sopenharmony_ci case nir_intrinsic_vote_feq: 101bf215546Sopenharmony_ci case nir_intrinsic_vote_ieq: 102bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: 103bf215546Sopenharmony_ci case nir_intrinsic_load_work_dim: 104bf215546Sopenharmony_ci case nir_intrinsic_load_num_workgroups: 105bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_size: 106bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_id: 107bf215546Sopenharmony_ci case nir_intrinsic_load_num_subgroups: 108bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size: 109bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size_addr_amd: 110bf215546Sopenharmony_ci case nir_intrinsic_load_sbt_base_amd: 111bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_size: 112bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_eq_mask: 113bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_ge_mask: 114bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_gt_mask: 115bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_le_mask: 116bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_lt_mask: 117bf215546Sopenharmony_ci case nir_intrinsic_first_invocation: 118bf215546Sopenharmony_ci case nir_intrinsic_last_invocation: 119bf215546Sopenharmony_ci case nir_intrinsic_load_base_instance: 120bf215546Sopenharmony_ci case nir_intrinsic_load_base_vertex: 121bf215546Sopenharmony_ci case nir_intrinsic_load_first_vertex: 122bf215546Sopenharmony_ci case nir_intrinsic_load_draw_id: 123bf215546Sopenharmony_ci case nir_intrinsic_load_is_indexed_draw: 124bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_scale: 125bf215546Sopenharmony_ci case nir_intrinsic_load_user_clip_plane: 126bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_scale: 127bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_scale: 128bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_z_scale: 129bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_offset: 130bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_x_offset: 131bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_y_offset: 132bf215546Sopenharmony_ci case nir_intrinsic_load_viewport_z_offset: 133bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_a_float: 134bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_b_float: 135bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_g_float: 136bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_r_float: 137bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_rgba: 138bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_aaaa8888_unorm: 139bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_rgba8888_unorm: 140bf215546Sopenharmony_ci case nir_intrinsic_load_line_width: 141bf215546Sopenharmony_ci case nir_intrinsic_load_aa_line_width: 142bf215546Sopenharmony_ci case nir_intrinsic_load_xfb_address: 143bf215546Sopenharmony_ci case nir_intrinsic_load_num_vertices: 144bf215546Sopenharmony_ci case nir_intrinsic_load_fb_layers_v3d: 145bf215546Sopenharmony_ci case nir_intrinsic_load_tcs_num_patches_amd: 146bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_factors_amd: 147bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_offchip_amd: 148bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_factors_offset_amd: 149bf215546Sopenharmony_ci case nir_intrinsic_load_ring_tess_offchip_offset_amd: 150bf215546Sopenharmony_ci case nir_intrinsic_load_ring_mesh_scratch_amd: 151bf215546Sopenharmony_ci case nir_intrinsic_load_ring_mesh_scratch_offset_amd: 152bf215546Sopenharmony_ci case nir_intrinsic_load_ring_esgs_amd: 153bf215546Sopenharmony_ci case nir_intrinsic_load_ring_es2gs_offset_amd: 154bf215546Sopenharmony_ci case nir_intrinsic_load_ring_task_draw_amd: 155bf215546Sopenharmony_ci case nir_intrinsic_load_ring_task_payload_amd: 156bf215546Sopenharmony_ci case nir_intrinsic_load_task_ring_entry_amd: 157bf215546Sopenharmony_ci case nir_intrinsic_load_task_ib_addr: 158bf215546Sopenharmony_ci case nir_intrinsic_load_task_ib_stride: 159bf215546Sopenharmony_ci case nir_intrinsic_load_sample_positions_pan: 160bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_num_input_vertices_amd: 161bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_num_input_primitives_amd: 162bf215546Sopenharmony_ci case nir_intrinsic_load_shader_query_enabled_amd: 163bf215546Sopenharmony_ci case nir_intrinsic_load_cull_front_face_enabled_amd: 164bf215546Sopenharmony_ci case nir_intrinsic_load_cull_back_face_enabled_amd: 165bf215546Sopenharmony_ci case nir_intrinsic_load_cull_ccw_amd: 166bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_primitives_enabled_amd: 167bf215546Sopenharmony_ci case nir_intrinsic_load_cull_any_enabled_amd: 168bf215546Sopenharmony_ci case nir_intrinsic_load_cull_small_prim_precision_amd: 169bf215546Sopenharmony_ci case nir_intrinsic_load_user_data_amd: 170bf215546Sopenharmony_ci case nir_intrinsic_load_force_vrs_rates_amd: 171bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_inner_default: 172bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_outer_default: 173bf215546Sopenharmony_ci case nir_intrinsic_load_scalar_arg_amd: 174bf215546Sopenharmony_ci case nir_intrinsic_load_smem_amd: 175bf215546Sopenharmony_ci case nir_intrinsic_load_global_const_block_intel: 176bf215546Sopenharmony_ci case nir_intrinsic_load_reloc_const_intel: 177bf215546Sopenharmony_ci case nir_intrinsic_load_global_block_intel: 178bf215546Sopenharmony_ci case nir_intrinsic_load_btd_global_arg_addr_intel: 179bf215546Sopenharmony_ci case nir_intrinsic_load_btd_local_arg_addr_intel: 180bf215546Sopenharmony_ci case nir_intrinsic_load_mesh_inline_data_intel: 181bf215546Sopenharmony_ci case nir_intrinsic_load_ray_num_dss_rt_stacks_intel: 182bf215546Sopenharmony_ci case nir_intrinsic_load_lshs_vertex_stride_amd: 183bf215546Sopenharmony_ci case nir_intrinsic_load_hs_out_patch_data_offset_amd: 184bf215546Sopenharmony_ci is_divergent = false; 185bf215546Sopenharmony_ci break; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci /* Intrinsics with divergence depending on shader stage and hardware */ 188bf215546Sopenharmony_ci case nir_intrinsic_load_frag_shading_rate: 189bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup); 190bf215546Sopenharmony_ci break; 191bf215546Sopenharmony_ci case nir_intrinsic_load_input: 192bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent; 193bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) 194bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 195bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TESS_EVAL) 196bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup); 197bf215546Sopenharmony_ci else if (stage != MESA_SHADER_MESH) 198bf215546Sopenharmony_ci is_divergent = true; 199bf215546Sopenharmony_ci break; 200bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 201bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent || 202bf215546Sopenharmony_ci instr->src[1].ssa->divergent; 203bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL) 204bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); 205bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_EVAL) 206bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup); 207bf215546Sopenharmony_ci else 208bf215546Sopenharmony_ci is_divergent = true; 209bf215546Sopenharmony_ci break; 210bf215546Sopenharmony_ci case nir_intrinsic_load_input_vertex: 211bf215546Sopenharmony_ci is_divergent = instr->src[1].ssa->divergent; 212bf215546Sopenharmony_ci assert(stage == MESA_SHADER_FRAGMENT); 213bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 214bf215546Sopenharmony_ci break; 215bf215546Sopenharmony_ci case nir_intrinsic_load_output: 216bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent; 217bf215546Sopenharmony_ci switch (stage) { 218bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 219bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); 220bf215546Sopenharmony_ci break; 221bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 222bf215546Sopenharmony_ci is_divergent = true; 223bf215546Sopenharmony_ci break; 224bf215546Sopenharmony_ci case MESA_SHADER_TASK: 225bf215546Sopenharmony_ci case MESA_SHADER_MESH: 226bf215546Sopenharmony_ci /* Divergent if src[0] is, so nothing else to do. */ 227bf215546Sopenharmony_ci break; 228bf215546Sopenharmony_ci default: 229bf215546Sopenharmony_ci unreachable("Invalid stage for load_output"); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci break; 232bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: 233bf215546Sopenharmony_ci assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH); 234bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent || 235bf215546Sopenharmony_ci instr->src[1].ssa->divergent || 236bf215546Sopenharmony_ci (stage == MESA_SHADER_TESS_CTRL && 237bf215546Sopenharmony_ci !(options & nir_divergence_single_patch_per_tcs_subgroup)); 238bf215546Sopenharmony_ci break; 239bf215546Sopenharmony_ci case nir_intrinsic_load_per_primitive_output: 240bf215546Sopenharmony_ci assert(stage == MESA_SHADER_MESH); 241bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent || 242bf215546Sopenharmony_ci instr->src[1].ssa->divergent; 243bf215546Sopenharmony_ci break; 244bf215546Sopenharmony_ci case nir_intrinsic_load_layer_id: 245bf215546Sopenharmony_ci case nir_intrinsic_load_front_face: 246bf215546Sopenharmony_ci assert(stage == MESA_SHADER_FRAGMENT); 247bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 248bf215546Sopenharmony_ci break; 249bf215546Sopenharmony_ci case nir_intrinsic_load_view_index: 250bf215546Sopenharmony_ci assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL); 251bf215546Sopenharmony_ci if (options & nir_divergence_view_index_uniform) 252bf215546Sopenharmony_ci is_divergent = false; 253bf215546Sopenharmony_ci else if (stage == MESA_SHADER_FRAGMENT) 254bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 255bf215546Sopenharmony_ci break; 256bf215546Sopenharmony_ci case nir_intrinsic_load_fs_input_interp_deltas: 257bf215546Sopenharmony_ci assert(stage == MESA_SHADER_FRAGMENT); 258bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent; 259bf215546Sopenharmony_ci is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 260bf215546Sopenharmony_ci break; 261bf215546Sopenharmony_ci case nir_intrinsic_load_primitive_id: 262bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) 263bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 264bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TESS_CTRL) 265bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); 266bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TESS_EVAL) 267bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 268bf215546Sopenharmony_ci else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX) 269bf215546Sopenharmony_ci is_divergent = true; 270bf215546Sopenharmony_ci else 271bf215546Sopenharmony_ci unreachable("Invalid stage for load_primitive_id"); 272bf215546Sopenharmony_ci break; 273bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_inner: 274bf215546Sopenharmony_ci case nir_intrinsic_load_tess_level_outer: 275bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL) 276bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); 277bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TESS_EVAL) 278bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 279bf215546Sopenharmony_ci else 280bf215546Sopenharmony_ci unreachable("Invalid stage for load_primitive_tess_level_*"); 281bf215546Sopenharmony_ci break; 282bf215546Sopenharmony_ci case nir_intrinsic_load_patch_vertices_in: 283bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_EVAL) 284bf215546Sopenharmony_ci is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 285bf215546Sopenharmony_ci else 286bf215546Sopenharmony_ci assert(stage == MESA_SHADER_TESS_CTRL); 287bf215546Sopenharmony_ci break; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_index: 290bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_id: 291bf215546Sopenharmony_ci assert(gl_shader_stage_uses_workgroup(stage)); 292bf215546Sopenharmony_ci if (stage == MESA_SHADER_COMPUTE) 293bf215546Sopenharmony_ci is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup); 294bf215546Sopenharmony_ci break; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci /* Clustered reductions are uniform if cluster_size == subgroup_size or 297bf215546Sopenharmony_ci * the source is uniform and the operation is invariant. 298bf215546Sopenharmony_ci * Inclusive scans are uniform if 299bf215546Sopenharmony_ci * the source is uniform and the operation is invariant 300bf215546Sopenharmony_ci */ 301bf215546Sopenharmony_ci case nir_intrinsic_reduce: 302bf215546Sopenharmony_ci if (nir_intrinsic_cluster_size(instr) == 0) 303bf215546Sopenharmony_ci return false; 304bf215546Sopenharmony_ci FALLTHROUGH; 305bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: { 306bf215546Sopenharmony_ci nir_op op = nir_intrinsic_reduction_op(instr); 307bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent; 308bf215546Sopenharmony_ci if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin && 309bf215546Sopenharmony_ci op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax && 310bf215546Sopenharmony_ci op != nir_op_iand && op != nir_op_ior) 311bf215546Sopenharmony_ci is_divergent = true; 312bf215546Sopenharmony_ci break; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 316bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 317bf215546Sopenharmony_ci is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || 318bf215546Sopenharmony_ci instr->src[1].ssa->divergent; 319bf215546Sopenharmony_ci break; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci case nir_intrinsic_get_ssbo_size: 322bf215546Sopenharmony_ci case nir_intrinsic_deref_buffer_array_length: 323bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); 324bf215546Sopenharmony_ci break; 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci case nir_intrinsic_image_load: 327bf215546Sopenharmony_ci case nir_intrinsic_image_deref_load: 328bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_load: 329bf215546Sopenharmony_ci case nir_intrinsic_image_sparse_load: 330bf215546Sopenharmony_ci case nir_intrinsic_image_deref_sparse_load: 331bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_sparse_load: 332bf215546Sopenharmony_ci is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || 333bf215546Sopenharmony_ci instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent; 334bf215546Sopenharmony_ci break; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* Intrinsics with divergence depending on sources */ 338bf215546Sopenharmony_ci case nir_intrinsic_ballot_bitfield_extract: 339bf215546Sopenharmony_ci case nir_intrinsic_ballot_find_lsb: 340bf215546Sopenharmony_ci case nir_intrinsic_ballot_find_msb: 341bf215546Sopenharmony_ci case nir_intrinsic_ballot_bit_count_reduce: 342bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 343bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 344bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 345bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 346bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 347bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 348bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 349bf215546Sopenharmony_ci case nir_intrinsic_byte_permute_amd: 350bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 351bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 352bf215546Sopenharmony_ci case nir_intrinsic_load_shared2_amd: 353bf215546Sopenharmony_ci case nir_intrinsic_load_global: 354bf215546Sopenharmony_ci case nir_intrinsic_load_global_2x32: 355bf215546Sopenharmony_ci case nir_intrinsic_load_global_constant: 356bf215546Sopenharmony_ci case nir_intrinsic_load_global_amd: 357bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 358bf215546Sopenharmony_ci case nir_intrinsic_load_constant: 359bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos_from_id: 360bf215546Sopenharmony_ci case nir_intrinsic_load_kernel_input: 361bf215546Sopenharmony_ci case nir_intrinsic_load_task_payload: 362bf215546Sopenharmony_ci case nir_intrinsic_load_buffer_amd: 363bf215546Sopenharmony_ci case nir_intrinsic_image_samples: 364bf215546Sopenharmony_ci case nir_intrinsic_image_deref_samples: 365bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_samples: 366bf215546Sopenharmony_ci case nir_intrinsic_image_size: 367bf215546Sopenharmony_ci case nir_intrinsic_image_deref_size: 368bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_size: 369bf215546Sopenharmony_ci case nir_intrinsic_copy_deref: 370bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_index: 371bf215546Sopenharmony_ci case nir_intrinsic_vulkan_resource_reindex: 372bf215546Sopenharmony_ci case nir_intrinsic_load_vulkan_descriptor: 373bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_read: 374bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_read_deref: 375bf215546Sopenharmony_ci case nir_intrinsic_quad_swizzle_amd: 376bf215546Sopenharmony_ci case nir_intrinsic_masked_swizzle_amd: 377bf215546Sopenharmony_ci case nir_intrinsic_is_sparse_texels_resident: 378bf215546Sopenharmony_ci case nir_intrinsic_sparse_residency_code_and: 379bf215546Sopenharmony_ci case nir_intrinsic_bvh64_intersect_ray_amd: 380bf215546Sopenharmony_ci case nir_intrinsic_image_deref_load_param_intel: 381bf215546Sopenharmony_ci case nir_intrinsic_image_load_raw_intel: 382bf215546Sopenharmony_ci case nir_intrinsic_get_ubo_size: 383bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo_address: 384bf215546Sopenharmony_ci case nir_intrinsic_load_desc_set_address_intel: { 385bf215546Sopenharmony_ci unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; 386bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 387bf215546Sopenharmony_ci if (instr->src[i].ssa->divergent) { 388bf215546Sopenharmony_ci is_divergent = true; 389bf215546Sopenharmony_ci break; 390bf215546Sopenharmony_ci } 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci break; 393bf215546Sopenharmony_ci } 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 396bf215546Sopenharmony_ci is_divergent = instr->src[0].ssa->divergent && 397bf215546Sopenharmony_ci instr->src[1].ssa->divergent; 398bf215546Sopenharmony_ci break; 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci /* Intrinsics which are always divergent */ 401bf215546Sopenharmony_ci case nir_intrinsic_load_color0: 402bf215546Sopenharmony_ci case nir_intrinsic_load_color1: 403bf215546Sopenharmony_ci case nir_intrinsic_load_param: 404bf215546Sopenharmony_ci case nir_intrinsic_load_sample_id: 405bf215546Sopenharmony_ci case nir_intrinsic_load_sample_id_no_per_sample: 406bf215546Sopenharmony_ci case nir_intrinsic_load_sample_mask_in: 407bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 408bf215546Sopenharmony_ci case nir_intrinsic_load_point_coord_maybe_flipped: 409bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_pixel: 410bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_centroid: 411bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_sample: 412bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_model: 413bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_sample: 414bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_offset: 415bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 416bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 417bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 418bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_vertex: 419bf215546Sopenharmony_ci case nir_intrinsic_load_tess_coord: 420bf215546Sopenharmony_ci case nir_intrinsic_load_point_coord: 421bf215546Sopenharmony_ci case nir_intrinsic_load_line_coord: 422bf215546Sopenharmony_ci case nir_intrinsic_load_frag_coord: 423bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos: 424bf215546Sopenharmony_ci case nir_intrinsic_load_sample_pos_or_center: 425bf215546Sopenharmony_ci case nir_intrinsic_load_vertex_id_zero_base: 426bf215546Sopenharmony_ci case nir_intrinsic_load_vertex_id: 427bf215546Sopenharmony_ci case nir_intrinsic_load_instance_id: 428bf215546Sopenharmony_ci case nir_intrinsic_load_invocation_id: 429bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_id: 430bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_index: 431bf215546Sopenharmony_ci case nir_intrinsic_load_global_invocation_id: 432bf215546Sopenharmony_ci case nir_intrinsic_load_global_invocation_id_zero_base: 433bf215546Sopenharmony_ci case nir_intrinsic_load_global_invocation_index: 434bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_invocation: 435bf215546Sopenharmony_ci case nir_intrinsic_load_helper_invocation: 436bf215546Sopenharmony_ci case nir_intrinsic_is_helper_invocation: 437bf215546Sopenharmony_ci case nir_intrinsic_load_scratch: 438bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_add: 439bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_imin: 440bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_umin: 441bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_imax: 442bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_umax: 443bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_and: 444bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_or: 445bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_xor: 446bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_exchange: 447bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_comp_swap: 448bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fadd: 449bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fmin: 450bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fmax: 451bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fcomp_swap: 452bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 453bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 454bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 455bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 456bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 457bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 458bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 459bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 460bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 461bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 462bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fadd: 463bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmax: 464bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmin: 465bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fcomp_swap: 466bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_add: 467bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_imin: 468bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_umin: 469bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_imax: 470bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_umax: 471bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_and: 472bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_or: 473bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_xor: 474bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_exchange: 475bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_comp_swap: 476bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_fadd: 477bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_fmin: 478bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_fmax: 479bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_inc_wrap: 480bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_dec_wrap: 481bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_add: 482bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_imin: 483bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_umin: 484bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_imax: 485bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_umax: 486bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_and: 487bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_or: 488bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_xor: 489bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_exchange: 490bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_comp_swap: 491bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_fadd: 492bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_fmin: 493bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_fmax: 494bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_add: 495bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imin: 496bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umin: 497bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imax: 498bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umax: 499bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_and: 500bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_or: 501bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_xor: 502bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_exchange: 503bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_comp_swap: 504bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_fadd: 505bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_fmin: 506bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_fmax: 507bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_add: 508bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imin: 509bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umin: 510bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imax: 511bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umax: 512bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_and: 513bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_or: 514bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_xor: 515bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_exchange: 516bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_comp_swap: 517bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fadd: 518bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmin: 519bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmax: 520bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fcomp_swap: 521bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_add: 522bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_imin: 523bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_umin: 524bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_imax: 525bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_umax: 526bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_and: 527bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_or: 528bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_xor: 529bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_exchange: 530bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_comp_swap: 531bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fadd: 532bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fmin: 533bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fmax: 534bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fcomp_swap: 535bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add: 536bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin: 537bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin: 538bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax: 539bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax: 540bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and: 541bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or: 542bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor: 543bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange: 544bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap: 545bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fadd: 546bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmin: 547bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmax: 548bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fcomp_swap: 549bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add_amd: 550bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin_amd: 551bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin_amd: 552bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax_amd: 553bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax_amd: 554bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and_amd: 555bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or_amd: 556bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor_amd: 557bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange_amd: 558bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap_amd: 559bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fadd_amd: 560bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmin_amd: 561bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmax_amd: 562bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fcomp_swap_amd: 563bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add_2x32: 564bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin_2x32: 565bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin_2x32: 566bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax_2x32: 567bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax_2x32: 568bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and_2x32: 569bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or_2x32: 570bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor_2x32: 571bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange_2x32: 572bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap_2x32: 573bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fadd_2x32: 574bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmin_2x32: 575bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmax_2x32: 576bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fcomp_swap_2x32: 577bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_add: 578bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_min: 579bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_max: 580bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_and: 581bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_or: 582bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_xor: 583bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_inc: 584bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_pre_dec: 585bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_post_dec: 586bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_exchange: 587bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_comp_swap: 588bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_add_deref: 589bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_min_deref: 590bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_max_deref: 591bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_and_deref: 592bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_or_deref: 593bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_xor_deref: 594bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_inc_deref: 595bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_pre_dec_deref: 596bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_post_dec_deref: 597bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_exchange_deref: 598bf215546Sopenharmony_ci case nir_intrinsic_atomic_counter_comp_swap_deref: 599bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: 600bf215546Sopenharmony_ci case nir_intrinsic_ballot_bit_count_exclusive: 601bf215546Sopenharmony_ci case nir_intrinsic_ballot_bit_count_inclusive: 602bf215546Sopenharmony_ci case nir_intrinsic_write_invocation_amd: 603bf215546Sopenharmony_ci case nir_intrinsic_mbcnt_amd: 604bf215546Sopenharmony_ci case nir_intrinsic_lane_permute_16_amd: 605bf215546Sopenharmony_ci case nir_intrinsic_elect: 606bf215546Sopenharmony_ci case nir_intrinsic_load_tlb_color_v3d: 607bf215546Sopenharmony_ci case nir_intrinsic_load_tess_rel_patch_id_amd: 608bf215546Sopenharmony_ci case nir_intrinsic_load_gs_vertex_offset_amd: 609bf215546Sopenharmony_ci case nir_intrinsic_has_input_vertex_amd: 610bf215546Sopenharmony_ci case nir_intrinsic_has_input_primitive_amd: 611bf215546Sopenharmony_ci case nir_intrinsic_load_packed_passthrough_primitive_amd: 612bf215546Sopenharmony_ci case nir_intrinsic_load_initial_edgeflags_amd: 613bf215546Sopenharmony_ci case nir_intrinsic_gds_atomic_add_amd: 614bf215546Sopenharmony_ci case nir_intrinsic_load_rt_arg_scratch_offset_amd: 615bf215546Sopenharmony_ci case nir_intrinsic_load_intersection_opaque_amd: 616bf215546Sopenharmony_ci case nir_intrinsic_load_vector_arg_amd: 617bf215546Sopenharmony_ci case nir_intrinsic_load_btd_stack_id_intel: 618bf215546Sopenharmony_ci case nir_intrinsic_load_topology_id_intel: 619bf215546Sopenharmony_ci case nir_intrinsic_load_scratch_base_ptr: 620bf215546Sopenharmony_ci is_divergent = true; 621bf215546Sopenharmony_ci break; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci default: 624bf215546Sopenharmony_ci#ifdef NDEBUG 625bf215546Sopenharmony_ci is_divergent = true; 626bf215546Sopenharmony_ci break; 627bf215546Sopenharmony_ci#else 628bf215546Sopenharmony_ci nir_print_instr(&instr->instr, stderr); 629bf215546Sopenharmony_ci unreachable("\nNIR divergence analysis: Unhandled intrinsic."); 630bf215546Sopenharmony_ci#endif 631bf215546Sopenharmony_ci } 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci instr->dest.ssa.divergent = is_divergent; 634bf215546Sopenharmony_ci return is_divergent; 635bf215546Sopenharmony_ci} 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_cistatic bool 638bf215546Sopenharmony_civisit_tex(nir_tex_instr *instr) 639bf215546Sopenharmony_ci{ 640bf215546Sopenharmony_ci if (instr->dest.ssa.divergent) 641bf215546Sopenharmony_ci return false; 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci bool is_divergent = false; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->num_srcs; i++) { 646bf215546Sopenharmony_ci switch (instr->src[i].src_type) { 647bf215546Sopenharmony_ci case nir_tex_src_sampler_deref: 648bf215546Sopenharmony_ci case nir_tex_src_sampler_handle: 649bf215546Sopenharmony_ci case nir_tex_src_sampler_offset: 650bf215546Sopenharmony_ci is_divergent |= instr->src[i].src.ssa->divergent && 651bf215546Sopenharmony_ci instr->sampler_non_uniform; 652bf215546Sopenharmony_ci break; 653bf215546Sopenharmony_ci case nir_tex_src_texture_deref: 654bf215546Sopenharmony_ci case nir_tex_src_texture_handle: 655bf215546Sopenharmony_ci case nir_tex_src_texture_offset: 656bf215546Sopenharmony_ci is_divergent |= instr->src[i].src.ssa->divergent && 657bf215546Sopenharmony_ci instr->texture_non_uniform; 658bf215546Sopenharmony_ci break; 659bf215546Sopenharmony_ci default: 660bf215546Sopenharmony_ci is_divergent |= instr->src[i].src.ssa->divergent; 661bf215546Sopenharmony_ci break; 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci } 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci instr->dest.ssa.divergent = is_divergent; 666bf215546Sopenharmony_ci return is_divergent; 667bf215546Sopenharmony_ci} 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_cistatic bool 670bf215546Sopenharmony_civisit_load_const(nir_load_const_instr *instr) 671bf215546Sopenharmony_ci{ 672bf215546Sopenharmony_ci return false; 673bf215546Sopenharmony_ci} 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_cistatic bool 676bf215546Sopenharmony_civisit_ssa_undef(nir_ssa_undef_instr *instr) 677bf215546Sopenharmony_ci{ 678bf215546Sopenharmony_ci return false; 679bf215546Sopenharmony_ci} 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_cistatic bool 682bf215546Sopenharmony_cinir_variable_mode_is_uniform(nir_variable_mode mode) { 683bf215546Sopenharmony_ci switch (mode) { 684bf215546Sopenharmony_ci case nir_var_uniform: 685bf215546Sopenharmony_ci case nir_var_mem_ubo: 686bf215546Sopenharmony_ci case nir_var_mem_ssbo: 687bf215546Sopenharmony_ci case nir_var_mem_shared: 688bf215546Sopenharmony_ci case nir_var_mem_task_payload: 689bf215546Sopenharmony_ci case nir_var_mem_global: 690bf215546Sopenharmony_ci case nir_var_image: 691bf215546Sopenharmony_ci return true; 692bf215546Sopenharmony_ci default: 693bf215546Sopenharmony_ci return false; 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci} 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_cistatic bool 698bf215546Sopenharmony_cinir_variable_is_uniform(nir_shader *shader, nir_variable *var) 699bf215546Sopenharmony_ci{ 700bf215546Sopenharmony_ci if (nir_variable_mode_is_uniform(var->data.mode)) 701bf215546Sopenharmony_ci return true; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci nir_divergence_options options = shader->options->divergence_analysis_options; 704bf215546Sopenharmony_ci gl_shader_stage stage = shader->info.stage; 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT && 707bf215546Sopenharmony_ci (options & nir_divergence_single_prim_per_subgroup) && 708bf215546Sopenharmony_ci var->data.mode == nir_var_shader_in && 709bf215546Sopenharmony_ci var->data.interpolation == INTERP_MODE_FLAT) 710bf215546Sopenharmony_ci return true; 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL && 713bf215546Sopenharmony_ci (options & nir_divergence_single_patch_per_tcs_subgroup) && 714bf215546Sopenharmony_ci var->data.mode == nir_var_shader_out && var->data.patch) 715bf215546Sopenharmony_ci return true; 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_EVAL && 718bf215546Sopenharmony_ci (options & nir_divergence_single_patch_per_tes_subgroup) && 719bf215546Sopenharmony_ci var->data.mode == nir_var_shader_in && var->data.patch) 720bf215546Sopenharmony_ci return true; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci return false; 723bf215546Sopenharmony_ci} 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_cistatic bool 726bf215546Sopenharmony_civisit_deref(nir_shader *shader, nir_deref_instr *deref) 727bf215546Sopenharmony_ci{ 728bf215546Sopenharmony_ci if (deref->dest.ssa.divergent) 729bf215546Sopenharmony_ci return false; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci bool is_divergent = false; 732bf215546Sopenharmony_ci switch (deref->deref_type) { 733bf215546Sopenharmony_ci case nir_deref_type_var: 734bf215546Sopenharmony_ci is_divergent = !nir_variable_is_uniform(shader, deref->var); 735bf215546Sopenharmony_ci break; 736bf215546Sopenharmony_ci case nir_deref_type_array: 737bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: 738bf215546Sopenharmony_ci is_divergent = deref->arr.index.ssa->divergent; 739bf215546Sopenharmony_ci FALLTHROUGH; 740bf215546Sopenharmony_ci case nir_deref_type_struct: 741bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 742bf215546Sopenharmony_ci is_divergent |= deref->parent.ssa->divergent; 743bf215546Sopenharmony_ci break; 744bf215546Sopenharmony_ci case nir_deref_type_cast: 745bf215546Sopenharmony_ci is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) || 746bf215546Sopenharmony_ci deref->parent.ssa->divergent; 747bf215546Sopenharmony_ci break; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci deref->dest.ssa.divergent = is_divergent; 751bf215546Sopenharmony_ci return is_divergent; 752bf215546Sopenharmony_ci} 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_cistatic bool 755bf215546Sopenharmony_civisit_jump(nir_jump_instr *jump, struct divergence_state *state) 756bf215546Sopenharmony_ci{ 757bf215546Sopenharmony_ci switch (jump->type) { 758bf215546Sopenharmony_ci case nir_jump_continue: 759bf215546Sopenharmony_ci if (state->divergent_loop_continue) 760bf215546Sopenharmony_ci return false; 761bf215546Sopenharmony_ci if (state->divergent_loop_cf) 762bf215546Sopenharmony_ci state->divergent_loop_continue = true; 763bf215546Sopenharmony_ci return state->divergent_loop_continue; 764bf215546Sopenharmony_ci case nir_jump_break: 765bf215546Sopenharmony_ci if (state->divergent_loop_break) 766bf215546Sopenharmony_ci return false; 767bf215546Sopenharmony_ci if (state->divergent_loop_cf) 768bf215546Sopenharmony_ci state->divergent_loop_break = true; 769bf215546Sopenharmony_ci return state->divergent_loop_break; 770bf215546Sopenharmony_ci case nir_jump_halt: 771bf215546Sopenharmony_ci /* This totally kills invocations so it doesn't add divergence */ 772bf215546Sopenharmony_ci break; 773bf215546Sopenharmony_ci case nir_jump_return: 774bf215546Sopenharmony_ci unreachable("NIR divergence analysis: Unsupported return instruction."); 775bf215546Sopenharmony_ci break; 776bf215546Sopenharmony_ci case nir_jump_goto: 777bf215546Sopenharmony_ci case nir_jump_goto_if: 778bf215546Sopenharmony_ci unreachable("NIR divergence analysis: Unsupported goto_if instruction."); 779bf215546Sopenharmony_ci break; 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci return false; 782bf215546Sopenharmony_ci} 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_cistatic bool 785bf215546Sopenharmony_ciset_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state) 786bf215546Sopenharmony_ci{ 787bf215546Sopenharmony_ci def->divergent = false; 788bf215546Sopenharmony_ci return true; 789bf215546Sopenharmony_ci} 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_cistatic bool 792bf215546Sopenharmony_ciupdate_instr_divergence(nir_shader *shader, nir_instr *instr) 793bf215546Sopenharmony_ci{ 794bf215546Sopenharmony_ci switch (instr->type) { 795bf215546Sopenharmony_ci case nir_instr_type_alu: 796bf215546Sopenharmony_ci return visit_alu(nir_instr_as_alu(instr)); 797bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 798bf215546Sopenharmony_ci return visit_intrinsic(shader, nir_instr_as_intrinsic(instr)); 799bf215546Sopenharmony_ci case nir_instr_type_tex: 800bf215546Sopenharmony_ci return visit_tex(nir_instr_as_tex(instr)); 801bf215546Sopenharmony_ci case nir_instr_type_load_const: 802bf215546Sopenharmony_ci return visit_load_const(nir_instr_as_load_const(instr)); 803bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 804bf215546Sopenharmony_ci return visit_ssa_undef(nir_instr_as_ssa_undef(instr)); 805bf215546Sopenharmony_ci case nir_instr_type_deref: 806bf215546Sopenharmony_ci return visit_deref(shader, nir_instr_as_deref(instr)); 807bf215546Sopenharmony_ci case nir_instr_type_jump: 808bf215546Sopenharmony_ci case nir_instr_type_phi: 809bf215546Sopenharmony_ci case nir_instr_type_call: 810bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 811bf215546Sopenharmony_ci default: 812bf215546Sopenharmony_ci unreachable("NIR divergence analysis: Unsupported instruction type."); 813bf215546Sopenharmony_ci } 814bf215546Sopenharmony_ci} 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_cistatic bool 817bf215546Sopenharmony_civisit_block(nir_block *block, struct divergence_state *state) 818bf215546Sopenharmony_ci{ 819bf215546Sopenharmony_ci bool has_changed = false; 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 822bf215546Sopenharmony_ci /* phis are handled when processing the branches */ 823bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) 824bf215546Sopenharmony_ci continue; 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci if (state->first_visit) 827bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL); 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci if (instr->type == nir_instr_type_jump) 830bf215546Sopenharmony_ci has_changed |= visit_jump(nir_instr_as_jump(instr), state); 831bf215546Sopenharmony_ci else 832bf215546Sopenharmony_ci has_changed |= update_instr_divergence(state->shader, instr); 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci return has_changed; 836bf215546Sopenharmony_ci} 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci/* There are 3 types of phi instructions: 839bf215546Sopenharmony_ci * (1) gamma: represent the joining point of different paths 840bf215546Sopenharmony_ci * created by an “if-then-else” branch. 841bf215546Sopenharmony_ci * The resulting value is divergent if the branch condition 842bf215546Sopenharmony_ci * or any of the source values is divergent. */ 843bf215546Sopenharmony_cistatic bool 844bf215546Sopenharmony_civisit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent) 845bf215546Sopenharmony_ci{ 846bf215546Sopenharmony_ci if (phi->dest.ssa.divergent) 847bf215546Sopenharmony_ci return false; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci unsigned defined_srcs = 0; 850bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 851bf215546Sopenharmony_ci /* if any source value is divergent, the resulting value is divergent */ 852bf215546Sopenharmony_ci if (src->src.ssa->divergent) { 853bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 854bf215546Sopenharmony_ci return true; 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) { 857bf215546Sopenharmony_ci defined_srcs++; 858bf215546Sopenharmony_ci } 859bf215546Sopenharmony_ci } 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci /* if the condition is divergent and two sources defined, the definition is divergent */ 862bf215546Sopenharmony_ci if (defined_srcs > 1 && if_cond_divergent) { 863bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 864bf215546Sopenharmony_ci return true; 865bf215546Sopenharmony_ci } 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci return false; 868bf215546Sopenharmony_ci} 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci/* There are 3 types of phi instructions: 871bf215546Sopenharmony_ci * (2) mu: which only exist at loop headers, 872bf215546Sopenharmony_ci * merge initial and loop-carried values. 873bf215546Sopenharmony_ci * The resulting value is divergent if any source value 874bf215546Sopenharmony_ci * is divergent or a divergent loop continue condition 875bf215546Sopenharmony_ci * is associated with a different ssa-def. */ 876bf215546Sopenharmony_cistatic bool 877bf215546Sopenharmony_civisit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci if (phi->dest.ssa.divergent) 880bf215546Sopenharmony_ci return false; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci nir_ssa_def* same = NULL; 883bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 884bf215546Sopenharmony_ci /* if any source value is divergent, the resulting value is divergent */ 885bf215546Sopenharmony_ci if (src->src.ssa->divergent) { 886bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 887bf215546Sopenharmony_ci return true; 888bf215546Sopenharmony_ci } 889bf215546Sopenharmony_ci /* if this loop is uniform, we're done here */ 890bf215546Sopenharmony_ci if (!divergent_continue) 891bf215546Sopenharmony_ci continue; 892bf215546Sopenharmony_ci /* skip the loop preheader */ 893bf215546Sopenharmony_ci if (src->pred == preheader) 894bf215546Sopenharmony_ci continue; 895bf215546Sopenharmony_ci /* skip undef values */ 896bf215546Sopenharmony_ci if (nir_src_is_undef(src->src)) 897bf215546Sopenharmony_ci continue; 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci /* check if all loop-carried values are from the same ssa-def */ 900bf215546Sopenharmony_ci if (!same) 901bf215546Sopenharmony_ci same = src->src.ssa; 902bf215546Sopenharmony_ci else if (same != src->src.ssa) { 903bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 904bf215546Sopenharmony_ci return true; 905bf215546Sopenharmony_ci } 906bf215546Sopenharmony_ci } 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci return false; 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci/* There are 3 types of phi instructions: 912bf215546Sopenharmony_ci * (3) eta: represent values that leave a loop. 913bf215546Sopenharmony_ci * The resulting value is divergent if the source value is divergent 914bf215546Sopenharmony_ci * or any loop exit condition is divergent for a value which is 915bf215546Sopenharmony_ci * not loop-invariant. 916bf215546Sopenharmony_ci * (note: there should be no phi for loop-invariant variables.) */ 917bf215546Sopenharmony_cistatic bool 918bf215546Sopenharmony_civisit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break) 919bf215546Sopenharmony_ci{ 920bf215546Sopenharmony_ci if (phi->dest.ssa.divergent) 921bf215546Sopenharmony_ci return false; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci if (divergent_break) { 924bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 925bf215546Sopenharmony_ci return true; 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci /* if any source value is divergent, the resulting value is divergent */ 929bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 930bf215546Sopenharmony_ci if (src->src.ssa->divergent) { 931bf215546Sopenharmony_ci phi->dest.ssa.divergent = true; 932bf215546Sopenharmony_ci return true; 933bf215546Sopenharmony_ci } 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci return false; 937bf215546Sopenharmony_ci} 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_cistatic bool 940bf215546Sopenharmony_civisit_if(nir_if *if_stmt, struct divergence_state *state) 941bf215546Sopenharmony_ci{ 942bf215546Sopenharmony_ci bool progress = false; 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci struct divergence_state then_state = *state; 945bf215546Sopenharmony_ci then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent; 946bf215546Sopenharmony_ci progress |= visit_cf_list(&if_stmt->then_list, &then_state); 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci struct divergence_state else_state = *state; 949bf215546Sopenharmony_ci else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent; 950bf215546Sopenharmony_ci progress |= visit_cf_list(&if_stmt->else_list, &else_state); 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci /* handle phis after the IF */ 953bf215546Sopenharmony_ci nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) { 954bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 955bf215546Sopenharmony_ci break; 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci if (state->first_visit) 958bf215546Sopenharmony_ci nir_instr_as_phi(instr)->dest.ssa.divergent = false; 959bf215546Sopenharmony_ci progress |= visit_if_merge_phi(nir_instr_as_phi(instr), 960bf215546Sopenharmony_ci if_stmt->condition.ssa->divergent); 961bf215546Sopenharmony_ci } 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci /* join loop divergence information from both branch legs */ 964bf215546Sopenharmony_ci state->divergent_loop_continue |= then_state.divergent_loop_continue || 965bf215546Sopenharmony_ci else_state.divergent_loop_continue; 966bf215546Sopenharmony_ci state->divergent_loop_break |= then_state.divergent_loop_break || 967bf215546Sopenharmony_ci else_state.divergent_loop_break; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci /* A divergent continue makes succeeding loop CF divergent: 970bf215546Sopenharmony_ci * not all loop-active invocations participate in the remaining loop-body 971bf215546Sopenharmony_ci * which means that a following break might be taken by some invocations, only */ 972bf215546Sopenharmony_ci state->divergent_loop_cf |= state->divergent_loop_continue; 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci return progress; 975bf215546Sopenharmony_ci} 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_cistatic bool 978bf215546Sopenharmony_civisit_loop(nir_loop *loop, struct divergence_state *state) 979bf215546Sopenharmony_ci{ 980bf215546Sopenharmony_ci bool progress = false; 981bf215546Sopenharmony_ci nir_block *loop_header = nir_loop_first_block(loop); 982bf215546Sopenharmony_ci nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci /* handle loop header phis first: we have no knowledge yet about 985bf215546Sopenharmony_ci * the loop's control flow or any loop-carried sources. */ 986bf215546Sopenharmony_ci nir_foreach_instr(instr, loop_header) { 987bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 988bf215546Sopenharmony_ci break; 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 991bf215546Sopenharmony_ci if (!state->first_visit && phi->dest.ssa.divergent) 992bf215546Sopenharmony_ci continue; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 995bf215546Sopenharmony_ci if (src->pred == loop_preheader) { 996bf215546Sopenharmony_ci phi->dest.ssa.divergent = src->src.ssa->divergent; 997bf215546Sopenharmony_ci break; 998bf215546Sopenharmony_ci } 999bf215546Sopenharmony_ci } 1000bf215546Sopenharmony_ci progress |= phi->dest.ssa.divergent; 1001bf215546Sopenharmony_ci } 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci /* setup loop state */ 1004bf215546Sopenharmony_ci struct divergence_state loop_state = *state; 1005bf215546Sopenharmony_ci loop_state.divergent_loop_cf = false; 1006bf215546Sopenharmony_ci loop_state.divergent_loop_continue = false; 1007bf215546Sopenharmony_ci loop_state.divergent_loop_break = false; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci /* process loop body until no further changes are made */ 1010bf215546Sopenharmony_ci bool repeat; 1011bf215546Sopenharmony_ci do { 1012bf215546Sopenharmony_ci progress |= visit_cf_list(&loop->body, &loop_state); 1013bf215546Sopenharmony_ci repeat = false; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci /* revisit loop header phis to see if something has changed */ 1016bf215546Sopenharmony_ci nir_foreach_instr(instr, loop_header) { 1017bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 1018bf215546Sopenharmony_ci break; 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ci repeat |= visit_loop_header_phi(nir_instr_as_phi(instr), 1021bf215546Sopenharmony_ci loop_preheader, 1022bf215546Sopenharmony_ci loop_state.divergent_loop_continue); 1023bf215546Sopenharmony_ci } 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci loop_state.divergent_loop_cf = false; 1026bf215546Sopenharmony_ci loop_state.first_visit = false; 1027bf215546Sopenharmony_ci } while (repeat); 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci /* handle phis after the loop */ 1030bf215546Sopenharmony_ci nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) { 1031bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 1032bf215546Sopenharmony_ci break; 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci if (state->first_visit) 1035bf215546Sopenharmony_ci nir_instr_as_phi(instr)->dest.ssa.divergent = false; 1036bf215546Sopenharmony_ci progress |= visit_loop_exit_phi(nir_instr_as_phi(instr), 1037bf215546Sopenharmony_ci loop_state.divergent_loop_break); 1038bf215546Sopenharmony_ci } 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_ci loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue); 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci return progress; 1043bf215546Sopenharmony_ci} 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_cistatic bool 1046bf215546Sopenharmony_civisit_cf_list(struct exec_list *list, struct divergence_state *state) 1047bf215546Sopenharmony_ci{ 1048bf215546Sopenharmony_ci bool has_changed = false; 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, list) { 1051bf215546Sopenharmony_ci switch (node->type) { 1052bf215546Sopenharmony_ci case nir_cf_node_block: 1053bf215546Sopenharmony_ci has_changed |= visit_block(nir_cf_node_as_block(node), state); 1054bf215546Sopenharmony_ci break; 1055bf215546Sopenharmony_ci case nir_cf_node_if: 1056bf215546Sopenharmony_ci has_changed |= visit_if(nir_cf_node_as_if(node), state); 1057bf215546Sopenharmony_ci break; 1058bf215546Sopenharmony_ci case nir_cf_node_loop: 1059bf215546Sopenharmony_ci has_changed |= visit_loop(nir_cf_node_as_loop(node), state); 1060bf215546Sopenharmony_ci break; 1061bf215546Sopenharmony_ci case nir_cf_node_function: 1062bf215546Sopenharmony_ci unreachable("NIR divergence analysis: Unsupported cf_node type."); 1063bf215546Sopenharmony_ci } 1064bf215546Sopenharmony_ci } 1065bf215546Sopenharmony_ci 1066bf215546Sopenharmony_ci return has_changed; 1067bf215546Sopenharmony_ci} 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_civoid 1070bf215546Sopenharmony_cinir_divergence_analysis(nir_shader *shader) 1071bf215546Sopenharmony_ci{ 1072bf215546Sopenharmony_ci shader->info.divergence_analysis_run = true; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci struct divergence_state state = { 1075bf215546Sopenharmony_ci .stage = shader->info.stage, 1076bf215546Sopenharmony_ci .shader = shader, 1077bf215546Sopenharmony_ci .divergent_loop_cf = false, 1078bf215546Sopenharmony_ci .divergent_loop_continue = false, 1079bf215546Sopenharmony_ci .divergent_loop_break = false, 1080bf215546Sopenharmony_ci .first_visit = true, 1081bf215546Sopenharmony_ci }; 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state); 1084bf215546Sopenharmony_ci} 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_cibool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr) 1087bf215546Sopenharmony_ci{ 1088bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL); 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) { 1091bf215546Sopenharmony_ci nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node); 1092bf215546Sopenharmony_ci /* can only update gamma/if phis */ 1093bf215546Sopenharmony_ci if (!prev || prev->type != nir_cf_node_if) 1094bf215546Sopenharmony_ci return false; 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci nir_if *nif = nir_cf_node_as_if(prev); 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition)); 1099bf215546Sopenharmony_ci return true; 1100bf215546Sopenharmony_ci } 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci update_instr_divergence(shader, instr); 1103bf215546Sopenharmony_ci return true; 1104bf215546Sopenharmony_ci} 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_cibool 1108bf215546Sopenharmony_cinir_has_divergent_loop(nir_shader *shader) 1109bf215546Sopenharmony_ci{ 1110bf215546Sopenharmony_ci bool divergent_loop = false; 1111bf215546Sopenharmony_ci nir_function_impl *func = nir_shader_get_entrypoint(shader); 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, &func->body) { 1114bf215546Sopenharmony_ci if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) { 1115bf215546Sopenharmony_ci divergent_loop = true; 1116bf215546Sopenharmony_ci break; 1117bf215546Sopenharmony_ci } 1118bf215546Sopenharmony_ci } 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci return divergent_loop; 1121bf215546Sopenharmony_ci} 1122