/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each ssa definition, whether it is uniform.
 * That is, whether the value is the same for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
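
/* For example (illustrative GLSL-style pseudocode, not part of the pass;
 * the names below are made up for illustration):
 *
 *    uint lane = gl_SubgroupInvocationID; // divergent: differs per invocation
 *    uint base = push_const_base;         // uniform: same for the whole group
 *    uint sum  = base + lane;             // divergent: derived from a divergent value
 *
 * Backends can use this information e.g. to keep uniform values in scalar
 * registers or to avoid waterfall loops for uniform resource indices.
 */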

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->dest.dest.ssa.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->dest.dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
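
/* For example (illustrative NIR-like pseudocode): given
 *
 *    %a = load_subgroup_invocation    ; divergent
 *    %b = load_push_constant          ; uniform
 *    %c = iadd %a, %b                 ; divergent
 *
 * the iadd result is divergent because one of its sources is. ALU results
 * are divergent exactly when at least one source is divergent.
 */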

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_ray_launch_size_addr_amd:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_task_ib_addr:
   case nir_intrinsic_load_task_ib_stride:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_shader_query_enabled_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_global_block_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage != MESA_SHADER_MESH)
         is_divergent = true;
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* Divergent if src[0] is, so nothing else to do. */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if the source is uniform and the operation
    * is invariant.
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }
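
   /* For example (illustrative, assuming a uniform source value x):
    *
    *    inclusive_scan imin x  -> every lane computes min(x, ..., x) = x  (uniform)
    *    inclusive_scan iadd x  -> lane i computes (i + 1) * x             (divergent)
    *
    * The invariant operations (min/max/iand/ior) are idempotent, so a scan
    * over a uniform source produces the same value in every lane.
    */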

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_byte_permute_amd:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      /* The result is uniform if either the data or the index is uniform: a
       * uniform index makes every invocation read the same lane, and uniform
       * data yields the same value no matter which lane is selected. */
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_fmin:
   case nir_intrinsic_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_task_payload_atomic_add:
   case nir_intrinsic_task_payload_atomic_imin:
   case nir_intrinsic_task_payload_atomic_umin:
   case nir_intrinsic_task_payload_atomic_imax:
   case nir_intrinsic_task_payload_atomic_umax:
   case nir_intrinsic_task_payload_atomic_and:
   case nir_intrinsic_task_payload_atomic_or:
   case nir_intrinsic_task_payload_atomic_xor:
   case nir_intrinsic_task_payload_atomic_exchange:
   case nir_intrinsic_task_payload_atomic_comp_swap:
   case nir_intrinsic_task_payload_atomic_fadd:
   case nir_intrinsic_task_payload_atomic_fmin:
   case nir_intrinsic_task_payload_atomic_fmax:
   case nir_intrinsic_task_payload_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add_amd:
   case nir_intrinsic_global_atomic_imin_amd:
   case nir_intrinsic_global_atomic_umin_amd:
   case nir_intrinsic_global_atomic_imax_amd:
   case nir_intrinsic_global_atomic_umax_amd:
   case nir_intrinsic_global_atomic_and_amd:
   case nir_intrinsic_global_atomic_or_amd:
   case nir_intrinsic_global_atomic_xor_amd:
   case nir_intrinsic_global_atomic_exchange_amd:
   case nir_intrinsic_global_atomic_comp_swap_amd:
   case nir_intrinsic_global_atomic_fadd_amd:
   case nir_intrinsic_global_atomic_fmin_amd:
   case nir_intrinsic_global_atomic_fmax_amd:
   case nir_intrinsic_global_atomic_fcomp_swap_amd:
   case nir_intrinsic_global_atomic_add_2x32:
   case nir_intrinsic_global_atomic_imin_2x32:
   case nir_intrinsic_global_atomic_umin_2x32:
   case nir_intrinsic_global_atomic_imax_2x32:
   case nir_intrinsic_global_atomic_umax_2x32:
   case nir_intrinsic_global_atomic_and_2x32:
   case nir_intrinsic_global_atomic_or_2x32:
   case nir_intrinsic_global_atomic_xor_2x32:
   case nir_intrinsic_global_atomic_exchange_2x32:
   case nir_intrinsic_global_atomic_comp_swap_2x32:
   case nir_intrinsic_global_atomic_fadd_2x32:
   case nir_intrinsic_global_atomic_fmin_2x32:
   case nir_intrinsic_global_atomic_fmax_2x32:
   case nir_intrinsic_global_atomic_fcomp_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_has_input_vertex_amd:
   case nir_intrinsic_has_input_primitive_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->dest.ssa.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
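
/* For example (illustrative): a tex instruction with a divergent coordinate
 * is always divergent, but a divergent texture or sampler source only makes
 * the result divergent when texture_non_uniform/sampler_non_uniform is set.
 * Otherwise the index is assumed to be dynamically uniform, as the APIs
 * require in the absence of NonUniform decorations.
 */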

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* Cast derefs have no variable, so check the deref's modes instead. */
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump)
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      else
         has_changed |= update_instr_divergence(state->shader, instr);
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represent the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->dest.ssa.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   return false;
}
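
/* For example (illustrative NIR-like pseudocode):
 *
 *    if (%div_cond) { %a = ... } else { %b = ... }
 *    %c = phi %a, %b                  ; gamma phi at the merge block
 *
 * Even when %a and %b are both uniform, %c is divergent if %div_cond is:
 * invocations that took different paths can observe different sources.
 */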

/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value is divergent
 *     or a divergent loop continue condition is associated with a
 *     different ssa-def. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
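
/* For example (illustrative NIR-like pseudocode):
 *
 *    loop {
 *        %i = phi %zero, %i, %i_next  ; mu phi: preheader, continue, backedge
 *        ...
 *        if (%div_cond) continue;
 *        %i_next = iadd %i, 1
 *    }
 *
 * With a divergent continue, the loop-carried sources (%i and %i_next) are
 * different ssa-defs, so %i becomes divergent: invocations that took the
 * continue skip the increment and carry a different value into the next
 * iteration.
 */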

/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->dest.ssa.divergent)
      return false;

   if (divergent_break) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
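
/* For example (illustrative NIR-like pseudocode): in LCSSA-form every value
 * that leaves a loop goes through an eta phi:
 *
 *    loop {
 *        %v = ...
 *        if (%div_cond) break;
 *    }
 *    %out = phi %v                    ; eta phi after the loop
 *
 * With a divergent break, invocations leave the loop on different iterations,
 * so %out may hold a different iteration's %v in each invocation.
 */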

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken only by some
    * invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break ||
                      loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr),
                         nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr);
   return true;
}

bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop &&
          nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}
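
/* Typical usage (illustrative sketch; the exact pass ordering is up to the
 * driver):
 *
 *    nir_convert_to_lcssa(shader, true, true); // analysis expects LCSSA-form
 *    nir_divergence_analysis(shader);
 *    ...                                       // consult ssa_def->divergent
 *
 * Passes that modify the shader afterwards must either re-run the analysis
 * or keep the per-def flags up to date, e.g. via nir_update_instr_divergence.
 */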