/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each SSA definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * The pass expects the shader to be in LCSSA form.
 *
 * It implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
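
/* Example (illustrative, GLSL-level): in a compute shader,
 *
 *    uint a = gl_WorkGroupID.x;        // uniform: same for the whole group
 *    uint b = gl_LocalInvocationIndex; // divergent: differs per invocation
 *    uint c = a + b;                   // divergent: derived from b
 *
 * Divergence is propagated forward along SSA def-use edges and through
 * phis until a fixed point is reached.
 */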

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
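
/* Illustrative sketch (pseudocode) of how the state above evolves:
 *
 *    loop {
 *       if (divergent_cond)
 *          continue;   // sets divergent_loop_continue; all loop CF that
 *                      // follows is divergent (divergent_loop_cf)
 *       ...
 *       if (uniform_cond)
 *          break;      // only some of the still-active invocations reach
 *                      // this point, so it counts as a divergent break
 *    }
 */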

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

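/* An ALU result is divergent iff any of its sources is divergent, e.g.
 * ssa_2 = fadd ssa_0, ssa_1 is divergent if ssa_0 or ssa_1 is. Within one
 * analysis run divergence only ever spreads, so the visitors below return
 * early once the destination is already marked divergent. */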
static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->dest.dest.ssa.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->dest.dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_ray_launch_size_addr_amd:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_task_ib_addr:
   case nir_intrinsic_load_task_ib_stride:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_shader_query_enabled_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_global_block_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage != MESA_SHADER_MESH)
         is_divergent = true;
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* Divergent if src[0] is, so nothing else to do. */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size, or
    * if the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if the source is uniform and the
    * operation is invariant.
    */
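   /* Illustrative example, assuming a uniform source s:
    *    reduce(s, op=iand, cluster_size=4) -> uniform (s & s & ... == s)
    *    reduce(s, op=iadd, cluster_size=4) -> divergent: the sum depends
    *        on how many invocations are active in each cluster
    */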
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_byte_permute_amd:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

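   /* shuffle is uniform if either operand is uniform: a uniform index makes
    * every invocation read the same lane, and shuffling a uniform value
    * yields that value no matter which lane is read. */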
   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_fmin:
   case nir_intrinsic_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_task_payload_atomic_add:
   case nir_intrinsic_task_payload_atomic_imin:
   case nir_intrinsic_task_payload_atomic_umin:
   case nir_intrinsic_task_payload_atomic_imax:
   case nir_intrinsic_task_payload_atomic_umax:
   case nir_intrinsic_task_payload_atomic_and:
   case nir_intrinsic_task_payload_atomic_or:
   case nir_intrinsic_task_payload_atomic_xor:
   case nir_intrinsic_task_payload_atomic_exchange:
   case nir_intrinsic_task_payload_atomic_comp_swap:
   case nir_intrinsic_task_payload_atomic_fadd:
   case nir_intrinsic_task_payload_atomic_fmin:
   case nir_intrinsic_task_payload_atomic_fmax:
   case nir_intrinsic_task_payload_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add_amd:
   case nir_intrinsic_global_atomic_imin_amd:
   case nir_intrinsic_global_atomic_umin_amd:
   case nir_intrinsic_global_atomic_imax_amd:
   case nir_intrinsic_global_atomic_umax_amd:
   case nir_intrinsic_global_atomic_and_amd:
   case nir_intrinsic_global_atomic_or_amd:
   case nir_intrinsic_global_atomic_xor_amd:
   case nir_intrinsic_global_atomic_exchange_amd:
   case nir_intrinsic_global_atomic_comp_swap_amd:
   case nir_intrinsic_global_atomic_fadd_amd:
   case nir_intrinsic_global_atomic_fmin_amd:
   case nir_intrinsic_global_atomic_fmax_amd:
   case nir_intrinsic_global_atomic_fcomp_swap_amd:
   case nir_intrinsic_global_atomic_add_2x32:
   case nir_intrinsic_global_atomic_imin_2x32:
   case nir_intrinsic_global_atomic_umin_2x32:
   case nir_intrinsic_global_atomic_imax_2x32:
   case nir_intrinsic_global_atomic_umax_2x32:
   case nir_intrinsic_global_atomic_and_2x32:
   case nir_intrinsic_global_atomic_or_2x32:
   case nir_intrinsic_global_atomic_xor_2x32:
   case nir_intrinsic_global_atomic_exchange_2x32:
   case nir_intrinsic_global_atomic_comp_swap_2x32:
   case nir_intrinsic_global_atomic_fadd_2x32:
   case nir_intrinsic_global_atomic_fmin_2x32:
   case nir_intrinsic_global_atomic_fmax_2x32:
   case nir_intrinsic_global_atomic_fcomp_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_has_input_vertex_amd:
   case nir_intrinsic_has_input_primitive_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->dest.ssa.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* deref->var is only set for nir_deref_type_var; for casts, check
       * the modes stored on the deref instruction itself. */
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump)
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      else
         has_changed |= update_instr_divergence(state->shader, instr);
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represents the joining point of different paths
 *     created by an "if-then-else" branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
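/* Example (pseudo-NIR):
 *    if ssa_cond { ssa_a = ... } else { ssa_b = ... }
 *    ssa_x = phi ssa_a, ssa_b
 * ssa_x is divergent if ssa_cond, ssa_a or ssa_b is divergent. */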
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->dest.ssa.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the merged value is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: exists only at loop headers and
 *     merges initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent, or if a divergent loop continue condition
 *     is associated with a different ssa-def. */
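/* Example (pseudo-NIR):
 *    loop {
 *       ssa_x = phi ssa_0 (preheader), ssa_a (continue), ssa_b (backedge)
 *       ...
 *    }
 * With a divergent continue, ssa_x is divergent if ssa_a and ssa_b are
 * different defs: invocations arrive at the header holding different
 * values depending on which path they took. */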
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represents values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
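/* Example (pseudo-NIR, LCSSA form):
 *    loop {
 *       ...
 *       if ssa_cond break;
 *       ssa_v = ...
 *    }
 *    ssa_out = phi ssa_v
 * ssa_out is divergent if ssa_v is, or if the break is divergent:
 * invocations then leave the loop in different iterations and can
 * observe different values of ssa_v. */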
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->dest.ssa.divergent)
      return false;

   if (divergent_break) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes the succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken only by some
    * invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

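/* Entry point. The shader is expected to be in LCSSA form already; a
 * typical caller sequence (illustrative, flags vary by driver) is:
 *
 *    nir_convert_to_lcssa(shader, true, true);
 *    nir_divergence_analysis(shader);
 */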
void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr);
   return true;
}


bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}
