1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "nir.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* This pass computes for each ssa definition if it is uniform.
28bf215546Sopenharmony_ci * That is, the variable has the same value for all invocations
29bf215546Sopenharmony_ci * of the group.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * This divergence analysis pass expects the shader to be in LCSSA-form.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci * This algorithm implements "The Simple Divergence Analysis" from
34bf215546Sopenharmony_ci * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35bf215546Sopenharmony_ci * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
36bf215546Sopenharmony_ci * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37bf215546Sopenharmony_ci */
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_cistruct divergence_state {
40bf215546Sopenharmony_ci   const gl_shader_stage stage;
41bf215546Sopenharmony_ci   nir_shader *shader;
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   /** current control flow state */
44bf215546Sopenharmony_ci   /* True if some loop-active invocations might take a different control-flow path.
45bf215546Sopenharmony_ci    * A divergent break does not cause subsequent control-flow to be considered
46bf215546Sopenharmony_ci    * divergent because those invocations are no longer active in the loop.
47bf215546Sopenharmony_ci    * For a divergent if, both sides are considered divergent flow because
48bf215546Sopenharmony_ci    * the other side is still loop-active. */
49bf215546Sopenharmony_ci   bool divergent_loop_cf;
50bf215546Sopenharmony_ci   /* True if a divergent continue happened since the loop header */
51bf215546Sopenharmony_ci   bool divergent_loop_continue;
52bf215546Sopenharmony_ci   /* True if a divergent break happened since the loop header */
53bf215546Sopenharmony_ci   bool divergent_loop_break;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   /* True if we visit the block for the fist time */
56bf215546Sopenharmony_ci   bool first_visit;
57bf215546Sopenharmony_ci};
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_cistatic bool
60bf215546Sopenharmony_civisit_cf_list(struct exec_list *list, struct divergence_state *state);
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_cistatic bool
63bf215546Sopenharmony_civisit_alu(nir_alu_instr *instr)
64bf215546Sopenharmony_ci{
65bf215546Sopenharmony_ci   if (instr->dest.dest.ssa.divergent)
66bf215546Sopenharmony_ci      return false;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   unsigned num_src = nir_op_infos[instr->op].num_inputs;
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_src; i++) {
71bf215546Sopenharmony_ci      if (instr->src[i].src.ssa->divergent) {
72bf215546Sopenharmony_ci         instr->dest.dest.ssa.divergent = true;
73bf215546Sopenharmony_ci         return true;
74bf215546Sopenharmony_ci      }
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   return false;
78bf215546Sopenharmony_ci}
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_cistatic bool
81bf215546Sopenharmony_civisit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84bf215546Sopenharmony_ci      return false;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   if (instr->dest.ssa.divergent)
87bf215546Sopenharmony_ci      return false;
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   nir_divergence_options options = shader->options->divergence_analysis_options;
90bf215546Sopenharmony_ci   gl_shader_stage stage = shader->info.stage;
91bf215546Sopenharmony_ci   bool is_divergent = false;
92bf215546Sopenharmony_ci   switch (instr->intrinsic) {
93bf215546Sopenharmony_ci   /* Intrinsics which are always uniform */
94bf215546Sopenharmony_ci   case nir_intrinsic_shader_clock:
95bf215546Sopenharmony_ci   case nir_intrinsic_ballot:
96bf215546Sopenharmony_ci   case nir_intrinsic_read_invocation:
97bf215546Sopenharmony_ci   case nir_intrinsic_read_first_invocation:
98bf215546Sopenharmony_ci   case nir_intrinsic_vote_any:
99bf215546Sopenharmony_ci   case nir_intrinsic_vote_all:
100bf215546Sopenharmony_ci   case nir_intrinsic_vote_feq:
101bf215546Sopenharmony_ci   case nir_intrinsic_vote_ieq:
102bf215546Sopenharmony_ci   case nir_intrinsic_load_push_constant:
103bf215546Sopenharmony_ci   case nir_intrinsic_load_work_dim:
104bf215546Sopenharmony_ci   case nir_intrinsic_load_num_workgroups:
105bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_size:
106bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_id:
107bf215546Sopenharmony_ci   case nir_intrinsic_load_num_subgroups:
108bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_launch_size:
109bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_launch_size_addr_amd:
110bf215546Sopenharmony_ci   case nir_intrinsic_load_sbt_base_amd:
111bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_size:
112bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_eq_mask:
113bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_ge_mask:
114bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_gt_mask:
115bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_le_mask:
116bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_lt_mask:
117bf215546Sopenharmony_ci   case nir_intrinsic_first_invocation:
118bf215546Sopenharmony_ci   case nir_intrinsic_last_invocation:
119bf215546Sopenharmony_ci   case nir_intrinsic_load_base_instance:
120bf215546Sopenharmony_ci   case nir_intrinsic_load_base_vertex:
121bf215546Sopenharmony_ci   case nir_intrinsic_load_first_vertex:
122bf215546Sopenharmony_ci   case nir_intrinsic_load_draw_id:
123bf215546Sopenharmony_ci   case nir_intrinsic_load_is_indexed_draw:
124bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_scale:
125bf215546Sopenharmony_ci   case nir_intrinsic_load_user_clip_plane:
126bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_scale:
127bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_scale:
128bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_z_scale:
129bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_offset:
130bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_offset:
131bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_offset:
132bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_z_offset:
133bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_a_float:
134bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_b_float:
135bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_g_float:
136bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_r_float:
137bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_rgba:
138bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
139bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
140bf215546Sopenharmony_ci   case nir_intrinsic_load_line_width:
141bf215546Sopenharmony_ci   case nir_intrinsic_load_aa_line_width:
142bf215546Sopenharmony_ci   case nir_intrinsic_load_xfb_address:
143bf215546Sopenharmony_ci   case nir_intrinsic_load_num_vertices:
144bf215546Sopenharmony_ci   case nir_intrinsic_load_fb_layers_v3d:
145bf215546Sopenharmony_ci   case nir_intrinsic_load_tcs_num_patches_amd:
146bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_factors_amd:
147bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_offchip_amd:
148bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_factors_offset_amd:
149bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
150bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_mesh_scratch_amd:
151bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
152bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_esgs_amd:
153bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_es2gs_offset_amd:
154bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_task_draw_amd:
155bf215546Sopenharmony_ci   case nir_intrinsic_load_ring_task_payload_amd:
156bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ring_entry_amd:
157bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ib_addr:
158bf215546Sopenharmony_ci   case nir_intrinsic_load_task_ib_stride:
159bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_positions_pan:
160bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
161bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
162bf215546Sopenharmony_ci   case nir_intrinsic_load_shader_query_enabled_amd:
163bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_front_face_enabled_amd:
164bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_back_face_enabled_amd:
165bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_ccw_amd:
166bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
167bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_any_enabled_amd:
168bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_prim_precision_amd:
169bf215546Sopenharmony_ci   case nir_intrinsic_load_user_data_amd:
170bf215546Sopenharmony_ci   case nir_intrinsic_load_force_vrs_rates_amd:
171bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_inner_default:
172bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_outer_default:
173bf215546Sopenharmony_ci   case nir_intrinsic_load_scalar_arg_amd:
174bf215546Sopenharmony_ci   case nir_intrinsic_load_smem_amd:
175bf215546Sopenharmony_ci   case nir_intrinsic_load_global_const_block_intel:
176bf215546Sopenharmony_ci   case nir_intrinsic_load_reloc_const_intel:
177bf215546Sopenharmony_ci   case nir_intrinsic_load_global_block_intel:
178bf215546Sopenharmony_ci   case nir_intrinsic_load_btd_global_arg_addr_intel:
179bf215546Sopenharmony_ci   case nir_intrinsic_load_btd_local_arg_addr_intel:
180bf215546Sopenharmony_ci   case nir_intrinsic_load_mesh_inline_data_intel:
181bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
182bf215546Sopenharmony_ci   case nir_intrinsic_load_lshs_vertex_stride_amd:
183bf215546Sopenharmony_ci   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
184bf215546Sopenharmony_ci      is_divergent = false;
185bf215546Sopenharmony_ci      break;
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   /* Intrinsics with divergence depending on shader stage and hardware */
188bf215546Sopenharmony_ci   case nir_intrinsic_load_frag_shading_rate:
189bf215546Sopenharmony_ci      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
190bf215546Sopenharmony_ci      break;
191bf215546Sopenharmony_ci   case nir_intrinsic_load_input:
192bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent;
193bf215546Sopenharmony_ci      if (stage == MESA_SHADER_FRAGMENT)
194bf215546Sopenharmony_ci         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
195bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_TESS_EVAL)
196bf215546Sopenharmony_ci         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
197bf215546Sopenharmony_ci      else if (stage != MESA_SHADER_MESH)
198bf215546Sopenharmony_ci         is_divergent = true;
199bf215546Sopenharmony_ci      break;
200bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input:
201bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent ||
202bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent;
203bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_CTRL)
204bf215546Sopenharmony_ci         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
205bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_EVAL)
206bf215546Sopenharmony_ci         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
207bf215546Sopenharmony_ci      else
208bf215546Sopenharmony_ci         is_divergent = true;
209bf215546Sopenharmony_ci      break;
210bf215546Sopenharmony_ci   case nir_intrinsic_load_input_vertex:
211bf215546Sopenharmony_ci      is_divergent = instr->src[1].ssa->divergent;
212bf215546Sopenharmony_ci      assert(stage == MESA_SHADER_FRAGMENT);
213bf215546Sopenharmony_ci      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
214bf215546Sopenharmony_ci      break;
215bf215546Sopenharmony_ci   case nir_intrinsic_load_output:
216bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent;
217bf215546Sopenharmony_ci      switch (stage) {
218bf215546Sopenharmony_ci      case MESA_SHADER_TESS_CTRL:
219bf215546Sopenharmony_ci         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
220bf215546Sopenharmony_ci         break;
221bf215546Sopenharmony_ci      case MESA_SHADER_FRAGMENT:
222bf215546Sopenharmony_ci         is_divergent = true;
223bf215546Sopenharmony_ci         break;
224bf215546Sopenharmony_ci      case MESA_SHADER_TASK:
225bf215546Sopenharmony_ci      case MESA_SHADER_MESH:
226bf215546Sopenharmony_ci         /* Divergent if src[0] is, so nothing else to do. */
227bf215546Sopenharmony_ci         break;
228bf215546Sopenharmony_ci      default:
229bf215546Sopenharmony_ci         unreachable("Invalid stage for load_output");
230bf215546Sopenharmony_ci      }
231bf215546Sopenharmony_ci      break;
232bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_output:
233bf215546Sopenharmony_ci      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
234bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent ||
235bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent ||
236bf215546Sopenharmony_ci                     (stage == MESA_SHADER_TESS_CTRL &&
237bf215546Sopenharmony_ci                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
238bf215546Sopenharmony_ci      break;
239bf215546Sopenharmony_ci   case nir_intrinsic_load_per_primitive_output:
240bf215546Sopenharmony_ci      assert(stage == MESA_SHADER_MESH);
241bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent ||
242bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent;
243bf215546Sopenharmony_ci      break;
244bf215546Sopenharmony_ci   case nir_intrinsic_load_layer_id:
245bf215546Sopenharmony_ci   case nir_intrinsic_load_front_face:
246bf215546Sopenharmony_ci      assert(stage == MESA_SHADER_FRAGMENT);
247bf215546Sopenharmony_ci      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
248bf215546Sopenharmony_ci      break;
249bf215546Sopenharmony_ci   case nir_intrinsic_load_view_index:
250bf215546Sopenharmony_ci      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
251bf215546Sopenharmony_ci      if (options & nir_divergence_view_index_uniform)
252bf215546Sopenharmony_ci         is_divergent = false;
253bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_FRAGMENT)
254bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
255bf215546Sopenharmony_ci      break;
256bf215546Sopenharmony_ci   case nir_intrinsic_load_fs_input_interp_deltas:
257bf215546Sopenharmony_ci      assert(stage == MESA_SHADER_FRAGMENT);
258bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent;
259bf215546Sopenharmony_ci      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
260bf215546Sopenharmony_ci      break;
261bf215546Sopenharmony_ci   case nir_intrinsic_load_primitive_id:
262bf215546Sopenharmony_ci      if (stage == MESA_SHADER_FRAGMENT)
263bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
264bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_TESS_CTRL)
265bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
266bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_TESS_EVAL)
267bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
268bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
269bf215546Sopenharmony_ci         is_divergent = true;
270bf215546Sopenharmony_ci      else
271bf215546Sopenharmony_ci         unreachable("Invalid stage for load_primitive_id");
272bf215546Sopenharmony_ci      break;
273bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_inner:
274bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_level_outer:
275bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_CTRL)
276bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
277bf215546Sopenharmony_ci      else if (stage == MESA_SHADER_TESS_EVAL)
278bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
279bf215546Sopenharmony_ci      else
280bf215546Sopenharmony_ci         unreachable("Invalid stage for load_primitive_tess_level_*");
281bf215546Sopenharmony_ci      break;
282bf215546Sopenharmony_ci   case nir_intrinsic_load_patch_vertices_in:
283bf215546Sopenharmony_ci      if (stage == MESA_SHADER_TESS_EVAL)
284bf215546Sopenharmony_ci         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
285bf215546Sopenharmony_ci      else
286bf215546Sopenharmony_ci         assert(stage == MESA_SHADER_TESS_CTRL);
287bf215546Sopenharmony_ci      break;
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_index:
290bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_id:
291bf215546Sopenharmony_ci      assert(gl_shader_stage_uses_workgroup(stage));
292bf215546Sopenharmony_ci      if (stage == MESA_SHADER_COMPUTE)
293bf215546Sopenharmony_ci         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
294bf215546Sopenharmony_ci      break;
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   /* Clustered reductions are uniform if cluster_size == subgroup_size or
297bf215546Sopenharmony_ci    * the source is uniform and the operation is invariant.
298bf215546Sopenharmony_ci    * Inclusive scans are uniform if
299bf215546Sopenharmony_ci    * the source is uniform and the operation is invariant
300bf215546Sopenharmony_ci    */
301bf215546Sopenharmony_ci   case nir_intrinsic_reduce:
302bf215546Sopenharmony_ci      if (nir_intrinsic_cluster_size(instr) == 0)
303bf215546Sopenharmony_ci         return false;
304bf215546Sopenharmony_ci      FALLTHROUGH;
305bf215546Sopenharmony_ci   case nir_intrinsic_inclusive_scan: {
306bf215546Sopenharmony_ci      nir_op op = nir_intrinsic_reduction_op(instr);
307bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent;
308bf215546Sopenharmony_ci      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
309bf215546Sopenharmony_ci          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
310bf215546Sopenharmony_ci          op != nir_op_iand && op != nir_op_ior)
311bf215546Sopenharmony_ci         is_divergent = true;
312bf215546Sopenharmony_ci      break;
313bf215546Sopenharmony_ci   }
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
316bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
317bf215546Sopenharmony_ci      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
318bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent;
319bf215546Sopenharmony_ci      break;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   case nir_intrinsic_get_ssbo_size:
322bf215546Sopenharmony_ci   case nir_intrinsic_deref_buffer_array_length:
323bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
324bf215546Sopenharmony_ci      break;
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   case nir_intrinsic_image_load:
327bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_load:
328bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_load:
329bf215546Sopenharmony_ci   case nir_intrinsic_image_sparse_load:
330bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_sparse_load:
331bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_sparse_load:
332bf215546Sopenharmony_ci      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
333bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
334bf215546Sopenharmony_ci      break;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   /* Intrinsics with divergence depending on sources */
338bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bitfield_extract:
339bf215546Sopenharmony_ci   case nir_intrinsic_ballot_find_lsb:
340bf215546Sopenharmony_ci   case nir_intrinsic_ballot_find_msb:
341bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bit_count_reduce:
342bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_xor:
343bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_up:
344bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_down:
345bf215546Sopenharmony_ci   case nir_intrinsic_quad_broadcast:
346bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_horizontal:
347bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_vertical:
348bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_diagonal:
349bf215546Sopenharmony_ci   case nir_intrinsic_byte_permute_amd:
350bf215546Sopenharmony_ci   case nir_intrinsic_load_deref:
351bf215546Sopenharmony_ci   case nir_intrinsic_load_shared:
352bf215546Sopenharmony_ci   case nir_intrinsic_load_shared2_amd:
353bf215546Sopenharmony_ci   case nir_intrinsic_load_global:
354bf215546Sopenharmony_ci   case nir_intrinsic_load_global_2x32:
355bf215546Sopenharmony_ci   case nir_intrinsic_load_global_constant:
356bf215546Sopenharmony_ci   case nir_intrinsic_load_global_amd:
357bf215546Sopenharmony_ci   case nir_intrinsic_load_uniform:
358bf215546Sopenharmony_ci   case nir_intrinsic_load_constant:
359bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_pos_from_id:
360bf215546Sopenharmony_ci   case nir_intrinsic_load_kernel_input:
361bf215546Sopenharmony_ci   case nir_intrinsic_load_task_payload:
362bf215546Sopenharmony_ci   case nir_intrinsic_load_buffer_amd:
363bf215546Sopenharmony_ci   case nir_intrinsic_image_samples:
364bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_samples:
365bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_samples:
366bf215546Sopenharmony_ci   case nir_intrinsic_image_size:
367bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_size:
368bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_size:
369bf215546Sopenharmony_ci   case nir_intrinsic_copy_deref:
370bf215546Sopenharmony_ci   case nir_intrinsic_vulkan_resource_index:
371bf215546Sopenharmony_ci   case nir_intrinsic_vulkan_resource_reindex:
372bf215546Sopenharmony_ci   case nir_intrinsic_load_vulkan_descriptor:
373bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_read:
374bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_read_deref:
375bf215546Sopenharmony_ci   case nir_intrinsic_quad_swizzle_amd:
376bf215546Sopenharmony_ci   case nir_intrinsic_masked_swizzle_amd:
377bf215546Sopenharmony_ci   case nir_intrinsic_is_sparse_texels_resident:
378bf215546Sopenharmony_ci   case nir_intrinsic_sparse_residency_code_and:
379bf215546Sopenharmony_ci   case nir_intrinsic_bvh64_intersect_ray_amd:
380bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_load_param_intel:
381bf215546Sopenharmony_ci   case nir_intrinsic_image_load_raw_intel:
382bf215546Sopenharmony_ci   case nir_intrinsic_get_ubo_size:
383bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo_address:
384bf215546Sopenharmony_ci   case nir_intrinsic_load_desc_set_address_intel: {
385bf215546Sopenharmony_ci      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
386bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
387bf215546Sopenharmony_ci         if (instr->src[i].ssa->divergent) {
388bf215546Sopenharmony_ci            is_divergent = true;
389bf215546Sopenharmony_ci            break;
390bf215546Sopenharmony_ci         }
391bf215546Sopenharmony_ci      }
392bf215546Sopenharmony_ci      break;
393bf215546Sopenharmony_ci   }
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   case nir_intrinsic_shuffle:
396bf215546Sopenharmony_ci      is_divergent = instr->src[0].ssa->divergent &&
397bf215546Sopenharmony_ci                     instr->src[1].ssa->divergent;
398bf215546Sopenharmony_ci      break;
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci   /* Intrinsics which are always divergent */
401bf215546Sopenharmony_ci   case nir_intrinsic_load_color0:
402bf215546Sopenharmony_ci   case nir_intrinsic_load_color1:
403bf215546Sopenharmony_ci   case nir_intrinsic_load_param:
404bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_id:
405bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_id_no_per_sample:
406bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_mask_in:
407bf215546Sopenharmony_ci   case nir_intrinsic_load_interpolated_input:
408bf215546Sopenharmony_ci   case nir_intrinsic_load_point_coord_maybe_flipped:
409bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_pixel:
410bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_centroid:
411bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_sample:
412bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_model:
413bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_at_sample:
414bf215546Sopenharmony_ci   case nir_intrinsic_load_barycentric_at_offset:
415bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_offset:
416bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_sample:
417bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_centroid:
418bf215546Sopenharmony_ci   case nir_intrinsic_interp_deref_at_vertex:
419bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_coord:
420bf215546Sopenharmony_ci   case nir_intrinsic_load_point_coord:
421bf215546Sopenharmony_ci   case nir_intrinsic_load_line_coord:
422bf215546Sopenharmony_ci   case nir_intrinsic_load_frag_coord:
423bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_pos:
424bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_pos_or_center:
425bf215546Sopenharmony_ci   case nir_intrinsic_load_vertex_id_zero_base:
426bf215546Sopenharmony_ci   case nir_intrinsic_load_vertex_id:
427bf215546Sopenharmony_ci   case nir_intrinsic_load_instance_id:
428bf215546Sopenharmony_ci   case nir_intrinsic_load_invocation_id:
429bf215546Sopenharmony_ci   case nir_intrinsic_load_local_invocation_id:
430bf215546Sopenharmony_ci   case nir_intrinsic_load_local_invocation_index:
431bf215546Sopenharmony_ci   case nir_intrinsic_load_global_invocation_id:
432bf215546Sopenharmony_ci   case nir_intrinsic_load_global_invocation_id_zero_base:
433bf215546Sopenharmony_ci   case nir_intrinsic_load_global_invocation_index:
434bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_invocation:
435bf215546Sopenharmony_ci   case nir_intrinsic_load_helper_invocation:
436bf215546Sopenharmony_ci   case nir_intrinsic_is_helper_invocation:
437bf215546Sopenharmony_ci   case nir_intrinsic_load_scratch:
438bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_add:
439bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_imin:
440bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_umin:
441bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_imax:
442bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_umax:
443bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_and:
444bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_or:
445bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_xor:
446bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_exchange:
447bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_comp_swap:
448bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_fadd:
449bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_fmin:
450bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_fmax:
451bf215546Sopenharmony_ci   case nir_intrinsic_deref_atomic_fcomp_swap:
452bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_add:
453bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imin:
454bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umin:
455bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_imax:
456bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_umax:
457bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_and:
458bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_or:
459bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_xor:
460bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_exchange:
461bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_comp_swap:
462bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fadd:
463bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fmax:
464bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fmin:
465bf215546Sopenharmony_ci   case nir_intrinsic_ssbo_atomic_fcomp_swap:
466bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_add:
467bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_imin:
468bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_umin:
469bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_imax:
470bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_umax:
471bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_and:
472bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_or:
473bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_xor:
474bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_exchange:
475bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_comp_swap:
476bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_fadd:
477bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_fmin:
478bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_fmax:
479bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_inc_wrap:
480bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_dec_wrap:
481bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_add:
482bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_imin:
483bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_umin:
484bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_imax:
485bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_umax:
486bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_and:
487bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_or:
488bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_xor:
489bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_exchange:
490bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_comp_swap:
491bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_fadd:
492bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_fmin:
493bf215546Sopenharmony_ci   case nir_intrinsic_image_atomic_fmax:
494bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_add:
495bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_imin:
496bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_umin:
497bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_imax:
498bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_umax:
499bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_and:
500bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_or:
501bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_xor:
502bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_exchange:
503bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_comp_swap:
504bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_fadd:
505bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_fmin:
506bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_atomic_fmax:
507bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_add:
508bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_imin:
509bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_umin:
510bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_imax:
511bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_umax:
512bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_and:
513bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_or:
514bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_xor:
515bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_exchange:
516bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_comp_swap:
517bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fadd:
518bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fmin:
519bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fmax:
520bf215546Sopenharmony_ci   case nir_intrinsic_shared_atomic_fcomp_swap:
521bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_add:
522bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_imin:
523bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_umin:
524bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_imax:
525bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_umax:
526bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_and:
527bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_or:
528bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_xor:
529bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_exchange:
530bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_comp_swap:
531bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fadd:
532bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fmin:
533bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fmax:
534bf215546Sopenharmony_ci   case nir_intrinsic_task_payload_atomic_fcomp_swap:
535bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_add:
536bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imin:
537bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umin:
538bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imax:
539bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umax:
540bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_and:
541bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_or:
542bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_xor:
543bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_exchange:
544bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_comp_swap:
545bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fadd:
546bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmin:
547bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmax:
548bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fcomp_swap:
549bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_add_amd:
550bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imin_amd:
551bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umin_amd:
552bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imax_amd:
553bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umax_amd:
554bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_and_amd:
555bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_or_amd:
556bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_xor_amd:
557bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_exchange_amd:
558bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_comp_swap_amd:
559bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fadd_amd:
560bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmin_amd:
561bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmax_amd:
562bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fcomp_swap_amd:
563bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_add_2x32:
564bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imin_2x32:
565bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umin_2x32:
566bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_imax_2x32:
567bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_umax_2x32:
568bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_and_2x32:
569bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_or_2x32:
570bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_xor_2x32:
571bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_exchange_2x32:
572bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_comp_swap_2x32:
573bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fadd_2x32:
574bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmin_2x32:
575bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fmax_2x32:
576bf215546Sopenharmony_ci   case nir_intrinsic_global_atomic_fcomp_swap_2x32:
577bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_add:
578bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_min:
579bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_max:
580bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_and:
581bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_or:
582bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_xor:
583bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_inc:
584bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_pre_dec:
585bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_post_dec:
586bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_exchange:
587bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_comp_swap:
588bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_add_deref:
589bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_min_deref:
590bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_max_deref:
591bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_and_deref:
592bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_or_deref:
593bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_xor_deref:
594bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_inc_deref:
595bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_pre_dec_deref:
596bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_post_dec_deref:
597bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_exchange_deref:
598bf215546Sopenharmony_ci   case nir_intrinsic_atomic_counter_comp_swap_deref:
599bf215546Sopenharmony_ci   case nir_intrinsic_exclusive_scan:
600bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bit_count_exclusive:
601bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bit_count_inclusive:
602bf215546Sopenharmony_ci   case nir_intrinsic_write_invocation_amd:
603bf215546Sopenharmony_ci   case nir_intrinsic_mbcnt_amd:
604bf215546Sopenharmony_ci   case nir_intrinsic_lane_permute_16_amd:
605bf215546Sopenharmony_ci   case nir_intrinsic_elect:
606bf215546Sopenharmony_ci   case nir_intrinsic_load_tlb_color_v3d:
607bf215546Sopenharmony_ci   case nir_intrinsic_load_tess_rel_patch_id_amd:
608bf215546Sopenharmony_ci   case nir_intrinsic_load_gs_vertex_offset_amd:
609bf215546Sopenharmony_ci   case nir_intrinsic_has_input_vertex_amd:
610bf215546Sopenharmony_ci   case nir_intrinsic_has_input_primitive_amd:
611bf215546Sopenharmony_ci   case nir_intrinsic_load_packed_passthrough_primitive_amd:
612bf215546Sopenharmony_ci   case nir_intrinsic_load_initial_edgeflags_amd:
613bf215546Sopenharmony_ci   case nir_intrinsic_gds_atomic_add_amd:
614bf215546Sopenharmony_ci   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
615bf215546Sopenharmony_ci   case nir_intrinsic_load_intersection_opaque_amd:
616bf215546Sopenharmony_ci   case nir_intrinsic_load_vector_arg_amd:
617bf215546Sopenharmony_ci   case nir_intrinsic_load_btd_stack_id_intel:
618bf215546Sopenharmony_ci   case nir_intrinsic_load_topology_id_intel:
619bf215546Sopenharmony_ci   case nir_intrinsic_load_scratch_base_ptr:
620bf215546Sopenharmony_ci      is_divergent = true;
621bf215546Sopenharmony_ci      break;
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   default:
624bf215546Sopenharmony_ci#ifdef NDEBUG
625bf215546Sopenharmony_ci      is_divergent = true;
626bf215546Sopenharmony_ci      break;
627bf215546Sopenharmony_ci#else
628bf215546Sopenharmony_ci      nir_print_instr(&instr->instr, stderr);
629bf215546Sopenharmony_ci      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
630bf215546Sopenharmony_ci#endif
631bf215546Sopenharmony_ci   }
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   instr->dest.ssa.divergent = is_divergent;
634bf215546Sopenharmony_ci   return is_divergent;
635bf215546Sopenharmony_ci}
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_cistatic bool
638bf215546Sopenharmony_civisit_tex(nir_tex_instr *instr)
639bf215546Sopenharmony_ci{
640bf215546Sopenharmony_ci   if (instr->dest.ssa.divergent)
641bf215546Sopenharmony_ci      return false;
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   bool is_divergent = false;
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   for (unsigned i = 0; i < instr->num_srcs; i++) {
646bf215546Sopenharmony_ci      switch (instr->src[i].src_type) {
647bf215546Sopenharmony_ci      case nir_tex_src_sampler_deref:
648bf215546Sopenharmony_ci      case nir_tex_src_sampler_handle:
649bf215546Sopenharmony_ci      case nir_tex_src_sampler_offset:
650bf215546Sopenharmony_ci         is_divergent |= instr->src[i].src.ssa->divergent &&
651bf215546Sopenharmony_ci                         instr->sampler_non_uniform;
652bf215546Sopenharmony_ci         break;
653bf215546Sopenharmony_ci      case nir_tex_src_texture_deref:
654bf215546Sopenharmony_ci      case nir_tex_src_texture_handle:
655bf215546Sopenharmony_ci      case nir_tex_src_texture_offset:
656bf215546Sopenharmony_ci         is_divergent |= instr->src[i].src.ssa->divergent &&
657bf215546Sopenharmony_ci                         instr->texture_non_uniform;
658bf215546Sopenharmony_ci         break;
659bf215546Sopenharmony_ci      default:
660bf215546Sopenharmony_ci         is_divergent |= instr->src[i].src.ssa->divergent;
661bf215546Sopenharmony_ci         break;
662bf215546Sopenharmony_ci      }
663bf215546Sopenharmony_ci   }
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   instr->dest.ssa.divergent = is_divergent;
666bf215546Sopenharmony_ci   return is_divergent;
667bf215546Sopenharmony_ci}
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_cistatic bool
670bf215546Sopenharmony_civisit_load_const(nir_load_const_instr *instr)
671bf215546Sopenharmony_ci{
672bf215546Sopenharmony_ci   return false;
673bf215546Sopenharmony_ci}
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_cistatic bool
676bf215546Sopenharmony_civisit_ssa_undef(nir_ssa_undef_instr *instr)
677bf215546Sopenharmony_ci{
678bf215546Sopenharmony_ci   return false;
679bf215546Sopenharmony_ci}
680bf215546Sopenharmony_ci
681bf215546Sopenharmony_cistatic bool
682bf215546Sopenharmony_cinir_variable_mode_is_uniform(nir_variable_mode mode) {
683bf215546Sopenharmony_ci   switch (mode) {
684bf215546Sopenharmony_ci   case nir_var_uniform:
685bf215546Sopenharmony_ci   case nir_var_mem_ubo:
686bf215546Sopenharmony_ci   case nir_var_mem_ssbo:
687bf215546Sopenharmony_ci   case nir_var_mem_shared:
688bf215546Sopenharmony_ci   case nir_var_mem_task_payload:
689bf215546Sopenharmony_ci   case nir_var_mem_global:
690bf215546Sopenharmony_ci   case nir_var_image:
691bf215546Sopenharmony_ci      return true;
692bf215546Sopenharmony_ci   default:
693bf215546Sopenharmony_ci      return false;
694bf215546Sopenharmony_ci   }
695bf215546Sopenharmony_ci}
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_cistatic bool
698bf215546Sopenharmony_cinir_variable_is_uniform(nir_shader *shader, nir_variable *var)
699bf215546Sopenharmony_ci{
700bf215546Sopenharmony_ci   if (nir_variable_mode_is_uniform(var->data.mode))
701bf215546Sopenharmony_ci      return true;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   nir_divergence_options options = shader->options->divergence_analysis_options;
704bf215546Sopenharmony_ci   gl_shader_stage stage = shader->info.stage;
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   if (stage == MESA_SHADER_FRAGMENT &&
707bf215546Sopenharmony_ci       (options & nir_divergence_single_prim_per_subgroup) &&
708bf215546Sopenharmony_ci       var->data.mode == nir_var_shader_in &&
709bf215546Sopenharmony_ci       var->data.interpolation == INTERP_MODE_FLAT)
710bf215546Sopenharmony_ci      return true;
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci   if (stage == MESA_SHADER_TESS_CTRL &&
713bf215546Sopenharmony_ci       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
714bf215546Sopenharmony_ci       var->data.mode == nir_var_shader_out && var->data.patch)
715bf215546Sopenharmony_ci      return true;
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   if (stage == MESA_SHADER_TESS_EVAL &&
718bf215546Sopenharmony_ci       (options & nir_divergence_single_patch_per_tes_subgroup) &&
719bf215546Sopenharmony_ci       var->data.mode == nir_var_shader_in && var->data.patch)
720bf215546Sopenharmony_ci      return true;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci   return false;
723bf215546Sopenharmony_ci}
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_cistatic bool
726bf215546Sopenharmony_civisit_deref(nir_shader *shader, nir_deref_instr *deref)
727bf215546Sopenharmony_ci{
728bf215546Sopenharmony_ci   if (deref->dest.ssa.divergent)
729bf215546Sopenharmony_ci      return false;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   bool is_divergent = false;
732bf215546Sopenharmony_ci   switch (deref->deref_type) {
733bf215546Sopenharmony_ci   case nir_deref_type_var:
734bf215546Sopenharmony_ci      is_divergent = !nir_variable_is_uniform(shader, deref->var);
735bf215546Sopenharmony_ci      break;
736bf215546Sopenharmony_ci   case nir_deref_type_array:
737bf215546Sopenharmony_ci   case nir_deref_type_ptr_as_array:
738bf215546Sopenharmony_ci      is_divergent = deref->arr.index.ssa->divergent;
739bf215546Sopenharmony_ci      FALLTHROUGH;
740bf215546Sopenharmony_ci   case nir_deref_type_struct:
741bf215546Sopenharmony_ci   case nir_deref_type_array_wildcard:
742bf215546Sopenharmony_ci      is_divergent |= deref->parent.ssa->divergent;
743bf215546Sopenharmony_ci      break;
744bf215546Sopenharmony_ci   case nir_deref_type_cast:
745bf215546Sopenharmony_ci      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
746bf215546Sopenharmony_ci                     deref->parent.ssa->divergent;
747bf215546Sopenharmony_ci      break;
748bf215546Sopenharmony_ci   }
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   deref->dest.ssa.divergent = is_divergent;
751bf215546Sopenharmony_ci   return is_divergent;
752bf215546Sopenharmony_ci}
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_cistatic bool
755bf215546Sopenharmony_civisit_jump(nir_jump_instr *jump, struct divergence_state *state)
756bf215546Sopenharmony_ci{
757bf215546Sopenharmony_ci   switch (jump->type) {
758bf215546Sopenharmony_ci   case nir_jump_continue:
759bf215546Sopenharmony_ci      if (state->divergent_loop_continue)
760bf215546Sopenharmony_ci         return false;
761bf215546Sopenharmony_ci      if (state->divergent_loop_cf)
762bf215546Sopenharmony_ci         state->divergent_loop_continue = true;
763bf215546Sopenharmony_ci      return state->divergent_loop_continue;
764bf215546Sopenharmony_ci   case nir_jump_break:
765bf215546Sopenharmony_ci      if (state->divergent_loop_break)
766bf215546Sopenharmony_ci         return false;
767bf215546Sopenharmony_ci      if (state->divergent_loop_cf)
768bf215546Sopenharmony_ci         state->divergent_loop_break = true;
769bf215546Sopenharmony_ci      return state->divergent_loop_break;
770bf215546Sopenharmony_ci   case nir_jump_halt:
771bf215546Sopenharmony_ci      /* This totally kills invocations so it doesn't add divergence */
772bf215546Sopenharmony_ci      break;
773bf215546Sopenharmony_ci   case nir_jump_return:
774bf215546Sopenharmony_ci      unreachable("NIR divergence analysis: Unsupported return instruction.");
775bf215546Sopenharmony_ci      break;
776bf215546Sopenharmony_ci   case nir_jump_goto:
777bf215546Sopenharmony_ci   case nir_jump_goto_if:
778bf215546Sopenharmony_ci      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
779bf215546Sopenharmony_ci      break;
780bf215546Sopenharmony_ci   }
781bf215546Sopenharmony_ci   return false;
782bf215546Sopenharmony_ci}
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_cistatic bool
785bf215546Sopenharmony_ciset_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
786bf215546Sopenharmony_ci{
787bf215546Sopenharmony_ci   def->divergent = false;
788bf215546Sopenharmony_ci   return true;
789bf215546Sopenharmony_ci}
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_cistatic bool
792bf215546Sopenharmony_ciupdate_instr_divergence(nir_shader *shader, nir_instr *instr)
793bf215546Sopenharmony_ci{
794bf215546Sopenharmony_ci   switch (instr->type) {
795bf215546Sopenharmony_ci   case nir_instr_type_alu:
796bf215546Sopenharmony_ci      return visit_alu(nir_instr_as_alu(instr));
797bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
798bf215546Sopenharmony_ci      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
799bf215546Sopenharmony_ci   case nir_instr_type_tex:
800bf215546Sopenharmony_ci      return visit_tex(nir_instr_as_tex(instr));
801bf215546Sopenharmony_ci   case nir_instr_type_load_const:
802bf215546Sopenharmony_ci      return visit_load_const(nir_instr_as_load_const(instr));
803bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
804bf215546Sopenharmony_ci      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
805bf215546Sopenharmony_ci   case nir_instr_type_deref:
806bf215546Sopenharmony_ci      return visit_deref(shader, nir_instr_as_deref(instr));
807bf215546Sopenharmony_ci   case nir_instr_type_jump:
808bf215546Sopenharmony_ci   case nir_instr_type_phi:
809bf215546Sopenharmony_ci   case nir_instr_type_call:
810bf215546Sopenharmony_ci   case nir_instr_type_parallel_copy:
811bf215546Sopenharmony_ci   default:
812bf215546Sopenharmony_ci      unreachable("NIR divergence analysis: Unsupported instruction type.");
813bf215546Sopenharmony_ci   }
814bf215546Sopenharmony_ci}
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_cistatic bool
817bf215546Sopenharmony_civisit_block(nir_block *block, struct divergence_state *state)
818bf215546Sopenharmony_ci{
819bf215546Sopenharmony_ci   bool has_changed = false;
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
822bf215546Sopenharmony_ci      /* phis are handled when processing the branches */
823bf215546Sopenharmony_ci      if (instr->type == nir_instr_type_phi)
824bf215546Sopenharmony_ci         continue;
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_ci      if (state->first_visit)
827bf215546Sopenharmony_ci         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci      if (instr->type == nir_instr_type_jump)
830bf215546Sopenharmony_ci         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
831bf215546Sopenharmony_ci      else
832bf215546Sopenharmony_ci         has_changed |= update_instr_divergence(state->shader, instr);
833bf215546Sopenharmony_ci   }
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   return has_changed;
836bf215546Sopenharmony_ci}
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci/* There are 3 types of phi instructions:
839bf215546Sopenharmony_ci * (1) gamma: represent the joining point of different paths
840bf215546Sopenharmony_ci *     created by an “if-then-else” branch.
841bf215546Sopenharmony_ci *     The resulting value is divergent if the branch condition
842bf215546Sopenharmony_ci *     or any of the source values is divergent. */
843bf215546Sopenharmony_cistatic bool
844bf215546Sopenharmony_civisit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
845bf215546Sopenharmony_ci{
846bf215546Sopenharmony_ci   if (phi->dest.ssa.divergent)
847bf215546Sopenharmony_ci      return false;
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci   unsigned defined_srcs = 0;
850bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
851bf215546Sopenharmony_ci      /* if any source value is divergent, the resulting value is divergent */
852bf215546Sopenharmony_ci      if (src->src.ssa->divergent) {
853bf215546Sopenharmony_ci         phi->dest.ssa.divergent = true;
854bf215546Sopenharmony_ci         return true;
855bf215546Sopenharmony_ci      }
856bf215546Sopenharmony_ci      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
857bf215546Sopenharmony_ci         defined_srcs++;
858bf215546Sopenharmony_ci      }
859bf215546Sopenharmony_ci   }
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci   /* if the condition is divergent and two sources defined, the definition is divergent */
862bf215546Sopenharmony_ci   if (defined_srcs > 1 && if_cond_divergent) {
863bf215546Sopenharmony_ci      phi->dest.ssa.divergent = true;
864bf215546Sopenharmony_ci      return true;
865bf215546Sopenharmony_ci   }
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   return false;
868bf215546Sopenharmony_ci}
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci/* There are 3 types of phi instructions:
871bf215546Sopenharmony_ci * (2) mu: which only exist at loop headers,
872bf215546Sopenharmony_ci *     merge initial and loop-carried values.
873bf215546Sopenharmony_ci *     The resulting value is divergent if any source value
874bf215546Sopenharmony_ci *     is divergent or a divergent loop continue condition
875bf215546Sopenharmony_ci *     is associated with a different ssa-def. */
876bf215546Sopenharmony_cistatic bool
877bf215546Sopenharmony_civisit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
878bf215546Sopenharmony_ci{
879bf215546Sopenharmony_ci   if (phi->dest.ssa.divergent)
880bf215546Sopenharmony_ci      return false;
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci   nir_ssa_def* same = NULL;
883bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
884bf215546Sopenharmony_ci      /* if any source value is divergent, the resulting value is divergent */
885bf215546Sopenharmony_ci      if (src->src.ssa->divergent) {
886bf215546Sopenharmony_ci         phi->dest.ssa.divergent = true;
887bf215546Sopenharmony_ci         return true;
888bf215546Sopenharmony_ci      }
889bf215546Sopenharmony_ci      /* if this loop is uniform, we're done here */
890bf215546Sopenharmony_ci      if (!divergent_continue)
891bf215546Sopenharmony_ci         continue;
892bf215546Sopenharmony_ci      /* skip the loop preheader */
893bf215546Sopenharmony_ci      if (src->pred == preheader)
894bf215546Sopenharmony_ci         continue;
895bf215546Sopenharmony_ci      /* skip undef values */
896bf215546Sopenharmony_ci      if (nir_src_is_undef(src->src))
897bf215546Sopenharmony_ci         continue;
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci      /* check if all loop-carried values are from the same ssa-def */
900bf215546Sopenharmony_ci      if (!same)
901bf215546Sopenharmony_ci         same = src->src.ssa;
902bf215546Sopenharmony_ci      else if (same != src->src.ssa) {
903bf215546Sopenharmony_ci         phi->dest.ssa.divergent = true;
904bf215546Sopenharmony_ci         return true;
905bf215546Sopenharmony_ci      }
906bf215546Sopenharmony_ci   }
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   return false;
909bf215546Sopenharmony_ci}
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci/* There are 3 types of phi instructions:
912bf215546Sopenharmony_ci * (3) eta: represent values that leave a loop.
913bf215546Sopenharmony_ci *     The resulting value is divergent if the source value is divergent
914bf215546Sopenharmony_ci *     or any loop exit condition is divergent for a value which is
915bf215546Sopenharmony_ci *     not loop-invariant.
916bf215546Sopenharmony_ci *     (note: there should be no phi for loop-invariant variables.) */
917bf215546Sopenharmony_cistatic bool
918bf215546Sopenharmony_civisit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
919bf215546Sopenharmony_ci{
920bf215546Sopenharmony_ci   if (phi->dest.ssa.divergent)
921bf215546Sopenharmony_ci      return false;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   if (divergent_break) {
924bf215546Sopenharmony_ci      phi->dest.ssa.divergent = true;
925bf215546Sopenharmony_ci      return true;
926bf215546Sopenharmony_ci   }
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci   /* if any source value is divergent, the resulting value is divergent */
929bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
930bf215546Sopenharmony_ci      if (src->src.ssa->divergent) {
931bf215546Sopenharmony_ci         phi->dest.ssa.divergent = true;
932bf215546Sopenharmony_ci         return true;
933bf215546Sopenharmony_ci      }
934bf215546Sopenharmony_ci   }
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_ci   return false;
937bf215546Sopenharmony_ci}
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_cistatic bool
940bf215546Sopenharmony_civisit_if(nir_if *if_stmt, struct divergence_state *state)
941bf215546Sopenharmony_ci{
942bf215546Sopenharmony_ci   bool progress = false;
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci   struct divergence_state then_state = *state;
945bf215546Sopenharmony_ci   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
946bf215546Sopenharmony_ci   progress |= visit_cf_list(&if_stmt->then_list, &then_state);
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci   struct divergence_state else_state = *state;
949bf215546Sopenharmony_ci   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
950bf215546Sopenharmony_ci   progress |= visit_cf_list(&if_stmt->else_list, &else_state);
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci   /* handle phis after the IF */
953bf215546Sopenharmony_ci   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
954bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
955bf215546Sopenharmony_ci         break;
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_ci      if (state->first_visit)
958bf215546Sopenharmony_ci         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
959bf215546Sopenharmony_ci      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
960bf215546Sopenharmony_ci                                     if_stmt->condition.ssa->divergent);
961bf215546Sopenharmony_ci   }
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_ci   /* join loop divergence information from both branch legs */
964bf215546Sopenharmony_ci   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
965bf215546Sopenharmony_ci                                     else_state.divergent_loop_continue;
966bf215546Sopenharmony_ci   state->divergent_loop_break |= then_state.divergent_loop_break ||
967bf215546Sopenharmony_ci                                  else_state.divergent_loop_break;
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci   /* A divergent continue makes succeeding loop CF divergent:
970bf215546Sopenharmony_ci    * not all loop-active invocations participate in the remaining loop-body
971bf215546Sopenharmony_ci    * which means that a following break might be taken by some invocations, only */
972bf215546Sopenharmony_ci   state->divergent_loop_cf |= state->divergent_loop_continue;
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci   return progress;
975bf215546Sopenharmony_ci}
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_cistatic bool
978bf215546Sopenharmony_civisit_loop(nir_loop *loop, struct divergence_state *state)
979bf215546Sopenharmony_ci{
980bf215546Sopenharmony_ci   bool progress = false;
981bf215546Sopenharmony_ci   nir_block *loop_header = nir_loop_first_block(loop);
982bf215546Sopenharmony_ci   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci   /* handle loop header phis first: we have no knowledge yet about
985bf215546Sopenharmony_ci    * the loop's control flow or any loop-carried sources. */
986bf215546Sopenharmony_ci   nir_foreach_instr(instr, loop_header) {
987bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
988bf215546Sopenharmony_ci         break;
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_instr_as_phi(instr);
991bf215546Sopenharmony_ci      if (!state->first_visit && phi->dest.ssa.divergent)
992bf215546Sopenharmony_ci         continue;
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci      nir_foreach_phi_src(src, phi) {
995bf215546Sopenharmony_ci         if (src->pred == loop_preheader) {
996bf215546Sopenharmony_ci            phi->dest.ssa.divergent = src->src.ssa->divergent;
997bf215546Sopenharmony_ci            break;
998bf215546Sopenharmony_ci         }
999bf215546Sopenharmony_ci      }
1000bf215546Sopenharmony_ci      progress |= phi->dest.ssa.divergent;
1001bf215546Sopenharmony_ci   }
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_ci   /* setup loop state */
1004bf215546Sopenharmony_ci   struct divergence_state loop_state = *state;
1005bf215546Sopenharmony_ci   loop_state.divergent_loop_cf = false;
1006bf215546Sopenharmony_ci   loop_state.divergent_loop_continue = false;
1007bf215546Sopenharmony_ci   loop_state.divergent_loop_break = false;
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci   /* process loop body until no further changes are made */
1010bf215546Sopenharmony_ci   bool repeat;
1011bf215546Sopenharmony_ci   do {
1012bf215546Sopenharmony_ci      progress |= visit_cf_list(&loop->body, &loop_state);
1013bf215546Sopenharmony_ci      repeat = false;
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci      /* revisit loop header phis to see if something has changed */
1016bf215546Sopenharmony_ci      nir_foreach_instr(instr, loop_header) {
1017bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_phi)
1018bf215546Sopenharmony_ci            break;
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
1021bf215546Sopenharmony_ci                                         loop_preheader,
1022bf215546Sopenharmony_ci                                         loop_state.divergent_loop_continue);
1023bf215546Sopenharmony_ci      }
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci      loop_state.divergent_loop_cf = false;
1026bf215546Sopenharmony_ci      loop_state.first_visit = false;
1027bf215546Sopenharmony_ci   } while (repeat);
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci   /* handle phis after the loop */
1030bf215546Sopenharmony_ci   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
1031bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_phi)
1032bf215546Sopenharmony_ci         break;
1033bf215546Sopenharmony_ci
1034bf215546Sopenharmony_ci      if (state->first_visit)
1035bf215546Sopenharmony_ci         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
1036bf215546Sopenharmony_ci      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
1037bf215546Sopenharmony_ci                                      loop_state.divergent_loop_break);
1038bf215546Sopenharmony_ci   }
1039bf215546Sopenharmony_ci
1040bf215546Sopenharmony_ci   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci   return progress;
1043bf215546Sopenharmony_ci}
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_cistatic bool
1046bf215546Sopenharmony_civisit_cf_list(struct exec_list *list, struct divergence_state *state)
1047bf215546Sopenharmony_ci{
1048bf215546Sopenharmony_ci   bool has_changed = false;
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, node, node, list) {
1051bf215546Sopenharmony_ci      switch (node->type) {
1052bf215546Sopenharmony_ci      case nir_cf_node_block:
1053bf215546Sopenharmony_ci         has_changed |= visit_block(nir_cf_node_as_block(node), state);
1054bf215546Sopenharmony_ci         break;
1055bf215546Sopenharmony_ci      case nir_cf_node_if:
1056bf215546Sopenharmony_ci         has_changed |= visit_if(nir_cf_node_as_if(node), state);
1057bf215546Sopenharmony_ci         break;
1058bf215546Sopenharmony_ci      case nir_cf_node_loop:
1059bf215546Sopenharmony_ci         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
1060bf215546Sopenharmony_ci         break;
1061bf215546Sopenharmony_ci      case nir_cf_node_function:
1062bf215546Sopenharmony_ci         unreachable("NIR divergence analysis: Unsupported cf_node type.");
1063bf215546Sopenharmony_ci      }
1064bf215546Sopenharmony_ci   }
1065bf215546Sopenharmony_ci
1066bf215546Sopenharmony_ci   return has_changed;
1067bf215546Sopenharmony_ci}
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_civoid
1070bf215546Sopenharmony_cinir_divergence_analysis(nir_shader *shader)
1071bf215546Sopenharmony_ci{
1072bf215546Sopenharmony_ci   shader->info.divergence_analysis_run = true;
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   struct divergence_state state = {
1075bf215546Sopenharmony_ci      .stage = shader->info.stage,
1076bf215546Sopenharmony_ci      .shader = shader,
1077bf215546Sopenharmony_ci      .divergent_loop_cf = false,
1078bf215546Sopenharmony_ci      .divergent_loop_continue = false,
1079bf215546Sopenharmony_ci      .divergent_loop_break = false,
1080bf215546Sopenharmony_ci      .first_visit = true,
1081bf215546Sopenharmony_ci   };
1082bf215546Sopenharmony_ci
1083bf215546Sopenharmony_ci   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
1084bf215546Sopenharmony_ci}
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_cibool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
1087bf215546Sopenharmony_ci{
1088bf215546Sopenharmony_ci   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
1089bf215546Sopenharmony_ci
1090bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_phi) {
1091bf215546Sopenharmony_ci      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
1092bf215546Sopenharmony_ci      /* can only update gamma/if phis */
1093bf215546Sopenharmony_ci      if (!prev || prev->type != nir_cf_node_if)
1094bf215546Sopenharmony_ci         return false;
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci      nir_if *nif = nir_cf_node_as_if(prev);
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
1099bf215546Sopenharmony_ci      return true;
1100bf215546Sopenharmony_ci   }
1101bf215546Sopenharmony_ci
1102bf215546Sopenharmony_ci   update_instr_divergence(shader, instr);
1103bf215546Sopenharmony_ci   return true;
1104bf215546Sopenharmony_ci}
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_cibool
1108bf215546Sopenharmony_cinir_has_divergent_loop(nir_shader *shader)
1109bf215546Sopenharmony_ci{
1110bf215546Sopenharmony_ci   bool divergent_loop = false;
1111bf215546Sopenharmony_ci   nir_function_impl *func = nir_shader_get_entrypoint(shader);
1112bf215546Sopenharmony_ci
1113bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, node, node, &func->body) {
1114bf215546Sopenharmony_ci      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
1115bf215546Sopenharmony_ci         divergent_loop = true;
1116bf215546Sopenharmony_ci         break;
1117bf215546Sopenharmony_ci      }
1118bf215546Sopenharmony_ci   }
1119bf215546Sopenharmony_ci
1120bf215546Sopenharmony_ci   return divergent_loop;
1121bf215546Sopenharmony_ci}
1122