1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_control_flow.h" 27 28#include "dxil_nir.h" 29 30static void 31remove_hs_intrinsics(nir_function_impl *impl) 32{ 33 nir_foreach_block(block, impl) { 34 nir_foreach_instr_safe(instr, block) { 35 if (instr->type != nir_instr_type_intrinsic) 36 continue; 37 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 38 if (intr->intrinsic != nir_intrinsic_store_output && 39 intr->intrinsic != nir_intrinsic_memory_barrier_tcs_patch && 40 intr->intrinsic != nir_intrinsic_control_barrier) 41 continue; 42 nir_instr_remove(instr); 43 } 44 } 45 nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 46} 47 48static void 49add_instr_and_srcs_to_set(struct set *instr_set, nir_instr *instr); 50 51static bool 52add_srcs_to_set(nir_src *src, void *state) 53{ 54 assert(src->is_ssa); 55 add_instr_and_srcs_to_set(state, src->ssa->parent_instr); 56 return true; 57} 58 59static void 60add_instr_and_srcs_to_set(struct set *instr_set, nir_instr *instr) 61{ 62 bool was_already_found = false; 63 _mesa_set_search_or_add(instr_set, instr, &was_already_found); 64 if (!was_already_found) 65 nir_foreach_src(instr, add_srcs_to_set, instr_set); 66} 67 68static void 69prune_patch_function_to_intrinsic_and_srcs(nir_function_impl *impl) 70{ 71 struct set *instr_set = _mesa_pointer_set_create(NULL); 72 73 /* Do this in two phases: 74 * 1. Find all instructions that contribute to a store_output and add them to 75 * the set. Also, add instructions that contribute to control flow. 76 * 2. Erase every instruction that isn't in the set 77 */ 78 nir_foreach_block(block, impl) { 79 nir_if *following_if = nir_block_get_following_if(block); 80 if (following_if) { 81 assert(following_if->condition.is_ssa); 82 add_instr_and_srcs_to_set(instr_set, following_if->condition.ssa->parent_instr); 83 } 84 nir_foreach_instr_safe(instr, block) { 85 if (instr->type == nir_instr_type_intrinsic) { 86 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 87 if (intr->intrinsic != nir_intrinsic_store_output && 88 intr->intrinsic != nir_intrinsic_memory_barrier_tcs_patch) 89 continue; 90 } else if (instr->type != nir_instr_type_jump) 91 continue; 92 add_instr_and_srcs_to_set(instr_set, instr); 93 } 94 } 95 96 nir_foreach_block_reverse(block, impl) { 97 nir_foreach_instr_reverse_safe(instr, block) { 98 struct set_entry *entry = _mesa_set_search(instr_set, instr); 99 if (!entry) 100 nir_instr_remove(instr); 101 } 102 } 103 104 _mesa_set_destroy(instr_set, NULL); 105} 106 107static nir_cursor 108get_cursor_for_instr_without_cf(nir_instr *instr) 109{ 110 nir_block *block = instr->block; 111 if (block->cf_node.parent->type == nir_cf_node_function) 112 return nir_before_instr(instr); 113 114 do { 115 block = nir_cf_node_as_block(nir_cf_node_prev(block->cf_node.parent)); 116 } while (block->cf_node.parent->type != nir_cf_node_function); 117 return nir_after_block_before_jump(block); 118} 119 120struct tcs_patch_loop_state { 121 nir_ssa_def *deref, *count; 122 nir_cursor begin_cursor, end_cursor, insert_cursor; 123 nir_loop *loop; 124}; 125 126static void 127start_tcs_loop(nir_builder *b, struct tcs_patch_loop_state *state, nir_deref_instr *loop_var_deref) 128{ 129 if (!loop_var_deref) 130 return; 131 132 nir_store_deref(b, loop_var_deref, nir_imm_int(b, 0), 1); 133 state->loop = nir_push_loop(b); 134 state->count = nir_load_deref(b, loop_var_deref); 135 nir_push_if(b, nir_ige(b, state->count, nir_imm_int(b, b->impl->function->shader->info.tess.tcs_vertices_out))); 136 nir_jump(b, nir_jump_break); 137 nir_pop_if(b, NULL); 138 state->insert_cursor = b->cursor; 139 nir_store_deref(b, loop_var_deref, nir_iadd_imm(b, state->count, 1), 1); 140 nir_pop_loop(b, state->loop); 141} 142 143static void 144end_tcs_loop(nir_builder *b, struct tcs_patch_loop_state *state) 145{ 146 if (!state->loop) 147 return; 148 149 nir_cf_list extracted; 150 nir_cf_extract(&extracted, state->begin_cursor, state->end_cursor); 151 nir_cf_reinsert(&extracted, state->insert_cursor); 152 153 *state = (struct tcs_patch_loop_state ){ 0 }; 154} 155 156/* In HLSL/DXIL, the hull (tesselation control) shader is split into two: 157 * 1. The main hull shader, which runs once per output control point. 158 * 2. A patch constant function, which runs once overall. 159 * In GLSL/NIR, these are combined. Each invocation must write to the output 160 * array with a constant gl_InvocationID, which is (apparently) lowered to an 161 * if/else ladder in nir. Each invocation must write the same value to patch 162 * constants - or else undefined behavior strikes. NIR uses store_output to 163 * write the patch constants, and store_per_vertex_output to write the control 164 * point values. 165 * 166 * We clone the NIR function to produce 2: one with the store_output intrinsics 167 * removed, which becomes the main shader (only writes control points), and one 168 * with everything that doesn't contribute to store_output removed, which becomes 169 * the patch constant function. 170 * 171 * For the patch constant function, if the expressions rely on gl_InvocationID, 172 * then we need to run the resulting logic in a loop, using the loop counter to 173 * replace gl_InvocationID. This loop can be terminated when a barrier is hit. If 174 * gl_InvocationID is used again after the barrier, then another loop needs to begin. 175 */ 176void 177dxil_nir_split_tess_ctrl(nir_shader *nir, nir_function **patch_const_func) 178{ 179 assert(nir->info.stage == MESA_SHADER_TESS_CTRL); 180 assert(exec_list_length(&nir->functions) == 1); 181 nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); 182 183 *patch_const_func = nir_function_create(nir, "PatchConstantFunc"); 184 nir_function_impl *patch_const_func_impl = nir_function_impl_clone(nir, entrypoint); 185 (*patch_const_func)->impl = patch_const_func_impl; 186 patch_const_func_impl->function = *patch_const_func; 187 188 remove_hs_intrinsics(entrypoint); 189 prune_patch_function_to_intrinsic_and_srcs(patch_const_func_impl); 190 191 /* Kill dead references to the invocation ID from the patch const func so we don't 192 * insert unnecessarily loops 193 */ 194 bool progress; 195 do { 196 progress = false; 197 progress |= nir_opt_dead_cf(nir); 198 progress |= nir_opt_dce(nir); 199 } while (progress); 200 201 /* Now, the patch constant function needs to be split into blocks and loops. 202 * The series of instructions up to the first block containing a load_invocation_id 203 * will run sequentially. Then a loop is inserted so load_invocation_id will load the 204 * loop counter. This loop continues until a barrier is reached, when the loop 205 * is closed and the process begins again. 206 * 207 * First, sink load_invocation_id so that it's present on both sides of barriers. 208 * Each use gets a unique load of the invocation ID. 209 */ 210 nir_builder b; 211 nir_builder_init(&b, patch_const_func_impl); 212 nir_foreach_block(block, patch_const_func_impl) { 213 nir_foreach_instr_safe(instr, block) { 214 if (instr->type != nir_instr_type_intrinsic) 215 continue; 216 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 217 if (intr->intrinsic != nir_intrinsic_load_invocation_id || 218 list_length(&intr->dest.ssa.uses) + 219 list_length(&intr->dest.ssa.if_uses) <= 1) 220 continue; 221 nir_foreach_use_safe(src, &intr->dest.ssa) { 222 b.cursor = nir_before_src(src, false); 223 nir_instr_rewrite_src_ssa(src->parent_instr, src, nir_load_invocation_id(&b)); 224 } 225 nir_foreach_if_use_safe(src, &intr->dest.ssa) { 226 b.cursor = nir_before_src(src, true); 227 nir_if_rewrite_condition_ssa(src->parent_if, src, nir_load_invocation_id(&b)); 228 } 229 nir_instr_remove(instr); 230 } 231 } 232 233 /* Now replace those invocation ID loads with loads of a local variable that's used as a loop counter */ 234 nir_variable *loop_var = NULL; 235 nir_deref_instr *loop_var_deref = NULL; 236 struct tcs_patch_loop_state state = { 0 }; 237 nir_foreach_block_safe(block, patch_const_func_impl) { 238 nir_foreach_instr_safe(instr, block) { 239 if (instr->type != nir_instr_type_intrinsic) 240 continue; 241 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 242 switch (intr->intrinsic) { 243 case nir_intrinsic_load_invocation_id: { 244 if (!loop_var) { 245 loop_var = nir_local_variable_create(patch_const_func_impl, glsl_int_type(), "PatchConstInvocId"); 246 b.cursor = nir_before_cf_list(&patch_const_func_impl->body); 247 loop_var_deref = nir_build_deref_var(&b, loop_var); 248 } 249 if (!state.loop) { 250 b.cursor = state.begin_cursor = get_cursor_for_instr_without_cf(instr); 251 start_tcs_loop(&b, &state, loop_var_deref); 252 } 253 nir_ssa_def_rewrite_uses(&intr->dest.ssa, state.count); 254 break; 255 } 256 case nir_intrinsic_memory_barrier_tcs_patch: 257 /* The GL tessellation spec says: 258 * The barrier() function may only be called inside the main entry point of the tessellation control shader 259 * and may not be called in potentially divergent flow control. In particular, barrier() may not be called 260 * inside a switch statement, in either sub-statement of an if statement, inside a do, for, or while loop, 261 * or at any point after a return statement in the function main(). 262 * 263 * Therefore, we should be at function-level control flow. 264 */ 265 assert(nir_cursors_equal(nir_before_instr(instr), get_cursor_for_instr_without_cf(instr))); 266 state.end_cursor = nir_before_instr(instr); 267 end_tcs_loop(&b, &state); 268 nir_instr_remove(instr); 269 break; 270 default: 271 break; 272 } 273 } 274 } 275 state.end_cursor = nir_after_block_before_jump(nir_impl_last_block(patch_const_func_impl)); 276 end_tcs_loop(&b, &state); 277} 278 279struct remove_tess_level_accesses_data { 280 unsigned location; 281 unsigned size; 282}; 283 284static bool 285remove_tess_level_accesses(nir_builder *b, nir_instr *instr, void *_data) 286{ 287 struct remove_tess_level_accesses_data *data = _data; 288 if (instr->type != nir_instr_type_intrinsic) 289 return false; 290 291 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 292 if (intr->intrinsic != nir_intrinsic_store_output && 293 intr->intrinsic != nir_intrinsic_load_input) 294 return false; 295 296 nir_io_semantics io = nir_intrinsic_io_semantics(intr); 297 if (io.location != data->location) 298 return false; 299 300 if (nir_intrinsic_component(intr) < data->size) 301 return false; 302 303 if (intr->intrinsic == nir_intrinsic_store_output) { 304 assert(intr->src[0].is_ssa && intr->src[0].ssa->num_components == 1); 305 nir_instr_remove(instr); 306 } else { 307 b->cursor = nir_after_instr(instr); 308 assert(intr->dest.is_ssa && intr->dest.ssa.num_components == 1); 309 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_ssa_undef(b, 1, intr->dest.ssa.bit_size)); 310 } 311 return true; 312} 313 314/* Update the types of the tess level variables and remove writes to removed components. 315 * GL always has a 4-component outer tess level and 2-component inner, while D3D requires 316 * the number of components to vary based on the primitive mode. 317 * The 4 and 2 is for quads, while triangles are 3 and 1, and lines are 2 and 0. 318 */ 319bool 320dxil_nir_fixup_tess_level_for_domain(nir_shader *nir) 321{ 322 bool progress = false; 323 if (nir->info.tess._primitive_mode != TESS_PRIMITIVE_QUADS) { 324 nir_foreach_variable_with_modes_safe(var, nir, nir_var_shader_out | nir_var_shader_in) { 325 unsigned new_array_size = 4; 326 unsigned old_array_size = glsl_array_size(var->type); 327 if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) { 328 new_array_size = nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2; 329 assert(var->data.compact && (old_array_size == 4 || old_array_size == new_array_size)); 330 } else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER) { 331 new_array_size = nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 1 : 0; 332 assert(var->data.compact && (old_array_size == 2 || old_array_size == new_array_size)); 333 } else 334 continue; 335 336 if (new_array_size == old_array_size) 337 continue; 338 339 progress = true; 340 if (new_array_size) 341 var->type = glsl_array_type(glsl_float_type(), new_array_size, 0); 342 else { 343 exec_node_remove(&var->node); 344 ralloc_free(var); 345 } 346 347 struct remove_tess_level_accesses_data pass_data = { 348 .location = var->data.location, 349 .size = new_array_size 350 }; 351 352 nir_shader_instructions_pass(nir, remove_tess_level_accesses, 353 nir_metadata_block_index | nir_metadata_dominance, &pass_data); 354 } 355 } 356 return progress; 357} 358 359static bool 360tcs_update_deref_input_types(nir_builder *b, nir_instr *instr, void *data) 361{ 362 if (instr->type != nir_instr_type_deref) 363 return false; 364 365 nir_deref_instr *deref = nir_instr_as_deref(instr); 366 if (deref->deref_type != nir_deref_type_var) 367 return false; 368 369 nir_variable *var = deref->var; 370 deref->type = var->type; 371 return true; 372} 373 374bool 375dxil_nir_set_tcs_patches_in(nir_shader *nir, unsigned num_control_points) 376{ 377 bool progress = false; 378 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) { 379 if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) { 380 var->type = glsl_array_type(glsl_get_array_element(var->type), num_control_points, 0); 381 progress = true; 382 } 383 } 384 385 if (progress) 386 nir_shader_instructions_pass(nir, tcs_update_deref_input_types, nir_metadata_all, NULL); 387 388 return progress; 389} 390