1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "dzn_nir.h" 25 26#include "spirv_to_dxil.h" 27#include "nir_to_dxil.h" 28#include "nir_builder.h" 29#include "nir_vulkan.h" 30 31static nir_ssa_def * 32dzn_nir_create_bo_desc(nir_builder *b, 33 nir_variable_mode mode, 34 uint32_t desc_set, 35 uint32_t binding, 36 const char *name, 37 unsigned access) 38{ 39 struct glsl_struct_field field = { 40 .type = mode == nir_var_mem_ubo ? 41 glsl_array_type(glsl_uint_type(), 4096, 4) : 42 glsl_uint_type(), 43 .name = "dummy_int", 44 }; 45 const struct glsl_type *dummy_type = 46 glsl_struct_type(&field, 1, "dummy_type", false); 47 48 nir_variable *var = 49 nir_variable_create(b->shader, mode, dummy_type, name); 50 var->data.descriptor_set = desc_set; 51 var->data.binding = binding; 52 var->data.access = access; 53 54 assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo); 55 if (mode == nir_var_mem_ubo) 56 b->shader->info.num_ubos++; 57 else 58 b->shader->info.num_ssbos++; 59 60 VkDescriptorType desc_type = 61 var->data.mode == nir_var_mem_ubo ? 62 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : 63 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 64 nir_address_format addr_format = nir_address_format_32bit_index_offset; 65 nir_ssa_def *index = 66 nir_vulkan_resource_index(b, 67 nir_address_format_num_components(addr_format), 68 nir_address_format_bit_size(addr_format), 69 nir_imm_int(b, 0), 70 .desc_set = desc_set, 71 .binding = binding, 72 .desc_type = desc_type); 73 74 nir_ssa_def *desc = 75 nir_load_vulkan_descriptor(b, 76 nir_address_format_num_components(addr_format), 77 nir_address_format_bit_size(addr_format), 78 index, 79 .desc_type = desc_type); 80 81 return nir_channel(b, desc, 0); 82} 83 84nir_shader * 85dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type) 86{ 87 const char *type_str[] = { 88 "draw", 89 "draw_count", 90 "indexed_draw", 91 "indexed_draw_count", 92 "draw_triangle_fan", 93 "draw_count_triangle_fan", 94 "indexed_draw_triangle_fan", 95 "indexed_draw_count_triangle_fan", 96 "indexed_draw_triangle_fan_prim_restart", 97 "indexed_draw_count_triangle_fan_prim_restart", 98 }; 99 100 assert(type < ARRAY_SIZE(type_str)); 101 102 bool indexed = type == DZN_INDIRECT_INDEXED_DRAW || 103 type == DZN_INDIRECT_INDEXED_DRAW_COUNT || 104 type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN || 105 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 106 type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 107 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 108 bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || 109 type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN || 110 type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN || 111 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 112 type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 113 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 114 bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT || 115 type == DZN_INDIRECT_INDEXED_DRAW_COUNT || 116 type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN || 117 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 118 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 119 bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 120 type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 121 nir_builder b = 122 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 123 dxil_get_nir_compiler_options(), 124 "dzn_meta_indirect_%s()", 125 type_str[type]); 126 b.shader->info.internal = true; 127 128 nir_ssa_def *params_desc = 129 dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 130 nir_ssa_def *draw_buf_desc = 131 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE); 132 nir_ssa_def *exec_buf_desc = 133 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE); 134 135 unsigned params_size; 136 if (triangle_fan) 137 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params); 138 else 139 params_size = sizeof(struct dzn_indirect_draw_rewrite_params); 140 141 nir_ssa_def *params = 142 nir_load_ubo(&b, params_size / 4, 32, 143 params_desc, nir_imm_int(&b, 0), 144 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 145 146 nir_ssa_def *draw_stride = nir_channel(&b, params, 0); 147 nir_ssa_def *exec_stride = 148 triangle_fan ? 149 nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) : 150 nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params)); 151 nir_ssa_def *index = 152 nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); 153 154 if (indirect_count) { 155 nir_ssa_def *count_buf_desc = 156 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE); 157 158 nir_ssa_def *draw_count = 159 nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4); 160 161 nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0))); 162 nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0), 163 .write_mask = 0x1, .access = ACCESS_NON_READABLE, 164 .align_mul = 16); 165 nir_pop_if(&b, NULL); 166 167 nir_push_if(&b, nir_ult(&b, index, draw_count)); 168 } 169 170 nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index); 171 172 /* The first entry contains the indirect count */ 173 nir_ssa_def *exec_offset = 174 indirect_count ? 175 nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) : 176 nir_imul(&b, exec_stride, index); 177 178 nir_ssa_def *draw_info1 = 179 nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4); 180 nir_ssa_def *draw_info2 = 181 indexed ? 182 nir_load_ssbo(&b, 1, 32, draw_buf_desc, 183 nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) : 184 nir_imm_int(&b, 0); 185 186 nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2); 187 nir_ssa_def *base_instance = 188 indexed ? draw_info2 : nir_channel(&b, draw_info1, 3); 189 190 nir_ssa_def *exec_vals[8] = { 191 first_vertex, 192 base_instance, 193 index, 194 }; 195 196 if (triangle_fan) { 197 /* Patch {vertex,index}_count and first_index */ 198 nir_ssa_def *triangle_count = 199 nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2)); 200 exec_vals[3] = nir_imul_imm(&b, triangle_count, 3); 201 exec_vals[4] = nir_channel(&b, draw_info1, 1); 202 exec_vals[5] = nir_imm_int(&b, 0); 203 exec_vals[6] = first_vertex; 204 exec_vals[7] = base_instance; 205 206 nir_ssa_def *triangle_fan_exec_buf_desc = 207 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4, 208 "triangle_fan_exec_buf", 209 ACCESS_NON_READABLE); 210 nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1); 211 nir_ssa_def *triangle_fan_index_buf_addr_lo = 212 nir_iadd(&b, nir_channel(&b, params, 2), 213 nir_imul(&b, triangle_fan_index_buf_stride, index)); 214 215 nir_ssa_def *triangle_fan_exec_vals[9] = { 0 }; 216 uint32_t triangle_fan_exec_param_count = 0; 217 nir_ssa_def *addr_lo_overflow = 218 nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2)); 219 nir_ssa_def *triangle_fan_index_buf_addr_hi = 220 nir_iadd(&b, nir_channel(&b, params, 3), 221 nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0))); 222 223 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo; 224 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi; 225 226 if (prim_restart) { 227 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2); 228 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0); 229 uint32_t index_count_offset = 230 offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count); 231 nir_ssa_def *exec_buf_start = 232 nir_load_ubo(&b, 2, 32, 233 params_desc, nir_imm_int(&b, 16), 234 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 235 nir_ssa_def *exec_buf_start_lo = 236 nir_iadd(&b, nir_imm_int(&b, index_count_offset), 237 nir_iadd(&b, nir_channel(&b, exec_buf_start, 0), 238 nir_imul(&b, exec_stride, index))); 239 addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0)); 240 nir_ssa_def *exec_buf_start_hi = 241 nir_iadd(&b, nir_channel(&b, exec_buf_start, 0), 242 nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0))); 243 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo; 244 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi; 245 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 246 } else { 247 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = 248 indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0); 249 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = 250 triangle_count; 251 } 252 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 253 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 254 255 unsigned rewrite_index_exec_params = 256 prim_restart ? 257 sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) : 258 sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params); 259 nir_ssa_def *triangle_fan_exec_stride = 260 nir_imm_int(&b, rewrite_index_exec_params); 261 nir_ssa_def *triangle_fan_exec_offset = 262 nir_imul(&b, triangle_fan_exec_stride, index); 263 264 for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) { 265 unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4); 266 uint32_t mask = (1 << comps) - 1; 267 268 nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps), 269 triangle_fan_exec_buf_desc, 270 nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4), 271 .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4); 272 } 273 274 nir_ssa_def *ibview_vals[] = { 275 triangle_fan_index_buf_addr_lo, 276 triangle_fan_index_buf_addr_hi, 277 triangle_fan_index_buf_stride, 278 nir_imm_int(&b, DXGI_FORMAT_R32_UINT), 279 }; 280 281 nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)), 282 exec_buf_desc, exec_offset, 283 .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 284 exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4); 285 } else { 286 exec_vals[3] = nir_channel(&b, draw_info1, 0); 287 exec_vals[4] = nir_channel(&b, draw_info1, 1); 288 exec_vals[5] = nir_channel(&b, draw_info1, 2); 289 exec_vals[6] = nir_channel(&b, draw_info1, 3); 290 exec_vals[7] = draw_info2; 291 } 292 293 nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4), 294 exec_buf_desc, exec_offset, 295 .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 296 nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4), 297 exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16), 298 .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 299 300 if (indirect_count) 301 nir_pop_if(&b, NULL); 302 303 return b.shader; 304} 305 306nir_shader * 307dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) 308{ 309 assert(old_index_size == 2 || old_index_size == 4); 310 311 nir_builder b = 312 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 313 dxil_get_nir_compiler_options(), 314 "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)", 315 old_index_size); 316 b.shader->info.internal = true; 317 318 nir_ssa_def *params_desc = 319 dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 320 nir_ssa_def *new_index_buf_desc = 321 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, 322 "new_index_buf", ACCESS_NON_READABLE); 323 nir_ssa_def *old_index_buf_desc = 324 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, 325 "old_index_buf", ACCESS_NON_WRITEABLE); 326 nir_ssa_def *new_index_count_ptr_desc = 327 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, 328 "new_index_count_ptr", ACCESS_NON_READABLE); 329 330 nir_ssa_def *params = 331 nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32, 332 params_desc, nir_imm_int(&b, 0), 333 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 334 335 nir_ssa_def *prim_restart_val = 336 nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff); 337 nir_variable *old_index_ptr_var = 338 nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var"); 339 nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0); 340 nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1); 341 nir_variable *new_index_ptr_var = 342 nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var"); 343 nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1); 344 nir_ssa_def *old_index_count = nir_channel(&b, params, 1); 345 nir_variable *index0_var = 346 nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var"); 347 nir_store_var(&b, index0_var, prim_restart_val, 1); 348 349 /* 350 * Filter out all primitive-restart magic values, and generate a triangle list 351 * from the triangle fan definition. 352 * 353 * Basically: 354 * 355 * new_index_ptr = 0; 356 * index0 = restart_prim_value; // 0xffff or 0xffffffff 357 * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) { 358 * // If we have no starting-point we need at least 3 vertices, 359 * // otherwise we can do with two. If there's not enough vertices 360 * // to form a primitive, we just bail out. 361 * min_indices = index0 == restart_prim_value ? 3 : 2; 362 * if (old_index_ptr + min_indices > firstIndex + indexCount) 363 * break; 364 * 365 * if (index0 == restart_prim_value) { 366 * // No starting point, skip all entries until we have a 367 * // non-primitive-restart value 368 * index0 = old_index_buf[old_index_ptr++]; 369 * continue; 370 * } 371 * 372 * // If at least one index contains the primitive-restart pattern, 373 // ignore this triangle, and skip the unused entries 374 * if (old_index_buf[old_index_ptr + 1] == restart_prim_value) { 375 * old_index_ptr += 2; 376 * continue; 377 * } 378 * if (old_index_buf[old_index_ptr] == restart_prim_value) { 379 * old_index_ptr++; 380 * continue; 381 * } 382 * 383 * // We have a valid primitive, queue it to the new index buffer 384 * new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr]; 385 * new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1]; 386 * new_index_buf[new_index_ptr++] = index0; 387 * } 388 * 389 * expressed in NIR, which admitedly is not super easy to grasp with. 390 * TODO: Might be a good thing to use use the CL compiler we have and turn 391 * those shaders into CL kernels. 392 */ 393 nir_push_loop(&b); 394 395 old_index_ptr = nir_load_var(&b, old_index_ptr_var); 396 nir_ssa_def *index0 = nir_load_var(&b, index0_var); 397 398 nir_ssa_def *read_index_count = 399 nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val), 400 nir_imm_int(&b, 3), nir_imm_int(&b, 2)); 401 nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count))); 402 nir_jump(&b, nir_jump_break); 403 nir_pop_if(&b, NULL); 404 405 nir_ssa_def *old_index_offset = 406 nir_imul_imm(&b, old_index_ptr, old_index_size); 407 408 nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val)); 409 nir_ssa_def *index_val = 410 nir_load_ssbo(&b, 1, 32, old_index_buf_desc, 411 old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset, 412 .align_mul = 4); 413 if (old_index_size == 2) { 414 index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2), 415 nir_ushr_imm(&b, index_val, 16), 416 nir_iand_imm(&b, index_val, 0xffff)); 417 } 418 419 nir_store_var(&b, index0_var, index_val, 1); 420 nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1); 421 nir_jump(&b, nir_jump_continue); 422 nir_pop_if(&b, NULL); 423 424 nir_ssa_def *index12 = 425 nir_load_ssbo(&b, 2, 32, old_index_buf_desc, 426 old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset, 427 .align_mul = 4); 428 if (old_index_size == 2) { 429 nir_ssa_def *indices[] = { 430 nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff), 431 nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16), 432 nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff), 433 }; 434 435 index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2), 436 nir_vec2(&b, indices[1], indices[2]), 437 nir_vec2(&b, indices[0], indices[1])); 438 } 439 440 nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val)); 441 nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1); 442 nir_store_var(&b, index0_var, prim_restart_val, 1); 443 nir_jump(&b, nir_jump_continue); 444 nir_push_else(&b, NULL); 445 nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1); 446 nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val)); 447 nir_store_var(&b, index0_var, prim_restart_val, 1); 448 nir_jump(&b, nir_jump_continue); 449 nir_push_else(&b, NULL); 450 nir_ssa_def *new_indices = 451 nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0); 452 nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var); 453 nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t)); 454 nir_store_ssbo(&b, new_indices, new_index_buf_desc, 455 new_index_offset, 456 .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4); 457 nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1); 458 nir_pop_if(&b, NULL); 459 nir_pop_if(&b, NULL); 460 nir_pop_loop(&b, NULL); 461 462 nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var), 463 new_index_count_ptr_desc, nir_imm_int(&b, 0), 464 .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4); 465 466 return b.shader; 467} 468 469nir_shader * 470dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size) 471{ 472 assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4); 473 474 nir_builder b = 475 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 476 dxil_get_nir_compiler_options(), 477 "dzn_meta_triangle_rewrite_index(old_index_size=%d)", 478 old_index_size); 479 b.shader->info.internal = true; 480 481 nir_ssa_def *params_desc = 482 dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 483 nir_ssa_def *new_index_buf_desc = 484 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, 485 "new_index_buf", ACCESS_NON_READABLE); 486 487 nir_ssa_def *old_index_buf_desc = NULL; 488 if (old_index_size > 0) { 489 old_index_buf_desc = 490 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, 491 "old_index_buf", ACCESS_NON_WRITEABLE); 492 } 493 494 nir_ssa_def *params = 495 nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32, 496 params_desc, nir_imm_int(&b, 0), 497 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 498 499 nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); 500 nir_ssa_def *new_indices; 501 502 if (old_index_size > 0) { 503 nir_ssa_def *old_first_index = nir_channel(&b, params, 0); 504 nir_ssa_def *old_index0_offset = 505 nir_imul_imm(&b, old_first_index, old_index_size); 506 nir_ssa_def *old_index1_offset = 507 nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index), 508 old_index_size); 509 510 nir_ssa_def *old_index0 = 511 nir_load_ssbo(&b, 1, 32, old_index_buf_desc, 512 old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset, 513 .align_mul = 4); 514 515 if (old_index_size == 2) { 516 old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2), 517 nir_ushr_imm(&b, old_index0, 16), 518 nir_iand_imm(&b, old_index0, 0xffff)); 519 } 520 521 nir_ssa_def *old_index12 = 522 nir_load_ssbo(&b, 2, 32, old_index_buf_desc, 523 old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset, 524 .align_mul = 4); 525 if (old_index_size == 2) { 526 nir_ssa_def *indices[] = { 527 nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff), 528 nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16), 529 nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff), 530 }; 531 532 old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2), 533 nir_vec2(&b, indices[1], indices[2]), 534 nir_vec2(&b, indices[0], indices[1])); 535 } 536 537 /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ 538 new_indices = 539 nir_vec3(&b, nir_channel(&b, old_index12, 0), 540 nir_channel(&b, old_index12, 1), old_index0); 541 } else { 542 new_indices = 543 nir_vec3(&b, 544 nir_iadd_imm(&b, triangle, 1), 545 nir_iadd_imm(&b, triangle, 2), 546 nir_imm_int(&b, 0)); 547 } 548 549 nir_ssa_def *new_index_offset = 550 nir_imul_imm(&b, triangle, 4 * 3); 551 552 nir_store_ssbo(&b, new_indices, new_index_buf_desc, 553 new_index_offset, 554 .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4); 555 556 return b.shader; 557} 558 559nir_shader * 560dzn_nir_blit_vs(void) 561{ 562 nir_builder b = 563 nir_builder_init_simple_shader(MESA_SHADER_VERTEX, 564 dxil_get_nir_compiler_options(), 565 "dzn_meta_blit_vs()"); 566 b.shader->info.internal = true; 567 568 nir_ssa_def *params_desc = 569 dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 570 571 nir_variable *out_pos = 572 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 573 "gl_Position"); 574 out_pos->data.location = VARYING_SLOT_POS; 575 out_pos->data.driver_location = 0; 576 577 nir_variable *out_coords = 578 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3), 579 "coords"); 580 out_coords->data.location = VARYING_SLOT_TEX0; 581 out_coords->data.driver_location = 1; 582 583 nir_ssa_def *vertex = nir_load_vertex_id(&b); 584 nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float)); 585 nir_ssa_def *coords = 586 nir_load_ubo(&b, 4, 32, params_desc, base, 587 .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0); 588 nir_ssa_def *pos = 589 nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1), 590 nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0)); 591 nir_ssa_def *z_coord = 592 nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)), 593 .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0); 594 coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord); 595 596 nir_store_var(&b, out_pos, pos, 0xf); 597 nir_store_var(&b, out_coords, coords, 0x7); 598 return b.shader; 599} 600 601nir_shader * 602dzn_nir_blit_fs(const struct dzn_nir_blit_info *info) 603{ 604 bool ms = info->src_samples > 1; 605 nir_alu_type nir_out_type = 606 nir_get_nir_type_for_glsl_base_type(info->out_type); 607 uint32_t coord_comps = 608 glsl_get_sampler_dim_coordinate_components(info->sampler_dim) + 609 info->src_is_array; 610 611 nir_builder b = 612 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 613 dxil_get_nir_compiler_options(), 614 "dzn_meta_blit_fs()"); 615 b.shader->info.internal = true; 616 617 const struct glsl_type *tex_type = 618 glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type); 619 nir_variable *tex_var = 620 nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture"); 621 nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var); 622 623 nir_variable *pos_var = 624 nir_variable_create(b.shader, nir_var_shader_in, 625 glsl_vector_type(GLSL_TYPE_FLOAT, 4), 626 "gl_FragCoord"); 627 pos_var->data.location = VARYING_SLOT_POS; 628 pos_var->data.driver_location = 0; 629 630 nir_variable *coord_var = 631 nir_variable_create(b.shader, nir_var_shader_in, 632 glsl_vector_type(GLSL_TYPE_FLOAT, 3), 633 "coord"); 634 coord_var->data.location = VARYING_SLOT_TEX0; 635 coord_var->data.driver_location = 1; 636 nir_ssa_def *coord = 637 nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1); 638 639 uint32_t out_comps = 640 (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4; 641 nir_variable *out = 642 nir_variable_create(b.shader, nir_var_shader_out, 643 glsl_vector_type(info->out_type, out_comps), 644 "out"); 645 out->data.location = info->loc; 646 647 nir_ssa_def *res = NULL; 648 649 if (info->resolve) { 650 /* When resolving a float type, we need to calculate the average of all 651 * samples. For integer resolve, Vulkan says that one sample should be 652 * chosen without telling which. Let's just pick the first one in that 653 * case. 654 */ 655 656 unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ? 657 info->src_samples : 1; 658 for (unsigned s = 0; s < nsamples; s++) { 659 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4); 660 661 tex->op = nir_texop_txf_ms; 662 tex->dest_type = nir_out_type; 663 tex->texture_index = 0; 664 tex->is_array = info->src_is_array; 665 tex->sampler_dim = info->sampler_dim; 666 667 tex->src[0].src_type = nir_tex_src_coord; 668 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 669 tex->coord_components = coord_comps; 670 671 tex->src[1].src_type = nir_tex_src_ms_index; 672 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); 673 674 tex->src[2].src_type = nir_tex_src_lod; 675 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 676 677 tex->src[3].src_type = nir_tex_src_texture_deref; 678 tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); 679 680 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 681 682 nir_builder_instr_insert(&b, &tex->instr); 683 res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; 684 } 685 686 if (nsamples > 1) { 687 unsigned type_sz = nir_alu_type_get_type_size(nir_out_type); 688 res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); 689 } 690 } else { 691 nir_tex_instr *tex = 692 nir_tex_instr_create(b.shader, ms ? 4 : 3); 693 694 tex->dest_type = nir_out_type; 695 tex->is_array = info->src_is_array; 696 tex->sampler_dim = info->sampler_dim; 697 698 if (ms) { 699 tex->op = nir_texop_txf_ms; 700 701 tex->src[0].src_type = nir_tex_src_coord; 702 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 703 tex->coord_components = coord_comps; 704 705 tex->src[1].src_type = nir_tex_src_ms_index; 706 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); 707 708 tex->src[2].src_type = nir_tex_src_lod; 709 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 710 711 tex->src[3].src_type = nir_tex_src_texture_deref; 712 tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); 713 } else { 714 nir_variable *sampler_var = 715 nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler"); 716 nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var); 717 718 tex->op = nir_texop_tex; 719 tex->sampler_index = 0; 720 721 tex->src[0].src_type = nir_tex_src_coord; 722 tex->src[0].src = nir_src_for_ssa(coord); 723 tex->coord_components = coord_comps; 724 725 tex->src[1].src_type = nir_tex_src_texture_deref; 726 tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa); 727 728 tex->src[2].src_type = nir_tex_src_sampler_deref; 729 tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 730 } 731 732 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 733 nir_builder_instr_insert(&b, &tex->instr); 734 res = &tex->dest.ssa; 735 } 736 737 nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf); 738 739 return b.shader; 740} 741