1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © Microsoft Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "dzn_nir.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "spirv_to_dxil.h" 27bf215546Sopenharmony_ci#include "nir_to_dxil.h" 28bf215546Sopenharmony_ci#include "nir_builder.h" 29bf215546Sopenharmony_ci#include "nir_vulkan.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistatic nir_ssa_def * 32bf215546Sopenharmony_cidzn_nir_create_bo_desc(nir_builder *b, 33bf215546Sopenharmony_ci nir_variable_mode mode, 34bf215546Sopenharmony_ci uint32_t desc_set, 35bf215546Sopenharmony_ci uint32_t binding, 36bf215546Sopenharmony_ci const char *name, 37bf215546Sopenharmony_ci unsigned access) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci struct glsl_struct_field field = { 40bf215546Sopenharmony_ci .type = mode == nir_var_mem_ubo ? 41bf215546Sopenharmony_ci glsl_array_type(glsl_uint_type(), 4096, 4) : 42bf215546Sopenharmony_ci glsl_uint_type(), 43bf215546Sopenharmony_ci .name = "dummy_int", 44bf215546Sopenharmony_ci }; 45bf215546Sopenharmony_ci const struct glsl_type *dummy_type = 46bf215546Sopenharmony_ci glsl_struct_type(&field, 1, "dummy_type", false); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci nir_variable *var = 49bf215546Sopenharmony_ci nir_variable_create(b->shader, mode, dummy_type, name); 50bf215546Sopenharmony_ci var->data.descriptor_set = desc_set; 51bf215546Sopenharmony_ci var->data.binding = binding; 52bf215546Sopenharmony_ci var->data.access = access; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo); 55bf215546Sopenharmony_ci if (mode == nir_var_mem_ubo) 56bf215546Sopenharmony_ci b->shader->info.num_ubos++; 57bf215546Sopenharmony_ci else 58bf215546Sopenharmony_ci b->shader->info.num_ssbos++; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci VkDescriptorType desc_type = 61bf215546Sopenharmony_ci var->data.mode == nir_var_mem_ubo ? 62bf215546Sopenharmony_ci VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : 63bf215546Sopenharmony_ci VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 64bf215546Sopenharmony_ci nir_address_format addr_format = nir_address_format_32bit_index_offset; 65bf215546Sopenharmony_ci nir_ssa_def *index = 66bf215546Sopenharmony_ci nir_vulkan_resource_index(b, 67bf215546Sopenharmony_ci nir_address_format_num_components(addr_format), 68bf215546Sopenharmony_ci nir_address_format_bit_size(addr_format), 69bf215546Sopenharmony_ci nir_imm_int(b, 0), 70bf215546Sopenharmony_ci .desc_set = desc_set, 71bf215546Sopenharmony_ci .binding = binding, 72bf215546Sopenharmony_ci .desc_type = desc_type); 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci nir_ssa_def *desc = 75bf215546Sopenharmony_ci nir_load_vulkan_descriptor(b, 76bf215546Sopenharmony_ci nir_address_format_num_components(addr_format), 77bf215546Sopenharmony_ci nir_address_format_bit_size(addr_format), 78bf215546Sopenharmony_ci index, 79bf215546Sopenharmony_ci .desc_type = desc_type); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci return nir_channel(b, desc, 0); 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_cinir_shader * 85bf215546Sopenharmony_cidzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci const char *type_str[] = { 88bf215546Sopenharmony_ci "draw", 89bf215546Sopenharmony_ci "draw_count", 90bf215546Sopenharmony_ci "indexed_draw", 91bf215546Sopenharmony_ci "indexed_draw_count", 92bf215546Sopenharmony_ci "draw_triangle_fan", 93bf215546Sopenharmony_ci "draw_count_triangle_fan", 94bf215546Sopenharmony_ci "indexed_draw_triangle_fan", 95bf215546Sopenharmony_ci "indexed_draw_count_triangle_fan", 96bf215546Sopenharmony_ci "indexed_draw_triangle_fan_prim_restart", 97bf215546Sopenharmony_ci "indexed_draw_count_triangle_fan_prim_restart", 98bf215546Sopenharmony_ci }; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci assert(type < ARRAY_SIZE(type_str)); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci bool indexed = type == DZN_INDIRECT_INDEXED_DRAW || 103bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT || 104bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN || 105bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 106bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 107bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 108bf215546Sopenharmony_ci bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || 109bf215546Sopenharmony_ci type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN || 110bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN || 111bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 112bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 113bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 114bf215546Sopenharmony_ci bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT || 115bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT || 116bf215546Sopenharmony_ci type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN || 117bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN || 118bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 119bf215546Sopenharmony_ci bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART || 120bf215546Sopenharmony_ci type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART; 121bf215546Sopenharmony_ci nir_builder b = 122bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 123bf215546Sopenharmony_ci dxil_get_nir_compiler_options(), 124bf215546Sopenharmony_ci "dzn_meta_indirect_%s()", 125bf215546Sopenharmony_ci type_str[type]); 126bf215546Sopenharmony_ci b.shader->info.internal = true; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci nir_ssa_def *params_desc = 129bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 130bf215546Sopenharmony_ci nir_ssa_def *draw_buf_desc = 131bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE); 132bf215546Sopenharmony_ci nir_ssa_def *exec_buf_desc = 133bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci unsigned params_size; 136bf215546Sopenharmony_ci if (triangle_fan) 137bf215546Sopenharmony_ci params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params); 138bf215546Sopenharmony_ci else 139bf215546Sopenharmony_ci params_size = sizeof(struct dzn_indirect_draw_rewrite_params); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci nir_ssa_def *params = 142bf215546Sopenharmony_ci nir_load_ubo(&b, params_size / 4, 32, 143bf215546Sopenharmony_ci params_desc, nir_imm_int(&b, 0), 144bf215546Sopenharmony_ci .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci nir_ssa_def *draw_stride = nir_channel(&b, params, 0); 147bf215546Sopenharmony_ci nir_ssa_def *exec_stride = 148bf215546Sopenharmony_ci triangle_fan ? 149bf215546Sopenharmony_ci nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) : 150bf215546Sopenharmony_ci nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params)); 151bf215546Sopenharmony_ci nir_ssa_def *index = 152bf215546Sopenharmony_ci nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci if (indirect_count) { 155bf215546Sopenharmony_ci nir_ssa_def *count_buf_desc = 156bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci nir_ssa_def *draw_count = 159bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0))); 162bf215546Sopenharmony_ci nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0), 163bf215546Sopenharmony_ci .write_mask = 0x1, .access = ACCESS_NON_READABLE, 164bf215546Sopenharmony_ci .align_mul = 16); 165bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci nir_push_if(&b, nir_ult(&b, index, draw_count)); 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci /* The first entry contains the indirect count */ 173bf215546Sopenharmony_ci nir_ssa_def *exec_offset = 174bf215546Sopenharmony_ci indirect_count ? 175bf215546Sopenharmony_ci nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) : 176bf215546Sopenharmony_ci nir_imul(&b, exec_stride, index); 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci nir_ssa_def *draw_info1 = 179bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4); 180bf215546Sopenharmony_ci nir_ssa_def *draw_info2 = 181bf215546Sopenharmony_ci indexed ? 182bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, draw_buf_desc, 183bf215546Sopenharmony_ci nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) : 184bf215546Sopenharmony_ci nir_imm_int(&b, 0); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2); 187bf215546Sopenharmony_ci nir_ssa_def *base_instance = 188bf215546Sopenharmony_ci indexed ? draw_info2 : nir_channel(&b, draw_info1, 3); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci nir_ssa_def *exec_vals[8] = { 191bf215546Sopenharmony_ci first_vertex, 192bf215546Sopenharmony_ci base_instance, 193bf215546Sopenharmony_ci index, 194bf215546Sopenharmony_ci }; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci if (triangle_fan) { 197bf215546Sopenharmony_ci /* Patch {vertex,index}_count and first_index */ 198bf215546Sopenharmony_ci nir_ssa_def *triangle_count = 199bf215546Sopenharmony_ci nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2)); 200bf215546Sopenharmony_ci exec_vals[3] = nir_imul_imm(&b, triangle_count, 3); 201bf215546Sopenharmony_ci exec_vals[4] = nir_channel(&b, draw_info1, 1); 202bf215546Sopenharmony_ci exec_vals[5] = nir_imm_int(&b, 0); 203bf215546Sopenharmony_ci exec_vals[6] = first_vertex; 204bf215546Sopenharmony_ci exec_vals[7] = base_instance; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_exec_buf_desc = 207bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4, 208bf215546Sopenharmony_ci "triangle_fan_exec_buf", 209bf215546Sopenharmony_ci ACCESS_NON_READABLE); 210bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1); 211bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_index_buf_addr_lo = 212bf215546Sopenharmony_ci nir_iadd(&b, nir_channel(&b, params, 2), 213bf215546Sopenharmony_ci nir_imul(&b, triangle_fan_index_buf_stride, index)); 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_exec_vals[9] = { 0 }; 216bf215546Sopenharmony_ci uint32_t triangle_fan_exec_param_count = 0; 217bf215546Sopenharmony_ci nir_ssa_def *addr_lo_overflow = 218bf215546Sopenharmony_ci nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2)); 219bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_index_buf_addr_hi = 220bf215546Sopenharmony_ci nir_iadd(&b, nir_channel(&b, params, 3), 221bf215546Sopenharmony_ci nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0))); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo; 224bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci if (prim_restart) { 227bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2); 228bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0); 229bf215546Sopenharmony_ci uint32_t index_count_offset = 230bf215546Sopenharmony_ci offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count); 231bf215546Sopenharmony_ci nir_ssa_def *exec_buf_start = 232bf215546Sopenharmony_ci nir_load_ubo(&b, 2, 32, 233bf215546Sopenharmony_ci params_desc, nir_imm_int(&b, 16), 234bf215546Sopenharmony_ci .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 235bf215546Sopenharmony_ci nir_ssa_def *exec_buf_start_lo = 236bf215546Sopenharmony_ci nir_iadd(&b, nir_imm_int(&b, index_count_offset), 237bf215546Sopenharmony_ci nir_iadd(&b, nir_channel(&b, exec_buf_start, 0), 238bf215546Sopenharmony_ci nir_imul(&b, exec_stride, index))); 239bf215546Sopenharmony_ci addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0)); 240bf215546Sopenharmony_ci nir_ssa_def *exec_buf_start_hi = 241bf215546Sopenharmony_ci nir_iadd(&b, nir_channel(&b, exec_buf_start, 0), 242bf215546Sopenharmony_ci nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0))); 243bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo; 244bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi; 245bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 246bf215546Sopenharmony_ci } else { 247bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = 248bf215546Sopenharmony_ci indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0); 249bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = 250bf215546Sopenharmony_ci triangle_count; 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 253bf215546Sopenharmony_ci triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci unsigned rewrite_index_exec_params = 256bf215546Sopenharmony_ci prim_restart ? 257bf215546Sopenharmony_ci sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) : 258bf215546Sopenharmony_ci sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params); 259bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_exec_stride = 260bf215546Sopenharmony_ci nir_imm_int(&b, rewrite_index_exec_params); 261bf215546Sopenharmony_ci nir_ssa_def *triangle_fan_exec_offset = 262bf215546Sopenharmony_ci nir_imul(&b, triangle_fan_exec_stride, index); 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) { 265bf215546Sopenharmony_ci unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4); 266bf215546Sopenharmony_ci uint32_t mask = (1 << comps) - 1; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps), 269bf215546Sopenharmony_ci triangle_fan_exec_buf_desc, 270bf215546Sopenharmony_ci nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4), 271bf215546Sopenharmony_ci .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4); 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci nir_ssa_def *ibview_vals[] = { 275bf215546Sopenharmony_ci triangle_fan_index_buf_addr_lo, 276bf215546Sopenharmony_ci triangle_fan_index_buf_addr_hi, 277bf215546Sopenharmony_ci triangle_fan_index_buf_stride, 278bf215546Sopenharmony_ci nir_imm_int(&b, DXGI_FORMAT_R32_UINT), 279bf215546Sopenharmony_ci }; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)), 282bf215546Sopenharmony_ci exec_buf_desc, exec_offset, 283bf215546Sopenharmony_ci .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 284bf215546Sopenharmony_ci exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4); 285bf215546Sopenharmony_ci } else { 286bf215546Sopenharmony_ci exec_vals[3] = nir_channel(&b, draw_info1, 0); 287bf215546Sopenharmony_ci exec_vals[4] = nir_channel(&b, draw_info1, 1); 288bf215546Sopenharmony_ci exec_vals[5] = nir_channel(&b, draw_info1, 2); 289bf215546Sopenharmony_ci exec_vals[6] = nir_channel(&b, draw_info1, 3); 290bf215546Sopenharmony_ci exec_vals[7] = draw_info2; 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4), 294bf215546Sopenharmony_ci exec_buf_desc, exec_offset, 295bf215546Sopenharmony_ci .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 296bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4), 297bf215546Sopenharmony_ci exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16), 298bf215546Sopenharmony_ci .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci if (indirect_count) 301bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci return b.shader; 304bf215546Sopenharmony_ci} 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_cinir_shader * 307bf215546Sopenharmony_cidzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci assert(old_index_size == 2 || old_index_size == 4); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci nir_builder b = 312bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 313bf215546Sopenharmony_ci dxil_get_nir_compiler_options(), 314bf215546Sopenharmony_ci "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)", 315bf215546Sopenharmony_ci old_index_size); 316bf215546Sopenharmony_ci b.shader->info.internal = true; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci nir_ssa_def *params_desc = 319bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 320bf215546Sopenharmony_ci nir_ssa_def *new_index_buf_desc = 321bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, 322bf215546Sopenharmony_ci "new_index_buf", ACCESS_NON_READABLE); 323bf215546Sopenharmony_ci nir_ssa_def *old_index_buf_desc = 324bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, 325bf215546Sopenharmony_ci "old_index_buf", ACCESS_NON_WRITEABLE); 326bf215546Sopenharmony_ci nir_ssa_def *new_index_count_ptr_desc = 327bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, 328bf215546Sopenharmony_ci "new_index_count_ptr", ACCESS_NON_READABLE); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci nir_ssa_def *params = 331bf215546Sopenharmony_ci nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32, 332bf215546Sopenharmony_ci params_desc, nir_imm_int(&b, 0), 333bf215546Sopenharmony_ci .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci nir_ssa_def *prim_restart_val = 336bf215546Sopenharmony_ci nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff); 337bf215546Sopenharmony_ci nir_variable *old_index_ptr_var = 338bf215546Sopenharmony_ci nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var"); 339bf215546Sopenharmony_ci nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0); 340bf215546Sopenharmony_ci nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1); 341bf215546Sopenharmony_ci nir_variable *new_index_ptr_var = 342bf215546Sopenharmony_ci nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var"); 343bf215546Sopenharmony_ci nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1); 344bf215546Sopenharmony_ci nir_ssa_def *old_index_count = nir_channel(&b, params, 1); 345bf215546Sopenharmony_ci nir_variable *index0_var = 346bf215546Sopenharmony_ci nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var"); 347bf215546Sopenharmony_ci nir_store_var(&b, index0_var, prim_restart_val, 1); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* 350bf215546Sopenharmony_ci * Filter out all primitive-restart magic values, and generate a triangle list 351bf215546Sopenharmony_ci * from the triangle fan definition. 352bf215546Sopenharmony_ci * 353bf215546Sopenharmony_ci * Basically: 354bf215546Sopenharmony_ci * 355bf215546Sopenharmony_ci * new_index_ptr = 0; 356bf215546Sopenharmony_ci * index0 = restart_prim_value; // 0xffff or 0xffffffff 357bf215546Sopenharmony_ci * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) { 358bf215546Sopenharmony_ci * // If we have no starting-point we need at least 3 vertices, 359bf215546Sopenharmony_ci * // otherwise we can do with two. If there's not enough vertices 360bf215546Sopenharmony_ci * // to form a primitive, we just bail out. 361bf215546Sopenharmony_ci * min_indices = index0 == restart_prim_value ? 3 : 2; 362bf215546Sopenharmony_ci * if (old_index_ptr + min_indices > firstIndex + indexCount) 363bf215546Sopenharmony_ci * break; 364bf215546Sopenharmony_ci * 365bf215546Sopenharmony_ci * if (index0 == restart_prim_value) { 366bf215546Sopenharmony_ci * // No starting point, skip all entries until we have a 367bf215546Sopenharmony_ci * // non-primitive-restart value 368bf215546Sopenharmony_ci * index0 = old_index_buf[old_index_ptr++]; 369bf215546Sopenharmony_ci * continue; 370bf215546Sopenharmony_ci * } 371bf215546Sopenharmony_ci * 372bf215546Sopenharmony_ci * // If at least one index contains the primitive-restart pattern, 373bf215546Sopenharmony_ci // ignore this triangle, and skip the unused entries 374bf215546Sopenharmony_ci * if (old_index_buf[old_index_ptr + 1] == restart_prim_value) { 375bf215546Sopenharmony_ci * old_index_ptr += 2; 376bf215546Sopenharmony_ci * continue; 377bf215546Sopenharmony_ci * } 378bf215546Sopenharmony_ci * if (old_index_buf[old_index_ptr] == restart_prim_value) { 379bf215546Sopenharmony_ci * old_index_ptr++; 380bf215546Sopenharmony_ci * continue; 381bf215546Sopenharmony_ci * } 382bf215546Sopenharmony_ci * 383bf215546Sopenharmony_ci * // We have a valid primitive, queue it to the new index buffer 384bf215546Sopenharmony_ci * new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr]; 385bf215546Sopenharmony_ci * new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1]; 386bf215546Sopenharmony_ci * new_index_buf[new_index_ptr++] = index0; 387bf215546Sopenharmony_ci * } 388bf215546Sopenharmony_ci * 389bf215546Sopenharmony_ci * expressed in NIR, which admitedly is not super easy to grasp with. 390bf215546Sopenharmony_ci * TODO: Might be a good thing to use use the CL compiler we have and turn 391bf215546Sopenharmony_ci * those shaders into CL kernels. 392bf215546Sopenharmony_ci */ 393bf215546Sopenharmony_ci nir_push_loop(&b); 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci old_index_ptr = nir_load_var(&b, old_index_ptr_var); 396bf215546Sopenharmony_ci nir_ssa_def *index0 = nir_load_var(&b, index0_var); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci nir_ssa_def *read_index_count = 399bf215546Sopenharmony_ci nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val), 400bf215546Sopenharmony_ci nir_imm_int(&b, 3), nir_imm_int(&b, 2)); 401bf215546Sopenharmony_ci nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count))); 402bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 403bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci nir_ssa_def *old_index_offset = 406bf215546Sopenharmony_ci nir_imul_imm(&b, old_index_ptr, old_index_size); 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val)); 409bf215546Sopenharmony_ci nir_ssa_def *index_val = 410bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, old_index_buf_desc, 411bf215546Sopenharmony_ci old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset, 412bf215546Sopenharmony_ci .align_mul = 4); 413bf215546Sopenharmony_ci if (old_index_size == 2) { 414bf215546Sopenharmony_ci index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2), 415bf215546Sopenharmony_ci nir_ushr_imm(&b, index_val, 16), 416bf215546Sopenharmony_ci nir_iand_imm(&b, index_val, 0xffff)); 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci nir_store_var(&b, index0_var, index_val, 1); 420bf215546Sopenharmony_ci nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1); 421bf215546Sopenharmony_ci nir_jump(&b, nir_jump_continue); 422bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci nir_ssa_def *index12 = 425bf215546Sopenharmony_ci nir_load_ssbo(&b, 2, 32, old_index_buf_desc, 426bf215546Sopenharmony_ci old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset, 427bf215546Sopenharmony_ci .align_mul = 4); 428bf215546Sopenharmony_ci if (old_index_size == 2) { 429bf215546Sopenharmony_ci nir_ssa_def *indices[] = { 430bf215546Sopenharmony_ci nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff), 431bf215546Sopenharmony_ci nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16), 432bf215546Sopenharmony_ci nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff), 433bf215546Sopenharmony_ci }; 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2), 436bf215546Sopenharmony_ci nir_vec2(&b, indices[1], indices[2]), 437bf215546Sopenharmony_ci nir_vec2(&b, indices[0], indices[1])); 438bf215546Sopenharmony_ci } 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val)); 441bf215546Sopenharmony_ci nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1); 442bf215546Sopenharmony_ci nir_store_var(&b, index0_var, prim_restart_val, 1); 443bf215546Sopenharmony_ci nir_jump(&b, nir_jump_continue); 444bf215546Sopenharmony_ci nir_push_else(&b, NULL); 445bf215546Sopenharmony_ci nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1); 446bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val)); 447bf215546Sopenharmony_ci nir_store_var(&b, index0_var, prim_restart_val, 1); 448bf215546Sopenharmony_ci nir_jump(&b, nir_jump_continue); 449bf215546Sopenharmony_ci nir_push_else(&b, NULL); 450bf215546Sopenharmony_ci nir_ssa_def *new_indices = 451bf215546Sopenharmony_ci nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0); 452bf215546Sopenharmony_ci nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var); 453bf215546Sopenharmony_ci nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t)); 454bf215546Sopenharmony_ci nir_store_ssbo(&b, new_indices, new_index_buf_desc, 455bf215546Sopenharmony_ci new_index_offset, 456bf215546Sopenharmony_ci .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4); 457bf215546Sopenharmony_ci nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1); 458bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 459bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 460bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var), 463bf215546Sopenharmony_ci new_index_count_ptr_desc, nir_imm_int(&b, 0), 464bf215546Sopenharmony_ci .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4); 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci return b.shader; 467bf215546Sopenharmony_ci} 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_cinir_shader * 470bf215546Sopenharmony_cidzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size) 471bf215546Sopenharmony_ci{ 472bf215546Sopenharmony_ci assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4); 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci nir_builder b = 475bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 476bf215546Sopenharmony_ci dxil_get_nir_compiler_options(), 477bf215546Sopenharmony_ci "dzn_meta_triangle_rewrite_index(old_index_size=%d)", 478bf215546Sopenharmony_ci old_index_size); 479bf215546Sopenharmony_ci b.shader->info.internal = true; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci nir_ssa_def *params_desc = 482bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 483bf215546Sopenharmony_ci nir_ssa_def *new_index_buf_desc = 484bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, 485bf215546Sopenharmony_ci "new_index_buf", ACCESS_NON_READABLE); 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci nir_ssa_def *old_index_buf_desc = NULL; 488bf215546Sopenharmony_ci if (old_index_size > 0) { 489bf215546Sopenharmony_ci old_index_buf_desc = 490bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, 491bf215546Sopenharmony_ci "old_index_buf", ACCESS_NON_WRITEABLE); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci nir_ssa_def *params = 495bf215546Sopenharmony_ci nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32, 496bf215546Sopenharmony_ci params_desc, nir_imm_int(&b, 0), 497bf215546Sopenharmony_ci .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); 500bf215546Sopenharmony_ci nir_ssa_def *new_indices; 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (old_index_size > 0) { 503bf215546Sopenharmony_ci nir_ssa_def *old_first_index = nir_channel(&b, params, 0); 504bf215546Sopenharmony_ci nir_ssa_def *old_index0_offset = 505bf215546Sopenharmony_ci nir_imul_imm(&b, old_first_index, old_index_size); 506bf215546Sopenharmony_ci nir_ssa_def *old_index1_offset = 507bf215546Sopenharmony_ci nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index), 508bf215546Sopenharmony_ci old_index_size); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci nir_ssa_def *old_index0 = 511bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, old_index_buf_desc, 512bf215546Sopenharmony_ci old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset, 513bf215546Sopenharmony_ci .align_mul = 4); 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci if (old_index_size == 2) { 516bf215546Sopenharmony_ci old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2), 517bf215546Sopenharmony_ci nir_ushr_imm(&b, old_index0, 16), 518bf215546Sopenharmony_ci nir_iand_imm(&b, old_index0, 0xffff)); 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci nir_ssa_def *old_index12 = 522bf215546Sopenharmony_ci nir_load_ssbo(&b, 2, 32, old_index_buf_desc, 523bf215546Sopenharmony_ci old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset, 524bf215546Sopenharmony_ci .align_mul = 4); 525bf215546Sopenharmony_ci if (old_index_size == 2) { 526bf215546Sopenharmony_ci nir_ssa_def *indices[] = { 527bf215546Sopenharmony_ci nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff), 528bf215546Sopenharmony_ci nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16), 529bf215546Sopenharmony_ci nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff), 530bf215546Sopenharmony_ci }; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2), 533bf215546Sopenharmony_ci nir_vec2(&b, indices[1], indices[2]), 534bf215546Sopenharmony_ci nir_vec2(&b, indices[0], indices[1])); 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ 538bf215546Sopenharmony_ci new_indices = 539bf215546Sopenharmony_ci nir_vec3(&b, nir_channel(&b, old_index12, 0), 540bf215546Sopenharmony_ci nir_channel(&b, old_index12, 1), old_index0); 541bf215546Sopenharmony_ci } else { 542bf215546Sopenharmony_ci new_indices = 543bf215546Sopenharmony_ci nir_vec3(&b, 544bf215546Sopenharmony_ci nir_iadd_imm(&b, triangle, 1), 545bf215546Sopenharmony_ci nir_iadd_imm(&b, triangle, 2), 546bf215546Sopenharmony_ci nir_imm_int(&b, 0)); 547bf215546Sopenharmony_ci } 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci nir_ssa_def *new_index_offset = 550bf215546Sopenharmony_ci nir_imul_imm(&b, triangle, 4 * 3); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci nir_store_ssbo(&b, new_indices, new_index_buf_desc, 553bf215546Sopenharmony_ci new_index_offset, 554bf215546Sopenharmony_ci .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4); 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci return b.shader; 557bf215546Sopenharmony_ci} 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_cinir_shader * 560bf215546Sopenharmony_cidzn_nir_blit_vs(void) 561bf215546Sopenharmony_ci{ 562bf215546Sopenharmony_ci nir_builder b = 563bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_VERTEX, 564bf215546Sopenharmony_ci dxil_get_nir_compiler_options(), 565bf215546Sopenharmony_ci "dzn_meta_blit_vs()"); 566bf215546Sopenharmony_ci b.shader->info.internal = true; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci nir_ssa_def *params_desc = 569bf215546Sopenharmony_ci dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci nir_variable *out_pos = 572bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 573bf215546Sopenharmony_ci "gl_Position"); 574bf215546Sopenharmony_ci out_pos->data.location = VARYING_SLOT_POS; 575bf215546Sopenharmony_ci out_pos->data.driver_location = 0; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci nir_variable *out_coords = 578bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3), 579bf215546Sopenharmony_ci "coords"); 580bf215546Sopenharmony_ci out_coords->data.location = VARYING_SLOT_TEX0; 581bf215546Sopenharmony_ci out_coords->data.driver_location = 1; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci nir_ssa_def *vertex = nir_load_vertex_id(&b); 584bf215546Sopenharmony_ci nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float)); 585bf215546Sopenharmony_ci nir_ssa_def *coords = 586bf215546Sopenharmony_ci nir_load_ubo(&b, 4, 32, params_desc, base, 587bf215546Sopenharmony_ci .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0); 588bf215546Sopenharmony_ci nir_ssa_def *pos = 589bf215546Sopenharmony_ci nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1), 590bf215546Sopenharmony_ci nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0)); 591bf215546Sopenharmony_ci nir_ssa_def *z_coord = 592bf215546Sopenharmony_ci nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)), 593bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0); 594bf215546Sopenharmony_ci coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci nir_store_var(&b, out_pos, pos, 0xf); 597bf215546Sopenharmony_ci nir_store_var(&b, out_coords, coords, 0x7); 598bf215546Sopenharmony_ci return b.shader; 599bf215546Sopenharmony_ci} 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_cinir_shader * 602bf215546Sopenharmony_cidzn_nir_blit_fs(const struct dzn_nir_blit_info *info) 603bf215546Sopenharmony_ci{ 604bf215546Sopenharmony_ci bool ms = info->src_samples > 1; 605bf215546Sopenharmony_ci nir_alu_type nir_out_type = 606bf215546Sopenharmony_ci nir_get_nir_type_for_glsl_base_type(info->out_type); 607bf215546Sopenharmony_ci uint32_t coord_comps = 608bf215546Sopenharmony_ci glsl_get_sampler_dim_coordinate_components(info->sampler_dim) + 609bf215546Sopenharmony_ci info->src_is_array; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci nir_builder b = 612bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 613bf215546Sopenharmony_ci dxil_get_nir_compiler_options(), 614bf215546Sopenharmony_ci "dzn_meta_blit_fs()"); 615bf215546Sopenharmony_ci b.shader->info.internal = true; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci const struct glsl_type *tex_type = 618bf215546Sopenharmony_ci glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type); 619bf215546Sopenharmony_ci nir_variable *tex_var = 620bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture"); 621bf215546Sopenharmony_ci nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci nir_variable *pos_var = 624bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_in, 625bf215546Sopenharmony_ci glsl_vector_type(GLSL_TYPE_FLOAT, 4), 626bf215546Sopenharmony_ci "gl_FragCoord"); 627bf215546Sopenharmony_ci pos_var->data.location = VARYING_SLOT_POS; 628bf215546Sopenharmony_ci pos_var->data.driver_location = 0; 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci nir_variable *coord_var = 631bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_in, 632bf215546Sopenharmony_ci glsl_vector_type(GLSL_TYPE_FLOAT, 3), 633bf215546Sopenharmony_ci "coord"); 634bf215546Sopenharmony_ci coord_var->data.location = VARYING_SLOT_TEX0; 635bf215546Sopenharmony_ci coord_var->data.driver_location = 1; 636bf215546Sopenharmony_ci nir_ssa_def *coord = 637bf215546Sopenharmony_ci nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci uint32_t out_comps = 640bf215546Sopenharmony_ci (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4; 641bf215546Sopenharmony_ci nir_variable *out = 642bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, 643bf215546Sopenharmony_ci glsl_vector_type(info->out_type, out_comps), 644bf215546Sopenharmony_ci "out"); 645bf215546Sopenharmony_ci out->data.location = info->loc; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci nir_ssa_def *res = NULL; 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci if (info->resolve) { 650bf215546Sopenharmony_ci /* When resolving a float type, we need to calculate the average of all 651bf215546Sopenharmony_ci * samples. For integer resolve, Vulkan says that one sample should be 652bf215546Sopenharmony_ci * chosen without telling which. Let's just pick the first one in that 653bf215546Sopenharmony_ci * case. 654bf215546Sopenharmony_ci */ 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ? 657bf215546Sopenharmony_ci info->src_samples : 1; 658bf215546Sopenharmony_ci for (unsigned s = 0; s < nsamples; s++) { 659bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci tex->op = nir_texop_txf_ms; 662bf215546Sopenharmony_ci tex->dest_type = nir_out_type; 663bf215546Sopenharmony_ci tex->texture_index = 0; 664bf215546Sopenharmony_ci tex->is_array = info->src_is_array; 665bf215546Sopenharmony_ci tex->sampler_dim = info->sampler_dim; 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 668bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 669bf215546Sopenharmony_ci tex->coord_components = coord_comps; 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci tex->src[1].src_type = nir_tex_src_ms_index; 672bf215546Sopenharmony_ci tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci tex->src[2].src_type = nir_tex_src_lod; 675bf215546Sopenharmony_ci tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci tex->src[3].src_type = nir_tex_src_texture_deref; 678bf215546Sopenharmony_ci tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &tex->instr); 683bf215546Sopenharmony_ci res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; 684bf215546Sopenharmony_ci } 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci if (nsamples > 1) { 687bf215546Sopenharmony_ci unsigned type_sz = nir_alu_type_get_type_size(nir_out_type); 688bf215546Sopenharmony_ci res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci } else { 691bf215546Sopenharmony_ci nir_tex_instr *tex = 692bf215546Sopenharmony_ci nir_tex_instr_create(b.shader, ms ? 4 : 3); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci tex->dest_type = nir_out_type; 695bf215546Sopenharmony_ci tex->is_array = info->src_is_array; 696bf215546Sopenharmony_ci tex->sampler_dim = info->sampler_dim; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci if (ms) { 699bf215546Sopenharmony_ci tex->op = nir_texop_txf_ms; 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 702bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); 703bf215546Sopenharmony_ci tex->coord_components = coord_comps; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci tex->src[1].src_type = nir_tex_src_ms_index; 706bf215546Sopenharmony_ci tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci tex->src[2].src_type = nir_tex_src_lod; 709bf215546Sopenharmony_ci tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci tex->src[3].src_type = nir_tex_src_texture_deref; 712bf215546Sopenharmony_ci tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); 713bf215546Sopenharmony_ci } else { 714bf215546Sopenharmony_ci nir_variable *sampler_var = 715bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler"); 716bf215546Sopenharmony_ci nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var); 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci tex->op = nir_texop_tex; 719bf215546Sopenharmony_ci tex->sampler_index = 0; 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 722bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(coord); 723bf215546Sopenharmony_ci tex->coord_components = coord_comps; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci tex->src[1].src_type = nir_tex_src_texture_deref; 726bf215546Sopenharmony_ci tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci tex->src[2].src_type = nir_tex_src_sampler_deref; 729bf215546Sopenharmony_ci tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 733bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &tex->instr); 734bf215546Sopenharmony_ci res = &tex->dest.ssa; 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci return b.shader; 740bf215546Sopenharmony_ci} 741