/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "dzn_nir.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "spirv_to_dxil.h"
27bf215546Sopenharmony_ci#include "nir_to_dxil.h"
28bf215546Sopenharmony_ci#include "nir_builder.h"
29bf215546Sopenharmony_ci#include "nir_vulkan.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cistatic nir_ssa_def *
32bf215546Sopenharmony_cidzn_nir_create_bo_desc(nir_builder *b,
33bf215546Sopenharmony_ci                       nir_variable_mode mode,
34bf215546Sopenharmony_ci                       uint32_t desc_set,
35bf215546Sopenharmony_ci                       uint32_t binding,
36bf215546Sopenharmony_ci                       const char *name,
37bf215546Sopenharmony_ci                       unsigned access)
38bf215546Sopenharmony_ci{
39bf215546Sopenharmony_ci   struct glsl_struct_field field = {
40bf215546Sopenharmony_ci      .type = mode == nir_var_mem_ubo ?
41bf215546Sopenharmony_ci              glsl_array_type(glsl_uint_type(), 4096, 4) :
42bf215546Sopenharmony_ci              glsl_uint_type(),
43bf215546Sopenharmony_ci      .name = "dummy_int",
44bf215546Sopenharmony_ci   };
45bf215546Sopenharmony_ci   const struct glsl_type *dummy_type =
46bf215546Sopenharmony_ci      glsl_struct_type(&field, 1, "dummy_type", false);
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   nir_variable *var =
49bf215546Sopenharmony_ci      nir_variable_create(b->shader, mode, dummy_type, name);
50bf215546Sopenharmony_ci   var->data.descriptor_set = desc_set;
51bf215546Sopenharmony_ci   var->data.binding = binding;
52bf215546Sopenharmony_ci   var->data.access = access;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
55bf215546Sopenharmony_ci   if (mode == nir_var_mem_ubo)
56bf215546Sopenharmony_ci      b->shader->info.num_ubos++;
57bf215546Sopenharmony_ci   else
58bf215546Sopenharmony_ci      b->shader->info.num_ssbos++;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   VkDescriptorType desc_type =
61bf215546Sopenharmony_ci      var->data.mode == nir_var_mem_ubo ?
62bf215546Sopenharmony_ci      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
63bf215546Sopenharmony_ci      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
64bf215546Sopenharmony_ci   nir_address_format addr_format = nir_address_format_32bit_index_offset;
65bf215546Sopenharmony_ci   nir_ssa_def *index =
66bf215546Sopenharmony_ci      nir_vulkan_resource_index(b,
67bf215546Sopenharmony_ci                                nir_address_format_num_components(addr_format),
68bf215546Sopenharmony_ci                                nir_address_format_bit_size(addr_format),
69bf215546Sopenharmony_ci                                nir_imm_int(b, 0),
70bf215546Sopenharmony_ci                                .desc_set = desc_set,
71bf215546Sopenharmony_ci                                .binding = binding,
72bf215546Sopenharmony_ci                                .desc_type = desc_type);
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   nir_ssa_def *desc =
75bf215546Sopenharmony_ci      nir_load_vulkan_descriptor(b,
76bf215546Sopenharmony_ci                                 nir_address_format_num_components(addr_format),
77bf215546Sopenharmony_ci                                 nir_address_format_bit_size(addr_format),
78bf215546Sopenharmony_ci                                 index,
79bf215546Sopenharmony_ci                                 .desc_type = desc_type);
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci   return nir_channel(b, desc, 0);
82bf215546Sopenharmony_ci}
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_cinir_shader *
85bf215546Sopenharmony_cidzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type)
86bf215546Sopenharmony_ci{
87bf215546Sopenharmony_ci   const char *type_str[] = {
88bf215546Sopenharmony_ci      "draw",
89bf215546Sopenharmony_ci      "draw_count",
90bf215546Sopenharmony_ci      "indexed_draw",
91bf215546Sopenharmony_ci      "indexed_draw_count",
92bf215546Sopenharmony_ci      "draw_triangle_fan",
93bf215546Sopenharmony_ci      "draw_count_triangle_fan",
94bf215546Sopenharmony_ci      "indexed_draw_triangle_fan",
95bf215546Sopenharmony_ci      "indexed_draw_count_triangle_fan",
96bf215546Sopenharmony_ci      "indexed_draw_triangle_fan_prim_restart",
97bf215546Sopenharmony_ci      "indexed_draw_count_triangle_fan_prim_restart",
98bf215546Sopenharmony_ci   };
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   assert(type < ARRAY_SIZE(type_str));
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW ||
103bf215546Sopenharmony_ci                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
104bf215546Sopenharmony_ci                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
105bf215546Sopenharmony_ci                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
106bf215546Sopenharmony_ci                  type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
107bf215546Sopenharmony_ci                  type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
108bf215546Sopenharmony_ci   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
109bf215546Sopenharmony_ci                       type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
110bf215546Sopenharmony_ci                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
111bf215546Sopenharmony_ci                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
112bf215546Sopenharmony_ci                       type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
113bf215546Sopenharmony_ci                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
114bf215546Sopenharmony_ci   bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
115bf215546Sopenharmony_ci                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
116bf215546Sopenharmony_ci                         type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
117bf215546Sopenharmony_ci                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
118bf215546Sopenharmony_ci                         type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
119bf215546Sopenharmony_ci   bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
120bf215546Sopenharmony_ci                       type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
121bf215546Sopenharmony_ci   nir_builder b =
122bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
123bf215546Sopenharmony_ci                                     dxil_get_nir_compiler_options(),
124bf215546Sopenharmony_ci                                     "dzn_meta_indirect_%s()",
125bf215546Sopenharmony_ci                                     type_str[type]);
126bf215546Sopenharmony_ci   b.shader->info.internal = true;
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   nir_ssa_def *params_desc =
129bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
130bf215546Sopenharmony_ci   nir_ssa_def *draw_buf_desc =
131bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
132bf215546Sopenharmony_ci   nir_ssa_def *exec_buf_desc =
133bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   unsigned params_size;
136bf215546Sopenharmony_ci   if (triangle_fan)
137bf215546Sopenharmony_ci      params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
138bf215546Sopenharmony_ci   else
139bf215546Sopenharmony_ci      params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   nir_ssa_def *params =
142bf215546Sopenharmony_ci      nir_load_ubo(&b, params_size / 4, 32,
143bf215546Sopenharmony_ci                   params_desc, nir_imm_int(&b, 0),
144bf215546Sopenharmony_ci                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   nir_ssa_def *draw_stride = nir_channel(&b, params, 0);
147bf215546Sopenharmony_ci   nir_ssa_def *exec_stride =
148bf215546Sopenharmony_ci      triangle_fan ?
149bf215546Sopenharmony_ci      nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_draw_exec_params)) :
150bf215546Sopenharmony_ci      nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params));
151bf215546Sopenharmony_ci   nir_ssa_def *index =
152bf215546Sopenharmony_ci      nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   if (indirect_count) {
155bf215546Sopenharmony_ci      nir_ssa_def *count_buf_desc =
156bf215546Sopenharmony_ci         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci      nir_ssa_def *draw_count =
159bf215546Sopenharmony_ci         nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci      nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0)));
162bf215546Sopenharmony_ci      nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
163bf215546Sopenharmony_ci                    .write_mask = 0x1, .access = ACCESS_NON_READABLE,
164bf215546Sopenharmony_ci                    .align_mul = 16);
165bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci      nir_push_if(&b, nir_ult(&b, index, draw_count));
168bf215546Sopenharmony_ci   }
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   /* The first entry contains the indirect count */
173bf215546Sopenharmony_ci   nir_ssa_def *exec_offset =
174bf215546Sopenharmony_ci      indirect_count ?
175bf215546Sopenharmony_ci      nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
176bf215546Sopenharmony_ci      nir_imul(&b, exec_stride, index);
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   nir_ssa_def *draw_info1 =
179bf215546Sopenharmony_ci      nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
180bf215546Sopenharmony_ci   nir_ssa_def *draw_info2 =
181bf215546Sopenharmony_ci      indexed ?
182bf215546Sopenharmony_ci      nir_load_ssbo(&b, 1, 32, draw_buf_desc,
183bf215546Sopenharmony_ci                    nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
184bf215546Sopenharmony_ci      nir_imm_int(&b, 0);
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2);
187bf215546Sopenharmony_ci   nir_ssa_def *base_instance =
188bf215546Sopenharmony_ci      indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   nir_ssa_def *exec_vals[8] = {
191bf215546Sopenharmony_ci      first_vertex,
192bf215546Sopenharmony_ci      base_instance,
193bf215546Sopenharmony_ci      index,
194bf215546Sopenharmony_ci   };
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   if (triangle_fan) {
197bf215546Sopenharmony_ci      /* Patch {vertex,index}_count and first_index */
198bf215546Sopenharmony_ci      nir_ssa_def *triangle_count =
199bf215546Sopenharmony_ci         nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
200bf215546Sopenharmony_ci      exec_vals[3] = nir_imul_imm(&b, triangle_count, 3);
201bf215546Sopenharmony_ci      exec_vals[4] = nir_channel(&b, draw_info1, 1);
202bf215546Sopenharmony_ci      exec_vals[5] = nir_imm_int(&b, 0);
203bf215546Sopenharmony_ci      exec_vals[6] = first_vertex;
204bf215546Sopenharmony_ci      exec_vals[7] = base_instance;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_exec_buf_desc =
207bf215546Sopenharmony_ci         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
208bf215546Sopenharmony_ci                                "triangle_fan_exec_buf",
209bf215546Sopenharmony_ci                                ACCESS_NON_READABLE);
210bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
211bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_index_buf_addr_lo =
212bf215546Sopenharmony_ci         nir_iadd(&b, nir_channel(&b, params, 2),
213bf215546Sopenharmony_ci                  nir_imul(&b, triangle_fan_index_buf_stride, index));
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_exec_vals[9] = { 0 };
216bf215546Sopenharmony_ci      uint32_t triangle_fan_exec_param_count = 0;
217bf215546Sopenharmony_ci      nir_ssa_def *addr_lo_overflow =
218bf215546Sopenharmony_ci         nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
219bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_index_buf_addr_hi =
220bf215546Sopenharmony_ci         nir_iadd(&b, nir_channel(&b, params, 3),
221bf215546Sopenharmony_ci                  nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
224bf215546Sopenharmony_ci      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci      if (prim_restart) {
227bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
228bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
229bf215546Sopenharmony_ci         uint32_t index_count_offset =
230bf215546Sopenharmony_ci            offsetof(struct dzn_indirect_triangle_fan_draw_exec_params, indexed_draw.index_count);
231bf215546Sopenharmony_ci         nir_ssa_def *exec_buf_start =
232bf215546Sopenharmony_ci            nir_load_ubo(&b, 2, 32,
233bf215546Sopenharmony_ci                         params_desc, nir_imm_int(&b, 16),
234bf215546Sopenharmony_ci                         .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
235bf215546Sopenharmony_ci         nir_ssa_def *exec_buf_start_lo =
236bf215546Sopenharmony_ci            nir_iadd(&b, nir_imm_int(&b, index_count_offset),
237bf215546Sopenharmony_ci                     nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
238bf215546Sopenharmony_ci                              nir_imul(&b, exec_stride, index)));
239bf215546Sopenharmony_ci         addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
240bf215546Sopenharmony_ci         nir_ssa_def *exec_buf_start_hi =
241bf215546Sopenharmony_ci            nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
242bf215546Sopenharmony_ci                     nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
243bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
244bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
245bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
246bf215546Sopenharmony_ci      } else {
247bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
248bf215546Sopenharmony_ci            indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
249bf215546Sopenharmony_ci         triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
250bf215546Sopenharmony_ci            triangle_count;
251bf215546Sopenharmony_ci      }
252bf215546Sopenharmony_ci      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
253bf215546Sopenharmony_ci      triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci      unsigned rewrite_index_exec_params =
256bf215546Sopenharmony_ci         prim_restart ?
257bf215546Sopenharmony_ci         sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
258bf215546Sopenharmony_ci         sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
259bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_exec_stride =
260bf215546Sopenharmony_ci         nir_imm_int(&b, rewrite_index_exec_params);
261bf215546Sopenharmony_ci      nir_ssa_def *triangle_fan_exec_offset =
262bf215546Sopenharmony_ci         nir_imul(&b, triangle_fan_exec_stride, index);
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
265bf215546Sopenharmony_ci         unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
266bf215546Sopenharmony_ci         uint32_t mask = (1 << comps) - 1;
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci         nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
269bf215546Sopenharmony_ci                        triangle_fan_exec_buf_desc,
270bf215546Sopenharmony_ci                        nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
271bf215546Sopenharmony_ci                        .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
272bf215546Sopenharmony_ci      }
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci      nir_ssa_def *ibview_vals[] = {
275bf215546Sopenharmony_ci         triangle_fan_index_buf_addr_lo,
276bf215546Sopenharmony_ci         triangle_fan_index_buf_addr_hi,
277bf215546Sopenharmony_ci         triangle_fan_index_buf_stride,
278bf215546Sopenharmony_ci         nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
279bf215546Sopenharmony_ci      };
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci      nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
282bf215546Sopenharmony_ci                     exec_buf_desc, exec_offset,
283bf215546Sopenharmony_ci                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
284bf215546Sopenharmony_ci      exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
285bf215546Sopenharmony_ci   } else {
286bf215546Sopenharmony_ci      exec_vals[3] = nir_channel(&b, draw_info1, 0);
287bf215546Sopenharmony_ci      exec_vals[4] = nir_channel(&b, draw_info1, 1);
288bf215546Sopenharmony_ci      exec_vals[5] = nir_channel(&b, draw_info1, 2);
289bf215546Sopenharmony_ci      exec_vals[6] = nir_channel(&b, draw_info1, 3);
290bf215546Sopenharmony_ci      exec_vals[7] = draw_info2;
291bf215546Sopenharmony_ci   }
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
294bf215546Sopenharmony_ci                  exec_buf_desc, exec_offset,
295bf215546Sopenharmony_ci                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
296bf215546Sopenharmony_ci   nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 4),
297bf215546Sopenharmony_ci                  exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
298bf215546Sopenharmony_ci                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   if (indirect_count)
301bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   return b.shader;
304bf215546Sopenharmony_ci}
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_cinir_shader *
307bf215546Sopenharmony_cidzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
308bf215546Sopenharmony_ci{
309bf215546Sopenharmony_ci   assert(old_index_size == 2 || old_index_size == 4);
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   nir_builder b =
312bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
313bf215546Sopenharmony_ci                                     dxil_get_nir_compiler_options(),
314bf215546Sopenharmony_ci                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
315bf215546Sopenharmony_ci                                     old_index_size);
316bf215546Sopenharmony_ci   b.shader->info.internal = true;
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   nir_ssa_def *params_desc =
319bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
320bf215546Sopenharmony_ci   nir_ssa_def *new_index_buf_desc =
321bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
322bf215546Sopenharmony_ci                             "new_index_buf", ACCESS_NON_READABLE);
323bf215546Sopenharmony_ci   nir_ssa_def *old_index_buf_desc =
324bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
325bf215546Sopenharmony_ci                             "old_index_buf", ACCESS_NON_WRITEABLE);
326bf215546Sopenharmony_ci   nir_ssa_def *new_index_count_ptr_desc =
327bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
328bf215546Sopenharmony_ci                             "new_index_count_ptr", ACCESS_NON_READABLE);
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci   nir_ssa_def *params =
331bf215546Sopenharmony_ci      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
332bf215546Sopenharmony_ci                   params_desc, nir_imm_int(&b, 0),
333bf215546Sopenharmony_ci                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   nir_ssa_def *prim_restart_val =
336bf215546Sopenharmony_ci      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
337bf215546Sopenharmony_ci   nir_variable *old_index_ptr_var =
338bf215546Sopenharmony_ci      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
339bf215546Sopenharmony_ci   nir_ssa_def *old_index_ptr = nir_channel(&b, params, 0);
340bf215546Sopenharmony_ci   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
341bf215546Sopenharmony_ci   nir_variable *new_index_ptr_var =
342bf215546Sopenharmony_ci      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
343bf215546Sopenharmony_ci   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
344bf215546Sopenharmony_ci   nir_ssa_def *old_index_count = nir_channel(&b, params, 1);
345bf215546Sopenharmony_ci   nir_variable *index0_var =
346bf215546Sopenharmony_ci      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
347bf215546Sopenharmony_ci   nir_store_var(&b, index0_var, prim_restart_val, 1);
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   /*
350bf215546Sopenharmony_ci    * Filter out all primitive-restart magic values, and generate a triangle list
351bf215546Sopenharmony_ci    * from the triangle fan definition.
352bf215546Sopenharmony_ci    *
353bf215546Sopenharmony_ci    * Basically:
354bf215546Sopenharmony_ci    *
355bf215546Sopenharmony_ci    * new_index_ptr = 0;
356bf215546Sopenharmony_ci    * index0 = restart_prim_value; // 0xffff or 0xffffffff
357bf215546Sopenharmony_ci    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
358bf215546Sopenharmony_ci    *    // If we have no starting-point we need at least 3 vertices,
359bf215546Sopenharmony_ci    *    // otherwise we can do with two. If there's not enough vertices
360bf215546Sopenharmony_ci    *    // to form a primitive, we just bail out.
361bf215546Sopenharmony_ci    *    min_indices = index0 == restart_prim_value ? 3 : 2;
362bf215546Sopenharmony_ci    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
363bf215546Sopenharmony_ci    *       break;
364bf215546Sopenharmony_ci    *
365bf215546Sopenharmony_ci    *    if (index0 == restart_prim_value) {
366bf215546Sopenharmony_ci    *       // No starting point, skip all entries until we have a
367bf215546Sopenharmony_ci    *       // non-primitive-restart value
368bf215546Sopenharmony_ci    *       index0 = old_index_buf[old_index_ptr++];
369bf215546Sopenharmony_ci    *       continue;
370bf215546Sopenharmony_ci    *    }
371bf215546Sopenharmony_ci    *
372bf215546Sopenharmony_ci    *    // If at least one index contains the primitive-restart pattern,
373bf215546Sopenharmony_ci         // ignore this triangle, and skip the unused entries
374bf215546Sopenharmony_ci    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
375bf215546Sopenharmony_ci    *       old_index_ptr += 2;
376bf215546Sopenharmony_ci    *       continue;
377bf215546Sopenharmony_ci    *    }
378bf215546Sopenharmony_ci    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
379bf215546Sopenharmony_ci    *       old_index_ptr++;
380bf215546Sopenharmony_ci    *       continue;
381bf215546Sopenharmony_ci    *    }
382bf215546Sopenharmony_ci    *
383bf215546Sopenharmony_ci    *    // We have a valid primitive, queue it to the new index buffer
384bf215546Sopenharmony_ci    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
385bf215546Sopenharmony_ci    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
386bf215546Sopenharmony_ci    *    new_index_buf[new_index_ptr++] = index0;
387bf215546Sopenharmony_ci    * }
388bf215546Sopenharmony_ci    *
389bf215546Sopenharmony_ci    * expressed in NIR, which admitedly is not super easy to grasp with.
390bf215546Sopenharmony_ci    * TODO: Might be a good thing to use use the CL compiler we have and turn
391bf215546Sopenharmony_ci    * those shaders into CL kernels.
392bf215546Sopenharmony_ci    */
393bf215546Sopenharmony_ci   nir_push_loop(&b);
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
396bf215546Sopenharmony_ci   nir_ssa_def *index0 = nir_load_var(&b, index0_var);
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci   nir_ssa_def *read_index_count =
399bf215546Sopenharmony_ci      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
400bf215546Sopenharmony_ci                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
401bf215546Sopenharmony_ci   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
402bf215546Sopenharmony_ci   nir_jump(&b, nir_jump_break);
403bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   nir_ssa_def *old_index_offset =
406bf215546Sopenharmony_ci      nir_imul_imm(&b, old_index_ptr, old_index_size);
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
409bf215546Sopenharmony_ci   nir_ssa_def *index_val =
410bf215546Sopenharmony_ci      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
411bf215546Sopenharmony_ci                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
412bf215546Sopenharmony_ci                    .align_mul = 4);
413bf215546Sopenharmony_ci   if (old_index_size == 2) {
414bf215546Sopenharmony_ci     index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
415bf215546Sopenharmony_ci                           nir_ushr_imm(&b, index_val, 16),
416bf215546Sopenharmony_ci                           nir_iand_imm(&b, index_val, 0xffff));
417bf215546Sopenharmony_ci   }
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci   nir_store_var(&b, index0_var, index_val, 1);
420bf215546Sopenharmony_ci   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
421bf215546Sopenharmony_ci   nir_jump(&b, nir_jump_continue);
422bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci   nir_ssa_def *index12 =
425bf215546Sopenharmony_ci      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
426bf215546Sopenharmony_ci                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
427bf215546Sopenharmony_ci                    .align_mul = 4);
428bf215546Sopenharmony_ci   if (old_index_size == 2) {
429bf215546Sopenharmony_ci      nir_ssa_def *indices[] = {
430bf215546Sopenharmony_ci         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
431bf215546Sopenharmony_ci         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
432bf215546Sopenharmony_ci         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
433bf215546Sopenharmony_ci      };
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci      index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
436bf215546Sopenharmony_ci                          nir_vec2(&b, indices[1], indices[2]),
437bf215546Sopenharmony_ci                          nir_vec2(&b, indices[0], indices[1]));
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
441bf215546Sopenharmony_ci   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
442bf215546Sopenharmony_ci   nir_store_var(&b, index0_var, prim_restart_val, 1);
443bf215546Sopenharmony_ci   nir_jump(&b, nir_jump_continue);
444bf215546Sopenharmony_ci   nir_push_else(&b, NULL);
445bf215546Sopenharmony_ci   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
446bf215546Sopenharmony_ci   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
447bf215546Sopenharmony_ci   nir_store_var(&b, index0_var, prim_restart_val, 1);
448bf215546Sopenharmony_ci   nir_jump(&b, nir_jump_continue);
449bf215546Sopenharmony_ci   nir_push_else(&b, NULL);
450bf215546Sopenharmony_ci   nir_ssa_def *new_indices =
451bf215546Sopenharmony_ci      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
452bf215546Sopenharmony_ci   nir_ssa_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
453bf215546Sopenharmony_ci   nir_ssa_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
454bf215546Sopenharmony_ci   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
455bf215546Sopenharmony_ci                  new_index_offset,
456bf215546Sopenharmony_ci                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
457bf215546Sopenharmony_ci   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
458bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
459bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
460bf215546Sopenharmony_ci   nir_pop_loop(&b, NULL);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
463bf215546Sopenharmony_ci                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
464bf215546Sopenharmony_ci                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci   return b.shader;
467bf215546Sopenharmony_ci}
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_cinir_shader *
470bf215546Sopenharmony_cidzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
471bf215546Sopenharmony_ci{
472bf215546Sopenharmony_ci   assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   nir_builder b =
475bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
476bf215546Sopenharmony_ci                                     dxil_get_nir_compiler_options(),
477bf215546Sopenharmony_ci                                     "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
478bf215546Sopenharmony_ci                                     old_index_size);
479bf215546Sopenharmony_ci   b.shader->info.internal = true;
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   nir_ssa_def *params_desc =
482bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
483bf215546Sopenharmony_ci   nir_ssa_def *new_index_buf_desc =
484bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
485bf215546Sopenharmony_ci                             "new_index_buf", ACCESS_NON_READABLE);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   nir_ssa_def *old_index_buf_desc = NULL;
488bf215546Sopenharmony_ci   if (old_index_size > 0) {
489bf215546Sopenharmony_ci      old_index_buf_desc =
490bf215546Sopenharmony_ci         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
491bf215546Sopenharmony_ci                                "old_index_buf", ACCESS_NON_WRITEABLE);
492bf215546Sopenharmony_ci   }
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   nir_ssa_def *params =
495bf215546Sopenharmony_ci      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
496bf215546Sopenharmony_ci                   params_desc, nir_imm_int(&b, 0),
497bf215546Sopenharmony_ci                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
500bf215546Sopenharmony_ci   nir_ssa_def *new_indices;
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci   if (old_index_size > 0) {
503bf215546Sopenharmony_ci      nir_ssa_def *old_first_index = nir_channel(&b, params, 0);
504bf215546Sopenharmony_ci      nir_ssa_def *old_index0_offset =
505bf215546Sopenharmony_ci         nir_imul_imm(&b, old_first_index, old_index_size);
506bf215546Sopenharmony_ci      nir_ssa_def *old_index1_offset =
507bf215546Sopenharmony_ci         nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
508bf215546Sopenharmony_ci                      old_index_size);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci      nir_ssa_def *old_index0 =
511bf215546Sopenharmony_ci         nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
512bf215546Sopenharmony_ci                       old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
513bf215546Sopenharmony_ci                       .align_mul = 4);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci      if (old_index_size == 2) {
516bf215546Sopenharmony_ci        old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
517bf215546Sopenharmony_ci                               nir_ushr_imm(&b, old_index0, 16),
518bf215546Sopenharmony_ci                               nir_iand_imm(&b, old_index0, 0xffff));
519bf215546Sopenharmony_ci      }
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci      nir_ssa_def *old_index12 =
522bf215546Sopenharmony_ci         nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
523bf215546Sopenharmony_ci                       old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
524bf215546Sopenharmony_ci                       .align_mul = 4);
525bf215546Sopenharmony_ci      if (old_index_size == 2) {
526bf215546Sopenharmony_ci         nir_ssa_def *indices[] = {
527bf215546Sopenharmony_ci            nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
528bf215546Sopenharmony_ci            nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
529bf215546Sopenharmony_ci            nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
530bf215546Sopenharmony_ci         };
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci         old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
533bf215546Sopenharmony_ci                                 nir_vec2(&b, indices[1], indices[2]),
534bf215546Sopenharmony_ci                                 nir_vec2(&b, indices[0], indices[1]));
535bf215546Sopenharmony_ci      }
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci      /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
538bf215546Sopenharmony_ci      new_indices =
539bf215546Sopenharmony_ci         nir_vec3(&b, nir_channel(&b, old_index12, 0),
540bf215546Sopenharmony_ci                  nir_channel(&b, old_index12, 1), old_index0);
541bf215546Sopenharmony_ci   } else {
542bf215546Sopenharmony_ci      new_indices =
543bf215546Sopenharmony_ci         nir_vec3(&b,
544bf215546Sopenharmony_ci                  nir_iadd_imm(&b, triangle, 1),
545bf215546Sopenharmony_ci                  nir_iadd_imm(&b, triangle, 2),
546bf215546Sopenharmony_ci                  nir_imm_int(&b, 0));
547bf215546Sopenharmony_ci   }
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   nir_ssa_def *new_index_offset =
550bf215546Sopenharmony_ci      nir_imul_imm(&b, triangle, 4 * 3);
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
553bf215546Sopenharmony_ci                  new_index_offset,
554bf215546Sopenharmony_ci                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci   return b.shader;
557bf215546Sopenharmony_ci}
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_cinir_shader *
560bf215546Sopenharmony_cidzn_nir_blit_vs(void)
561bf215546Sopenharmony_ci{
562bf215546Sopenharmony_ci   nir_builder b =
563bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
564bf215546Sopenharmony_ci                                     dxil_get_nir_compiler_options(),
565bf215546Sopenharmony_ci                                     "dzn_meta_blit_vs()");
566bf215546Sopenharmony_ci   b.shader->info.internal = true;
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   nir_ssa_def *params_desc =
569bf215546Sopenharmony_ci      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   nir_variable *out_pos =
572bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
573bf215546Sopenharmony_ci                          "gl_Position");
574bf215546Sopenharmony_ci   out_pos->data.location = VARYING_SLOT_POS;
575bf215546Sopenharmony_ci   out_pos->data.driver_location = 0;
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci   nir_variable *out_coords =
578bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
579bf215546Sopenharmony_ci                          "coords");
580bf215546Sopenharmony_ci   out_coords->data.location = VARYING_SLOT_TEX0;
581bf215546Sopenharmony_ci   out_coords->data.driver_location = 1;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci   nir_ssa_def *vertex = nir_load_vertex_id(&b);
584bf215546Sopenharmony_ci   nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float));
585bf215546Sopenharmony_ci   nir_ssa_def *coords =
586bf215546Sopenharmony_ci      nir_load_ubo(&b, 4, 32, params_desc, base,
587bf215546Sopenharmony_ci                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
588bf215546Sopenharmony_ci   nir_ssa_def *pos =
589bf215546Sopenharmony_ci      nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
590bf215546Sopenharmony_ci               nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
591bf215546Sopenharmony_ci   nir_ssa_def *z_coord =
592bf215546Sopenharmony_ci      nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
593bf215546Sopenharmony_ci                   .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
594bf215546Sopenharmony_ci   coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   nir_store_var(&b, out_pos, pos, 0xf);
597bf215546Sopenharmony_ci   nir_store_var(&b, out_coords, coords, 0x7);
598bf215546Sopenharmony_ci   return b.shader;
599bf215546Sopenharmony_ci}
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_cinir_shader *
602bf215546Sopenharmony_cidzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
603bf215546Sopenharmony_ci{
604bf215546Sopenharmony_ci   bool ms = info->src_samples > 1;
605bf215546Sopenharmony_ci   nir_alu_type nir_out_type =
606bf215546Sopenharmony_ci      nir_get_nir_type_for_glsl_base_type(info->out_type);
607bf215546Sopenharmony_ci   uint32_t coord_comps =
608bf215546Sopenharmony_ci      glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
609bf215546Sopenharmony_ci      info->src_is_array;
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci   nir_builder b =
612bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
613bf215546Sopenharmony_ci                                     dxil_get_nir_compiler_options(),
614bf215546Sopenharmony_ci                                     "dzn_meta_blit_fs()");
615bf215546Sopenharmony_ci   b.shader->info.internal = true;
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci   const struct glsl_type *tex_type =
618bf215546Sopenharmony_ci      glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
619bf215546Sopenharmony_ci   nir_variable *tex_var =
620bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
621bf215546Sopenharmony_ci   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   nir_variable *pos_var =
624bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_in,
625bf215546Sopenharmony_ci                          glsl_vector_type(GLSL_TYPE_FLOAT, 4),
626bf215546Sopenharmony_ci                          "gl_FragCoord");
627bf215546Sopenharmony_ci   pos_var->data.location = VARYING_SLOT_POS;
628bf215546Sopenharmony_ci   pos_var->data.driver_location = 0;
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   nir_variable *coord_var =
631bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_in,
632bf215546Sopenharmony_ci                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
633bf215546Sopenharmony_ci                          "coord");
634bf215546Sopenharmony_ci   coord_var->data.location = VARYING_SLOT_TEX0;
635bf215546Sopenharmony_ci   coord_var->data.driver_location = 1;
636bf215546Sopenharmony_ci   nir_ssa_def *coord =
637bf215546Sopenharmony_ci      nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   uint32_t out_comps =
640bf215546Sopenharmony_ci      (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
641bf215546Sopenharmony_ci   nir_variable *out =
642bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_out,
643bf215546Sopenharmony_ci                          glsl_vector_type(info->out_type, out_comps),
644bf215546Sopenharmony_ci                          "out");
645bf215546Sopenharmony_ci   out->data.location = info->loc;
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci   nir_ssa_def *res = NULL;
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   if (info->resolve) {
650bf215546Sopenharmony_ci      /* When resolving a float type, we need to calculate the average of all
651bf215546Sopenharmony_ci       * samples. For integer resolve, Vulkan says that one sample should be
652bf215546Sopenharmony_ci       * chosen without telling which. Let's just pick the first one in that
653bf215546Sopenharmony_ci       * case.
654bf215546Sopenharmony_ci       */
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ?
657bf215546Sopenharmony_ci                          info->src_samples : 1;
658bf215546Sopenharmony_ci      for (unsigned s = 0; s < nsamples; s++) {
659bf215546Sopenharmony_ci         nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci         tex->op = nir_texop_txf_ms;
662bf215546Sopenharmony_ci         tex->dest_type = nir_out_type;
663bf215546Sopenharmony_ci         tex->texture_index = 0;
664bf215546Sopenharmony_ci         tex->is_array = info->src_is_array;
665bf215546Sopenharmony_ci         tex->sampler_dim = info->sampler_dim;
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci         tex->src[0].src_type = nir_tex_src_coord;
668bf215546Sopenharmony_ci         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
669bf215546Sopenharmony_ci         tex->coord_components = coord_comps;
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci         tex->src[1].src_type = nir_tex_src_ms_index;
672bf215546Sopenharmony_ci         tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci         tex->src[2].src_type = nir_tex_src_lod;
675bf215546Sopenharmony_ci         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci         tex->src[3].src_type = nir_tex_src_texture_deref;
678bf215546Sopenharmony_ci         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci         nir_builder_instr_insert(&b, &tex->instr);
683bf215546Sopenharmony_ci         res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
684bf215546Sopenharmony_ci      }
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci      if (nsamples > 1) {
687bf215546Sopenharmony_ci         unsigned type_sz = nir_alu_type_get_type_size(nir_out_type);
688bf215546Sopenharmony_ci         res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
689bf215546Sopenharmony_ci      }
690bf215546Sopenharmony_ci   } else {
691bf215546Sopenharmony_ci      nir_tex_instr *tex =
692bf215546Sopenharmony_ci         nir_tex_instr_create(b.shader, ms ? 4 : 3);
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci      tex->dest_type = nir_out_type;
695bf215546Sopenharmony_ci      tex->is_array = info->src_is_array;
696bf215546Sopenharmony_ci      tex->sampler_dim = info->sampler_dim;
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci      if (ms) {
699bf215546Sopenharmony_ci         tex->op = nir_texop_txf_ms;
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci         tex->src[0].src_type = nir_tex_src_coord;
702bf215546Sopenharmony_ci         tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
703bf215546Sopenharmony_ci         tex->coord_components = coord_comps;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci         tex->src[1].src_type = nir_tex_src_ms_index;
706bf215546Sopenharmony_ci         tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci         tex->src[2].src_type = nir_tex_src_lod;
709bf215546Sopenharmony_ci         tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci         tex->src[3].src_type = nir_tex_src_texture_deref;
712bf215546Sopenharmony_ci         tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa);
713bf215546Sopenharmony_ci      } else {
714bf215546Sopenharmony_ci         nir_variable *sampler_var =
715bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
716bf215546Sopenharmony_ci         nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci         tex->op = nir_texop_tex;
719bf215546Sopenharmony_ci         tex->sampler_index = 0;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci         tex->src[0].src_type = nir_tex_src_coord;
722bf215546Sopenharmony_ci         tex->src[0].src = nir_src_for_ssa(coord);
723bf215546Sopenharmony_ci         tex->coord_components = coord_comps;
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci         tex->src[1].src_type = nir_tex_src_texture_deref;
726bf215546Sopenharmony_ci         tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci         tex->src[2].src_type = nir_tex_src_sampler_deref;
729bf215546Sopenharmony_ci         tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
730bf215546Sopenharmony_ci      }
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
733bf215546Sopenharmony_ci      nir_builder_instr_insert(&b, &tex->instr);
734bf215546Sopenharmony_ci      res = &tex->dest.ssa;
735bf215546Sopenharmony_ci   }
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf);
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci   return b.shader;
740bf215546Sopenharmony_ci}
741