/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "vk_util.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "v3dv_debug.h"
27bf215546Sopenharmony_ci#include "v3dv_private.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "common/v3d_debug.h"
30bf215546Sopenharmony_ci#include "qpu/qpu_disasm.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
33bf215546Sopenharmony_ci#include "nir/nir_serialize.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include "util/u_atomic.h"
36bf215546Sopenharmony_ci#include "util/u_prim.h"
37bf215546Sopenharmony_ci#include "util/os_time.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "vk_pipeline.h"
40bf215546Sopenharmony_ci#include "vulkan/util/vk_format.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cistatic VkResult
43bf215546Sopenharmony_cicompute_vpm_config(struct v3dv_pipeline *pipeline);
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_civoid
46bf215546Sopenharmony_civ3dv_print_v3d_key(struct v3d_key *key,
47bf215546Sopenharmony_ci                   uint32_t v3d_key_size)
48bf215546Sopenharmony_ci{
49bf215546Sopenharmony_ci   struct mesa_sha1 ctx;
50bf215546Sopenharmony_ci   unsigned char sha1[20];
51bf215546Sopenharmony_ci   char sha1buf[41];
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   _mesa_sha1_init(&ctx);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, key, v3d_key_size);
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   _mesa_sha1_final(&ctx, sha1);
58bf215546Sopenharmony_ci   _mesa_sha1_format(sha1buf, sha1);
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   fprintf(stderr, "key %p: %s\n", key, sha1buf);
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistatic void
64bf215546Sopenharmony_cipipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
65bf215546Sopenharmony_ci{
66bf215546Sopenharmony_ci   VkPipelineShaderStageCreateInfo info = {
67bf215546Sopenharmony_ci      .module = vk_shader_module_handle_from_nir(p_stage->nir),
68bf215546Sopenharmony_ci      .pName = p_stage->entrypoint,
69bf215546Sopenharmony_ci      .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70bf215546Sopenharmony_ci   };
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   vk_pipeline_hash_shader_stage(&info, p_stage->shader_sha1);
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_civoid
76bf215546Sopenharmony_civ3dv_shader_variant_destroy(struct v3dv_device *device,
77bf215546Sopenharmony_ci                            struct v3dv_shader_variant *variant)
78bf215546Sopenharmony_ci{
79bf215546Sopenharmony_ci   /* The assembly BO is shared by all variants in the pipeline, so it can't
80bf215546Sopenharmony_ci    * be freed here and should be freed with the pipeline
81bf215546Sopenharmony_ci    */
82bf215546Sopenharmony_ci   if (variant->qpu_insts)
83bf215546Sopenharmony_ci      free(variant->qpu_insts);
84bf215546Sopenharmony_ci   ralloc_free(variant->prog_data.base);
85bf215546Sopenharmony_ci   vk_free(&device->vk.alloc, variant);
86bf215546Sopenharmony_ci}
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_cistatic void
89bf215546Sopenharmony_cidestroy_pipeline_stage(struct v3dv_device *device,
90bf215546Sopenharmony_ci                       struct v3dv_pipeline_stage *p_stage,
91bf215546Sopenharmony_ci                       const VkAllocationCallbacks *pAllocator)
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci   if (!p_stage)
94bf215546Sopenharmony_ci      return;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   ralloc_free(p_stage->nir);
97bf215546Sopenharmony_ci   vk_free2(&device->vk.alloc, pAllocator, p_stage);
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic void
101bf215546Sopenharmony_cipipeline_free_stages(struct v3dv_device *device,
102bf215546Sopenharmony_ci                     struct v3dv_pipeline *pipeline,
103bf215546Sopenharmony_ci                     const VkAllocationCallbacks *pAllocator)
104bf215546Sopenharmony_ci{
105bf215546Sopenharmony_ci   assert(pipeline);
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   /* FIXME: we can't just use a loop over mesa stage due the bin, would be
108bf215546Sopenharmony_ci    * good to find an alternative.
109bf215546Sopenharmony_ci    */
110bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
111bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
112bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
113bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
114bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
115bf215546Sopenharmony_ci   destroy_pipeline_stage(device, pipeline->cs, pAllocator);
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   pipeline->vs = NULL;
118bf215546Sopenharmony_ci   pipeline->vs_bin = NULL;
119bf215546Sopenharmony_ci   pipeline->gs = NULL;
120bf215546Sopenharmony_ci   pipeline->gs_bin = NULL;
121bf215546Sopenharmony_ci   pipeline->fs = NULL;
122bf215546Sopenharmony_ci   pipeline->cs = NULL;
123bf215546Sopenharmony_ci}
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_cistatic void
126bf215546Sopenharmony_civ3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
127bf215546Sopenharmony_ci                      struct v3dv_device *device,
128bf215546Sopenharmony_ci                      const VkAllocationCallbacks *pAllocator)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci   if (!pipeline)
131bf215546Sopenharmony_ci      return;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   pipeline_free_stages(device, pipeline, pAllocator);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   if (pipeline->shared_data) {
136bf215546Sopenharmony_ci      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
137bf215546Sopenharmony_ci      pipeline->shared_data = NULL;
138bf215546Sopenharmony_ci   }
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   if (pipeline->spill.bo) {
141bf215546Sopenharmony_ci      assert(pipeline->spill.size_per_thread > 0);
142bf215546Sopenharmony_ci      v3dv_bo_free(device, pipeline->spill.bo);
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   if (pipeline->default_attribute_values) {
146bf215546Sopenharmony_ci      v3dv_bo_free(device, pipeline->default_attribute_values);
147bf215546Sopenharmony_ci      pipeline->default_attribute_values = NULL;
148bf215546Sopenharmony_ci   }
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   if (pipeline->executables.mem_ctx)
151bf215546Sopenharmony_ci      ralloc_free(pipeline->executables.mem_ctx);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   vk_object_free(&device->vk, pAllocator, pipeline);
154bf215546Sopenharmony_ci}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
157bf215546Sopenharmony_civ3dv_DestroyPipeline(VkDevice _device,
158bf215546Sopenharmony_ci                     VkPipeline _pipeline,
159bf215546Sopenharmony_ci                     const VkAllocationCallbacks *pAllocator)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_device, device, _device);
162bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   if (!pipeline)
165bf215546Sopenharmony_ci      return;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   v3dv_destroy_pipeline(pipeline, device, pAllocator);
168bf215546Sopenharmony_ci}
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_cistatic const struct spirv_to_nir_options default_spirv_options =  {
171bf215546Sopenharmony_ci   .caps = {
172bf215546Sopenharmony_ci      .device_group = true,
173bf215546Sopenharmony_ci      .float_controls = true,
174bf215546Sopenharmony_ci      .multiview = true,
175bf215546Sopenharmony_ci      .storage_8bit = true,
176bf215546Sopenharmony_ci      .storage_16bit = true,
177bf215546Sopenharmony_ci      .subgroup_basic = true,
178bf215546Sopenharmony_ci      .variable_pointers = true,
179bf215546Sopenharmony_ci      .vk_memory_model = true,
180bf215546Sopenharmony_ci      .vk_memory_model_device_scope = true,
181bf215546Sopenharmony_ci      .physical_storage_buffer_address = true,
182bf215546Sopenharmony_ci    },
183bf215546Sopenharmony_ci   .ubo_addr_format = nir_address_format_32bit_index_offset,
184bf215546Sopenharmony_ci   .ssbo_addr_format = nir_address_format_32bit_index_offset,
185bf215546Sopenharmony_ci   .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
186bf215546Sopenharmony_ci   .push_const_addr_format = nir_address_format_logical,
187bf215546Sopenharmony_ci   .shared_addr_format = nir_address_format_32bit_offset,
188bf215546Sopenharmony_ci};
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ciconst nir_shader_compiler_options v3dv_nir_options = {
191bf215546Sopenharmony_ci   .lower_uadd_sat = true,
192bf215546Sopenharmony_ci   .lower_usub_sat = true,
193bf215546Sopenharmony_ci   .lower_iadd_sat = true,
194bf215546Sopenharmony_ci   .lower_all_io_to_temps = true,
195bf215546Sopenharmony_ci   .lower_extract_byte = true,
196bf215546Sopenharmony_ci   .lower_extract_word = true,
197bf215546Sopenharmony_ci   .lower_insert_byte = true,
198bf215546Sopenharmony_ci   .lower_insert_word = true,
199bf215546Sopenharmony_ci   .lower_bitfield_insert_to_shifts = true,
200bf215546Sopenharmony_ci   .lower_bitfield_extract_to_shifts = true,
201bf215546Sopenharmony_ci   .lower_bitfield_reverse = true,
202bf215546Sopenharmony_ci   .lower_bit_count = true,
203bf215546Sopenharmony_ci   .lower_cs_local_id_to_index = true,
204bf215546Sopenharmony_ci   .lower_ffract = true,
205bf215546Sopenharmony_ci   .lower_fmod = true,
206bf215546Sopenharmony_ci   .lower_pack_unorm_2x16 = true,
207bf215546Sopenharmony_ci   .lower_pack_snorm_2x16 = true,
208bf215546Sopenharmony_ci   .lower_unpack_unorm_2x16 = true,
209bf215546Sopenharmony_ci   .lower_unpack_snorm_2x16 = true,
210bf215546Sopenharmony_ci   .lower_pack_unorm_4x8 = true,
211bf215546Sopenharmony_ci   .lower_pack_snorm_4x8 = true,
212bf215546Sopenharmony_ci   .lower_unpack_unorm_4x8 = true,
213bf215546Sopenharmony_ci   .lower_unpack_snorm_4x8 = true,
214bf215546Sopenharmony_ci   .lower_pack_half_2x16 = true,
215bf215546Sopenharmony_ci   .lower_unpack_half_2x16 = true,
216bf215546Sopenharmony_ci   .lower_pack_32_2x16 = true,
217bf215546Sopenharmony_ci   .lower_pack_32_2x16_split = true,
218bf215546Sopenharmony_ci   .lower_unpack_32_2x16_split = true,
219bf215546Sopenharmony_ci   .lower_mul_2x32_64 = true,
220bf215546Sopenharmony_ci   .lower_fdiv = true,
221bf215546Sopenharmony_ci   .lower_find_lsb = true,
222bf215546Sopenharmony_ci   .lower_ffma16 = true,
223bf215546Sopenharmony_ci   .lower_ffma32 = true,
224bf215546Sopenharmony_ci   .lower_ffma64 = true,
225bf215546Sopenharmony_ci   .lower_flrp32 = true,
226bf215546Sopenharmony_ci   .lower_fpow = true,
227bf215546Sopenharmony_ci   .lower_fsat = true,
228bf215546Sopenharmony_ci   .lower_fsqrt = true,
229bf215546Sopenharmony_ci   .lower_ifind_msb = true,
230bf215546Sopenharmony_ci   .lower_isign = true,
231bf215546Sopenharmony_ci   .lower_ldexp = true,
232bf215546Sopenharmony_ci   .lower_mul_high = true,
233bf215546Sopenharmony_ci   .lower_wpos_pntc = true,
234bf215546Sopenharmony_ci   .lower_rotate = true,
235bf215546Sopenharmony_ci   .lower_to_scalar = true,
236bf215546Sopenharmony_ci   .lower_device_index_to_zero = true,
237bf215546Sopenharmony_ci   .has_fsub = true,
238bf215546Sopenharmony_ci   .has_isub = true,
239bf215546Sopenharmony_ci   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
240bf215546Sopenharmony_ci                                   * needs to be supported */
241bf215546Sopenharmony_ci   .lower_interpolate_at = true,
242bf215546Sopenharmony_ci   .max_unroll_iterations = 16,
243bf215546Sopenharmony_ci   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
244bf215546Sopenharmony_ci   .divergence_analysis_options =
245bf215546Sopenharmony_ci      nir_divergence_multiple_workgroup_per_compute_subgroup
246bf215546Sopenharmony_ci};
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ciconst nir_shader_compiler_options *
249bf215546Sopenharmony_civ3dv_pipeline_get_nir_options(void)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   return &v3dv_nir_options;
252bf215546Sopenharmony_ci}
253bf215546Sopenharmony_ci
/* Runs a NIR pass through NIR_PASS on the variable `nir` of the enclosing
 * scope. If the pass reports progress, the enclosing scope's `progress`
 * variable is set to true. The statement expression evaluates to whether
 * this particular pass made progress.
 */
#define OPT(pass, ...) ({                                  \
   bool made_progress = false;                             \
   NIR_PASS(made_progress, nir, pass, ##__VA_ARGS__);      \
   progress |= made_progress;                              \
   made_progress;                                          \
})
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_cistatic void
263bf215546Sopenharmony_cinir_optimize(nir_shader *nir, bool allow_copies)
264bf215546Sopenharmony_ci{
265bf215546Sopenharmony_ci   bool progress;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   do {
268bf215546Sopenharmony_ci      progress = false;
269bf215546Sopenharmony_ci      OPT(nir_split_array_vars, nir_var_function_temp);
270bf215546Sopenharmony_ci      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
271bf215546Sopenharmony_ci      OPT(nir_opt_deref);
272bf215546Sopenharmony_ci      OPT(nir_lower_vars_to_ssa);
273bf215546Sopenharmony_ci      if (allow_copies) {
274bf215546Sopenharmony_ci         /* Only run this pass in the first call to nir_optimize.  Later calls
275bf215546Sopenharmony_ci          * assume that we've lowered away any copy_deref instructions and we
276bf215546Sopenharmony_ci          * don't want to introduce any more.
277bf215546Sopenharmony_ci          */
278bf215546Sopenharmony_ci         OPT(nir_opt_find_array_copies);
279bf215546Sopenharmony_ci      }
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci      OPT(nir_remove_dead_variables,
282bf215546Sopenharmony_ci          (nir_variable_mode)(nir_var_function_temp |
283bf215546Sopenharmony_ci                              nir_var_shader_temp |
284bf215546Sopenharmony_ci                              nir_var_mem_shared),
285bf215546Sopenharmony_ci          NULL);
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci      OPT(nir_opt_copy_prop_vars);
288bf215546Sopenharmony_ci      OPT(nir_opt_dead_write_vars);
289bf215546Sopenharmony_ci      OPT(nir_opt_combine_stores, nir_var_all);
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci      OPT(nir_lower_alu_to_scalar, NULL, NULL);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci      OPT(nir_copy_prop);
294bf215546Sopenharmony_ci      OPT(nir_lower_phis_to_scalar, false);
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci      OPT(nir_copy_prop);
297bf215546Sopenharmony_ci      OPT(nir_opt_dce);
298bf215546Sopenharmony_ci      OPT(nir_opt_cse);
299bf215546Sopenharmony_ci      OPT(nir_opt_combine_stores, nir_var_all);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci      /* Passing 0 to the peephole select pass causes it to convert
302bf215546Sopenharmony_ci       * if-statements that contain only move instructions in the branches
303bf215546Sopenharmony_ci       * regardless of the count.
304bf215546Sopenharmony_ci       *
305bf215546Sopenharmony_ci       * Passing 1 to the peephole select pass causes it to convert
306bf215546Sopenharmony_ci       * if-statements that contain at most a single ALU instruction (total)
307bf215546Sopenharmony_ci       * in both branches.
308bf215546Sopenharmony_ci       */
309bf215546Sopenharmony_ci      OPT(nir_opt_peephole_select, 0, false, false);
310bf215546Sopenharmony_ci      OPT(nir_opt_peephole_select, 8, false, true);
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci      OPT(nir_opt_intrinsics);
313bf215546Sopenharmony_ci      OPT(nir_opt_idiv_const, 32);
314bf215546Sopenharmony_ci      OPT(nir_opt_algebraic);
315bf215546Sopenharmony_ci      OPT(nir_lower_alu);
316bf215546Sopenharmony_ci      OPT(nir_opt_constant_folding);
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci      OPT(nir_opt_dead_cf);
319bf215546Sopenharmony_ci      if (nir_opt_trivial_continues(nir)) {
320bf215546Sopenharmony_ci         progress = true;
321bf215546Sopenharmony_ci         OPT(nir_copy_prop);
322bf215546Sopenharmony_ci         OPT(nir_opt_dce);
323bf215546Sopenharmony_ci      }
324bf215546Sopenharmony_ci      OPT(nir_opt_conditional_discard);
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci      OPT(nir_opt_remove_phis);
327bf215546Sopenharmony_ci      OPT(nir_opt_gcm, false);
328bf215546Sopenharmony_ci      OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
329bf215546Sopenharmony_ci      OPT(nir_opt_undef);
330bf215546Sopenharmony_ci      OPT(nir_lower_pack);
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci      /* There are two optimizations that we don't do here, and we rely on the
333bf215546Sopenharmony_ci       * backend:
334bf215546Sopenharmony_ci       *
335bf215546Sopenharmony_ci       * nir_lower_flrp only needs to be called once, as nothing should
336bf215546Sopenharmony_ci       * rematerialize any flrps. As we are already calling it on the backend
337bf215546Sopenharmony_ci       * compiler, we don't call it again.
338bf215546Sopenharmony_ci       *
339bf215546Sopenharmony_ci       * nir_opt_loop_unroll: as the backend includes custom strategies in
340bf215546Sopenharmony_ci       * order to get the lowest spill/fills possible, and some of them
341bf215546Sopenharmony_ci       * include disable loop unrolling.
342bf215546Sopenharmony_ci       *
343bf215546Sopenharmony_ci       * FIXME: ideally we would like to just remove this method and
344bf215546Sopenharmony_ci       * v3d_optimize_nir. But:
345bf215546Sopenharmony_ci       *
346bf215546Sopenharmony_ci       *   * Using it leads to some regressions on Vulkan CTS tests, due to
347bf215546Sopenharmony_ci       *     some lowering use there
348bf215546Sopenharmony_ci       *   * We would need to move to the backend some additional
349bf215546Sopenharmony_ci       *     lowerings/optimizations that are used on the Vulkan
350bf215546Sopenharmony_ci       *     frontend. That would require to check that we are not getting any
351bf215546Sopenharmony_ci       *     regression or performance drop on OpenGL
352bf215546Sopenharmony_ci       *
353bf215546Sopenharmony_ci       * For now we would keep this Vulkan fronted nir_optimize
354bf215546Sopenharmony_ci       */
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   } while (progress);
357bf215546Sopenharmony_ci}
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_cistatic void
360bf215546Sopenharmony_cipreprocess_nir(nir_shader *nir)
361bf215546Sopenharmony_ci{
362bf215546Sopenharmony_ci   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
363bf215546Sopenharmony_ci      .frag_coord = true,
364bf215546Sopenharmony_ci      .point_coord = true,
365bf215546Sopenharmony_ci   };
366bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   /* Vulkan uses the separate-shader linking model */
369bf215546Sopenharmony_ci   nir->info.separate_shader = true;
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   /* Make sure we lower variable initializers on output variables so that
372bf215546Sopenharmony_ci    * nir_remove_dead_variables below sees the corresponding stores
373bf215546Sopenharmony_ci    */
374bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT)
377bf215546Sopenharmony_ci      NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
378bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
379bf215546Sopenharmony_ci      NIR_PASS(_, nir, nir_lower_input_attachments,
380bf215546Sopenharmony_ci                 &(nir_input_attachment_options) {
381bf215546Sopenharmony_ci                    .use_fragcoord_sysval = false,
382bf215546Sopenharmony_ci                       });
383bf215546Sopenharmony_ci   }
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
386bf215546Sopenharmony_ci              nir_shader_get_entrypoint(nir), true, false);
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_system_values);
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_normalize_cubemap_coords);
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_global_vars_to_local);
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_split_var_copies);
397bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   nir_optimize(nir, true);
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_explicit_io,
402bf215546Sopenharmony_ci            nir_var_mem_push_const,
403bf215546Sopenharmony_ci            nir_address_format_32bit_offset);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_explicit_io,
406bf215546Sopenharmony_ci            nir_var_mem_ubo | nir_var_mem_ssbo,
407bf215546Sopenharmony_ci            nir_address_format_32bit_index_offset);
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_explicit_io,
410bf215546Sopenharmony_ci            nir_var_mem_global,
411bf215546Sopenharmony_ci            nir_address_format_2x32bit_global);
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   /* Lower a bunch of stuff */
416bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_var_copies);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_indirect_derefs,
421bf215546Sopenharmony_ci            nir_var_function_temp, 2);
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
424bf215546Sopenharmony_ci            nir_var_mem_ubo | nir_var_mem_ssbo,
425bf215546Sopenharmony_ci            nir_lower_direct_array_deref_of_vec_load);
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_frexp);
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci   /* Get rid of split copies */
430bf215546Sopenharmony_ci   nir_optimize(nir, false);
431bf215546Sopenharmony_ci}
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_cistatic nir_shader *
434bf215546Sopenharmony_cishader_module_compile_to_nir(struct v3dv_device *device,
435bf215546Sopenharmony_ci                             struct v3dv_pipeline_stage *stage)
436bf215546Sopenharmony_ci{
437bf215546Sopenharmony_ci   nir_shader *nir;
438bf215546Sopenharmony_ci   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL)
442bf215546Sopenharmony_ci      v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   /* vk_shader_module_to_nir also handles internal shaders, when module->nir
445bf215546Sopenharmony_ci    * != NULL. It also calls nir_validate_shader on both cases, so we don't
446bf215546Sopenharmony_ci    * call it again here.
447bf215546Sopenharmony_ci    */
448bf215546Sopenharmony_ci   VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
449bf215546Sopenharmony_ci                                             broadcom_shader_stage_to_gl(stage->stage),
450bf215546Sopenharmony_ci                                             stage->entrypoint,
451bf215546Sopenharmony_ci                                             stage->spec_info,
452bf215546Sopenharmony_ci                                             &default_spirv_options,
453bf215546Sopenharmony_ci                                             nir_options,
454bf215546Sopenharmony_ci                                             NULL, &nir);
455bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
456bf215546Sopenharmony_ci      return NULL;
457bf215546Sopenharmony_ci   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) {
460bf215546Sopenharmony_ci      char sha1buf[41];
461bf215546Sopenharmony_ci      _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
462bf215546Sopenharmony_ci      nir->info.name = ralloc_strdup(nir, sha1buf);
463bf215546Sopenharmony_ci   }
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
466bf215546Sopenharmony_ci                             v3d_debug_flag_for_shader_stage(
467bf215546Sopenharmony_ci                                broadcom_shader_stage_to_gl(stage->stage))))) {
468bf215546Sopenharmony_ci      fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
469bf215546Sopenharmony_ci              broadcom_shader_stage_name(stage->stage),
470bf215546Sopenharmony_ci              stage->program_id);
471bf215546Sopenharmony_ci      nir_print_shader(nir, stderr);
472bf215546Sopenharmony_ci      fprintf(stderr, "\n");
473bf215546Sopenharmony_ci   }
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   preprocess_nir(nir);
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci   return nir;
478bf215546Sopenharmony_ci}
479bf215546Sopenharmony_ci
/* Returns the attribute slot count of a GLSL type, as reported by
 * glsl_count_attribute_slots(type, false). The bindless argument is not
 * used.
 */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci/* FIXME: the number of parameters for this method is somewhat big. Perhaps
487bf215546Sopenharmony_ci * rethink.
488bf215546Sopenharmony_ci */
489bf215546Sopenharmony_cistatic unsigned
490bf215546Sopenharmony_cidescriptor_map_add(struct v3dv_descriptor_map *map,
491bf215546Sopenharmony_ci                   int set,
492bf215546Sopenharmony_ci                   int binding,
493bf215546Sopenharmony_ci                   int array_index,
494bf215546Sopenharmony_ci                   int array_size,
495bf215546Sopenharmony_ci                   int start_index,
496bf215546Sopenharmony_ci                   uint8_t return_size)
497bf215546Sopenharmony_ci{
498bf215546Sopenharmony_ci   assert(array_index < array_size);
499bf215546Sopenharmony_ci   assert(return_size == 16 || return_size == 32);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   unsigned index = start_index;
502bf215546Sopenharmony_ci   for (; index < map->num_desc; index++) {
503bf215546Sopenharmony_ci      if (map->used[index] &&
504bf215546Sopenharmony_ci          set == map->set[index] &&
505bf215546Sopenharmony_ci          binding == map->binding[index] &&
506bf215546Sopenharmony_ci          array_index == map->array_index[index]) {
507bf215546Sopenharmony_ci         assert(array_size == map->array_size[index]);
508bf215546Sopenharmony_ci         if (return_size != map->return_size[index]) {
509bf215546Sopenharmony_ci            /* It the return_size is different it means that the same sampler
510bf215546Sopenharmony_ci             * was used for operations with different precision
511bf215546Sopenharmony_ci             * requirement. In this case we need to ensure that we use the
512bf215546Sopenharmony_ci             * larger one.
513bf215546Sopenharmony_ci             */
514bf215546Sopenharmony_ci            map->return_size[index] = 32;
515bf215546Sopenharmony_ci         }
516bf215546Sopenharmony_ci         return index;
517bf215546Sopenharmony_ci      } else if (!map->used[index]) {
518bf215546Sopenharmony_ci         break;
519bf215546Sopenharmony_ci      }
520bf215546Sopenharmony_ci   }
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   assert(index < DESCRIPTOR_MAP_SIZE);
523bf215546Sopenharmony_ci   assert(!map->used[index]);
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci   map->used[index] = true;
526bf215546Sopenharmony_ci   map->set[index] = set;
527bf215546Sopenharmony_ci   map->binding[index] = binding;
528bf215546Sopenharmony_ci   map->array_index[index] = array_index;
529bf215546Sopenharmony_ci   map->array_size[index] = array_size;
530bf215546Sopenharmony_ci   map->return_size[index] = return_size;
531bf215546Sopenharmony_ci   map->num_desc = MAX2(map->num_desc, index + 1);
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   return index;
534bf215546Sopenharmony_ci}
535bf215546Sopenharmony_ci
/* State passed to the pipeline layout lowering helpers, such as
 * lower_load_push_constant().
 */
struct lower_pipeline_layout_state {
   /* NOTE(review): presumably the pipeline whose shaders are being lowered;
    * confirm against the code that fills this in.
    */
   struct v3dv_pipeline *pipeline;

   /* NOTE(review): presumably the layout of that pipeline; confirm. */
   const struct v3dv_pipeline_layout *layout;

   /* NOTE(review): looks like an output flag set during lowering; confirm
    * against the users of this struct.
    */
   bool needs_default_sampler_state;
};
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_cistatic void
544bf215546Sopenharmony_cilower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
545bf215546Sopenharmony_ci                         struct lower_pipeline_layout_state *state)
546bf215546Sopenharmony_ci{
547bf215546Sopenharmony_ci   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
548bf215546Sopenharmony_ci   instr->intrinsic = nir_intrinsic_load_uniform;
549bf215546Sopenharmony_ci}
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_cistatic struct v3dv_descriptor_map*
552bf215546Sopenharmony_cipipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
553bf215546Sopenharmony_ci                            VkDescriptorType desc_type,
554bf215546Sopenharmony_ci                            gl_shader_stage gl_stage,
555bf215546Sopenharmony_ci                            bool is_sampler)
556bf215546Sopenharmony_ci{
557bf215546Sopenharmony_ci   enum broadcom_shader_stage broadcom_stage =
558bf215546Sopenharmony_ci      gl_shader_stage_to_broadcom(gl_stage);
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci   assert(pipeline->shared_data &&
561bf215546Sopenharmony_ci          pipeline->shared_data->maps[broadcom_stage]);
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   switch(desc_type) {
564bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_SAMPLER:
565bf215546Sopenharmony_ci      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
566bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
567bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
568bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
569bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
570bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
571bf215546Sopenharmony_ci      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
572bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
573bf215546Sopenharmony_ci      return is_sampler ?
574bf215546Sopenharmony_ci         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
575bf215546Sopenharmony_ci         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
576bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
577bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
578bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
579bf215546Sopenharmony_ci      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
580bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
581bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
582bf215546Sopenharmony_ci      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
583bf215546Sopenharmony_ci   default:
584bf215546Sopenharmony_ci      unreachable("Descriptor type unknown or not having a descriptor map");
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci}
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci/* Gathers info from the intrinsic (set and binding) and then lowers it so it
589bf215546Sopenharmony_ci * could be used by the v3d_compiler */
590bf215546Sopenharmony_cistatic void
591bf215546Sopenharmony_cilower_vulkan_resource_index(nir_builder *b,
592bf215546Sopenharmony_ci                            nir_intrinsic_instr *instr,
593bf215546Sopenharmony_ci                            struct lower_pipeline_layout_state *state)
594bf215546Sopenharmony_ci{
595bf215546Sopenharmony_ci   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   unsigned set = nir_intrinsic_desc_set(instr);
600bf215546Sopenharmony_ci   unsigned binding = nir_intrinsic_binding(instr);
601bf215546Sopenharmony_ci   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
602bf215546Sopenharmony_ci   struct v3dv_descriptor_set_binding_layout *binding_layout =
603bf215546Sopenharmony_ci      &set_layout->binding[binding];
604bf215546Sopenharmony_ci   unsigned index = 0;
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   switch (binding_layout->type) {
607bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
608bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
609bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
610bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
611bf215546Sopenharmony_ci   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
612bf215546Sopenharmony_ci      struct v3dv_descriptor_map *descriptor_map =
613bf215546Sopenharmony_ci         pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
614bf215546Sopenharmony_ci                                     b->shader->info.stage, false);
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci      if (!const_val)
617bf215546Sopenharmony_ci         unreachable("non-constant vulkan_resource_index array index");
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci      /* At compile-time we will need to know if we are processing a UBO load
620bf215546Sopenharmony_ci       * for an inline or a regular UBO so we can handle inline loads like
621bf215546Sopenharmony_ci       * push constants. At the level of NIR level however, the inline
622bf215546Sopenharmony_ci       * information is gone, so we rely on the index to make this distinction.
623bf215546Sopenharmony_ci       * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
624bf215546Sopenharmony_ci       * inline buffers. This means that at the descriptor map level
625bf215546Sopenharmony_ci       * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
626bf215546Sopenharmony_ci       * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
627bf215546Sopenharmony_ci       */
628bf215546Sopenharmony_ci      uint32_t start_index = 0;
629bf215546Sopenharmony_ci      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
630bf215546Sopenharmony_ci          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
631bf215546Sopenharmony_ci         start_index = MAX_INLINE_UNIFORM_BUFFERS;
632bf215546Sopenharmony_ci      }
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci      index = descriptor_map_add(descriptor_map, set, binding,
635bf215546Sopenharmony_ci                                 const_val->u32,
636bf215546Sopenharmony_ci                                 binding_layout->array_size,
637bf215546Sopenharmony_ci                                 start_index,
638bf215546Sopenharmony_ci                                 32 /* return_size: doesn't really apply for this case */);
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci      /* We always reserve index 0 for push constants */
641bf215546Sopenharmony_ci      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
642bf215546Sopenharmony_ci          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
643bf215546Sopenharmony_ci          binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
644bf215546Sopenharmony_ci         index++;
645bf215546Sopenharmony_ci      }
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci      break;
648bf215546Sopenharmony_ci   }
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   default:
651bf215546Sopenharmony_ci      unreachable("unsupported descriptor type for vulkan_resource_index");
652bf215546Sopenharmony_ci      break;
653bf215546Sopenharmony_ci   }
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci   /* Since we use the deref pass, both vulkan_resource_index and
656bf215546Sopenharmony_ci    * vulkan_load_descriptor return a vec2 providing an index and
657bf215546Sopenharmony_ci    * offset. Our backend compiler only cares about the index part.
658bf215546Sopenharmony_ci    */
659bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
660bf215546Sopenharmony_ci                            nir_imm_ivec2(b, index, 0));
661bf215546Sopenharmony_ci   nir_instr_remove(&instr->instr);
662bf215546Sopenharmony_ci}
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci/* Returns return_size, so it could be used for the case of not having a
665bf215546Sopenharmony_ci * sampler object
666bf215546Sopenharmony_ci */
667bf215546Sopenharmony_cistatic uint8_t
668bf215546Sopenharmony_cilower_tex_src_to_offset(nir_builder *b,
669bf215546Sopenharmony_ci                        nir_tex_instr *instr,
670bf215546Sopenharmony_ci                        unsigned src_idx,
671bf215546Sopenharmony_ci                        struct lower_pipeline_layout_state *state)
672bf215546Sopenharmony_ci{
673bf215546Sopenharmony_ci   nir_ssa_def *index = NULL;
674bf215546Sopenharmony_ci   unsigned base_index = 0;
675bf215546Sopenharmony_ci   unsigned array_elements = 1;
676bf215546Sopenharmony_ci   nir_tex_src *src = &instr->src[src_idx];
677bf215546Sopenharmony_ci   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   /* We compute first the offsets */
680bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
681bf215546Sopenharmony_ci   while (deref->deref_type != nir_deref_type_var) {
682bf215546Sopenharmony_ci      assert(deref->parent.is_ssa);
683bf215546Sopenharmony_ci      nir_deref_instr *parent =
684bf215546Sopenharmony_ci         nir_instr_as_deref(deref->parent.ssa->parent_instr);
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci      assert(deref->deref_type == nir_deref_type_array);
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci      if (nir_src_is_const(deref->arr.index) && index == NULL) {
689bf215546Sopenharmony_ci         /* We're still building a direct index */
690bf215546Sopenharmony_ci         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
691bf215546Sopenharmony_ci      } else {
692bf215546Sopenharmony_ci         if (index == NULL) {
693bf215546Sopenharmony_ci            /* We used to be direct but not anymore */
694bf215546Sopenharmony_ci            index = nir_imm_int(b, base_index);
695bf215546Sopenharmony_ci            base_index = 0;
696bf215546Sopenharmony_ci         }
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci         index = nir_iadd(b, index,
699bf215546Sopenharmony_ci                          nir_imul(b, nir_imm_int(b, array_elements),
700bf215546Sopenharmony_ci                                   nir_ssa_for_src(b, deref->arr.index, 1)));
701bf215546Sopenharmony_ci      }
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci      array_elements *= glsl_get_length(parent->type);
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci      deref = parent;
706bf215546Sopenharmony_ci   }
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   if (index)
709bf215546Sopenharmony_ci      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   /* We have the offsets, we apply them, rewriting the source or removing
712bf215546Sopenharmony_ci    * instr if needed
713bf215546Sopenharmony_ci    */
714bf215546Sopenharmony_ci   if (index) {
715bf215546Sopenharmony_ci      nir_instr_rewrite_src(&instr->instr, &src->src,
716bf215546Sopenharmony_ci                            nir_src_for_ssa(index));
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci      src->src_type = is_sampler ?
719bf215546Sopenharmony_ci         nir_tex_src_sampler_offset :
720bf215546Sopenharmony_ci         nir_tex_src_texture_offset;
721bf215546Sopenharmony_ci   } else {
722bf215546Sopenharmony_ci      nir_tex_instr_remove_src(instr, src_idx);
723bf215546Sopenharmony_ci   }
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci   uint32_t set = deref->var->data.descriptor_set;
726bf215546Sopenharmony_ci   uint32_t binding = deref->var->data.binding;
727bf215546Sopenharmony_ci   /* FIXME: this is a really simplified check for the precision to be used
728bf215546Sopenharmony_ci    * for the sampling. Right now we are ony checking for the variables used
729bf215546Sopenharmony_ci    * on the operation itself, but there are other cases that we could use to
730bf215546Sopenharmony_ci    * infer the precision requirement.
731bf215546Sopenharmony_ci    */
732bf215546Sopenharmony_ci   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
733bf215546Sopenharmony_ci                            deref->var->data.precision == GLSL_PRECISION_LOW;
734bf215546Sopenharmony_ci   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
735bf215546Sopenharmony_ci   struct v3dv_descriptor_set_binding_layout *binding_layout =
736bf215546Sopenharmony_ci      &set_layout->binding[binding];
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci   /* For input attachments, the shader includes the attachment_idx. As we are
739bf215546Sopenharmony_ci    * treating them as a texture, we only want the base_index
740bf215546Sopenharmony_ci    */
741bf215546Sopenharmony_ci   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
742bf215546Sopenharmony_ci      deref->var->data.index + base_index :
743bf215546Sopenharmony_ci      base_index;
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci   uint8_t return_size;
746bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
747bf215546Sopenharmony_ci      return_size = 16;
748bf215546Sopenharmony_ci   else  if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
749bf215546Sopenharmony_ci      return_size = 32;
750bf215546Sopenharmony_ci   else
751bf215546Sopenharmony_ci      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   struct v3dv_descriptor_map *map =
754bf215546Sopenharmony_ci      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
755bf215546Sopenharmony_ci                                  b->shader->info.stage, is_sampler);
756bf215546Sopenharmony_ci   int desc_index =
757bf215546Sopenharmony_ci      descriptor_map_add(map,
758bf215546Sopenharmony_ci                         deref->var->data.descriptor_set,
759bf215546Sopenharmony_ci                         deref->var->data.binding,
760bf215546Sopenharmony_ci                         array_index,
761bf215546Sopenharmony_ci                         binding_layout->array_size,
762bf215546Sopenharmony_ci                         0,
763bf215546Sopenharmony_ci                         return_size);
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   if (is_sampler)
766bf215546Sopenharmony_ci      instr->sampler_index = desc_index;
767bf215546Sopenharmony_ci   else
768bf215546Sopenharmony_ci      instr->texture_index = desc_index;
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   return return_size;
771bf215546Sopenharmony_ci}
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_cistatic bool
774bf215546Sopenharmony_cilower_sampler(nir_builder *b,
775bf215546Sopenharmony_ci              nir_tex_instr *instr,
776bf215546Sopenharmony_ci              struct lower_pipeline_layout_state *state)
777bf215546Sopenharmony_ci{
778bf215546Sopenharmony_ci   uint8_t return_size = 0;
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   int texture_idx =
781bf215546Sopenharmony_ci      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci   if (texture_idx >= 0)
784bf215546Sopenharmony_ci      return_size = lower_tex_src_to_offset(b, instr, texture_idx, state);
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   int sampler_idx =
787bf215546Sopenharmony_ci      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci   if (sampler_idx >= 0)
790bf215546Sopenharmony_ci      lower_tex_src_to_offset(b, instr, sampler_idx, state);
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci   if (texture_idx < 0 && sampler_idx < 0)
793bf215546Sopenharmony_ci      return false;
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   /* If we don't have a sampler, we assign it the idx we reserve for this
796bf215546Sopenharmony_ci    * case, and we ensure that it is using the correct return size.
797bf215546Sopenharmony_ci    */
798bf215546Sopenharmony_ci   if (sampler_idx < 0) {
799bf215546Sopenharmony_ci      state->needs_default_sampler_state = true;
800bf215546Sopenharmony_ci      instr->sampler_index = return_size == 16 ?
801bf215546Sopenharmony_ci         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
802bf215546Sopenharmony_ci   }
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci   return true;
805bf215546Sopenharmony_ci}
806bf215546Sopenharmony_ci
807bf215546Sopenharmony_ci/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
808bf215546Sopenharmony_cistatic void
809bf215546Sopenharmony_cilower_image_deref(nir_builder *b,
810bf215546Sopenharmony_ci                  nir_intrinsic_instr *instr,
811bf215546Sopenharmony_ci                  struct lower_pipeline_layout_state *state)
812bf215546Sopenharmony_ci{
813bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
814bf215546Sopenharmony_ci   nir_ssa_def *index = NULL;
815bf215546Sopenharmony_ci   unsigned array_elements = 1;
816bf215546Sopenharmony_ci   unsigned base_index = 0;
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci   while (deref->deref_type != nir_deref_type_var) {
819bf215546Sopenharmony_ci      assert(deref->parent.is_ssa);
820bf215546Sopenharmony_ci      nir_deref_instr *parent =
821bf215546Sopenharmony_ci         nir_instr_as_deref(deref->parent.ssa->parent_instr);
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci      assert(deref->deref_type == nir_deref_type_array);
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci      if (nir_src_is_const(deref->arr.index) && index == NULL) {
826bf215546Sopenharmony_ci         /* We're still building a direct index */
827bf215546Sopenharmony_ci         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
828bf215546Sopenharmony_ci      } else {
829bf215546Sopenharmony_ci         if (index == NULL) {
830bf215546Sopenharmony_ci            /* We used to be direct but not anymore */
831bf215546Sopenharmony_ci            index = nir_imm_int(b, base_index);
832bf215546Sopenharmony_ci            base_index = 0;
833bf215546Sopenharmony_ci         }
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci         index = nir_iadd(b, index,
836bf215546Sopenharmony_ci                          nir_imul(b, nir_imm_int(b, array_elements),
837bf215546Sopenharmony_ci                                   nir_ssa_for_src(b, deref->arr.index, 1)));
838bf215546Sopenharmony_ci      }
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci      array_elements *= glsl_get_length(parent->type);
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      deref = parent;
843bf215546Sopenharmony_ci   }
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci   if (index)
846bf215546Sopenharmony_ci      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci   uint32_t set = deref->var->data.descriptor_set;
849bf215546Sopenharmony_ci   uint32_t binding = deref->var->data.binding;
850bf215546Sopenharmony_ci   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
851bf215546Sopenharmony_ci   struct v3dv_descriptor_set_binding_layout *binding_layout =
852bf215546Sopenharmony_ci      &set_layout->binding[binding];
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci   uint32_t array_index = deref->var->data.index + base_index;
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
857bf215546Sopenharmony_ci          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci   struct v3dv_descriptor_map *map =
860bf215546Sopenharmony_ci      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
861bf215546Sopenharmony_ci                                  b->shader->info.stage, false);
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci   int desc_index =
864bf215546Sopenharmony_ci      descriptor_map_add(map,
865bf215546Sopenharmony_ci                         deref->var->data.descriptor_set,
866bf215546Sopenharmony_ci                         deref->var->data.binding,
867bf215546Sopenharmony_ci                         array_index,
868bf215546Sopenharmony_ci                         binding_layout->array_size,
869bf215546Sopenharmony_ci                         0,
870bf215546Sopenharmony_ci                         32 /* return_size: doesn't apply for textures */);
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   /* Note: we don't need to do anything here in relation to the precision and
873bf215546Sopenharmony_ci    * the output size because for images we can infer that info from the image
874bf215546Sopenharmony_ci    * intrinsic, that includes the image format (see
875bf215546Sopenharmony_ci    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
876bf215546Sopenharmony_ci    */
877bf215546Sopenharmony_ci
878bf215546Sopenharmony_ci   index = nir_imm_int(b, desc_index);
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   nir_rewrite_image_intrinsic(instr, index, false);
881bf215546Sopenharmony_ci}
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_cistatic bool
884bf215546Sopenharmony_cilower_intrinsic(nir_builder *b,
885bf215546Sopenharmony_ci                nir_intrinsic_instr *instr,
886bf215546Sopenharmony_ci                struct lower_pipeline_layout_state *state)
887bf215546Sopenharmony_ci{
888bf215546Sopenharmony_ci   switch (instr->intrinsic) {
889bf215546Sopenharmony_ci   case nir_intrinsic_load_push_constant:
890bf215546Sopenharmony_ci      lower_load_push_constant(b, instr, state);
891bf215546Sopenharmony_ci      return true;
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci   case nir_intrinsic_vulkan_resource_index:
894bf215546Sopenharmony_ci      lower_vulkan_resource_index(b, instr, state);
895bf215546Sopenharmony_ci      return true;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   case nir_intrinsic_load_vulkan_descriptor: {
898bf215546Sopenharmony_ci      /* Loading the descriptor happens as part of load/store instructions,
899bf215546Sopenharmony_ci       * so for us this is a no-op.
900bf215546Sopenharmony_ci       */
901bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
902bf215546Sopenharmony_ci      nir_instr_remove(&instr->instr);
903bf215546Sopenharmony_ci      return true;
904bf215546Sopenharmony_ci   }
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_load:
907bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_store:
908bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_add:
909bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_imin:
910bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_umin:
911bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_imax:
912bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_umax:
913bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_and:
914bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_or:
915bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_xor:
916bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_exchange:
917bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_atomic_comp_swap:
918bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_size:
919bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_samples:
920bf215546Sopenharmony_ci      lower_image_deref(b, instr, state);
921bf215546Sopenharmony_ci      return true;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   default:
924bf215546Sopenharmony_ci      return false;
925bf215546Sopenharmony_ci   }
926bf215546Sopenharmony_ci}
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_cistatic bool
929bf215546Sopenharmony_cilower_pipeline_layout_cb(nir_builder *b,
930bf215546Sopenharmony_ci                         nir_instr *instr,
931bf215546Sopenharmony_ci                         void *_state)
932bf215546Sopenharmony_ci{
933bf215546Sopenharmony_ci   bool progress = false;
934bf215546Sopenharmony_ci   struct lower_pipeline_layout_state *state = _state;
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_ci   b->cursor = nir_before_instr(instr);
937bf215546Sopenharmony_ci   switch (instr->type) {
938bf215546Sopenharmony_ci   case nir_instr_type_tex:
939bf215546Sopenharmony_ci      progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
940bf215546Sopenharmony_ci      break;
941bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
942bf215546Sopenharmony_ci      progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
943bf215546Sopenharmony_ci      break;
944bf215546Sopenharmony_ci   default:
945bf215546Sopenharmony_ci      break;
946bf215546Sopenharmony_ci   }
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci   return progress;
949bf215546Sopenharmony_ci}
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_cistatic bool
952bf215546Sopenharmony_cilower_pipeline_layout_info(nir_shader *shader,
953bf215546Sopenharmony_ci                           struct v3dv_pipeline *pipeline,
954bf215546Sopenharmony_ci                           const struct v3dv_pipeline_layout *layout,
955bf215546Sopenharmony_ci                           bool *needs_default_sampler_state)
956bf215546Sopenharmony_ci{
957bf215546Sopenharmony_ci   bool progress = false;
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_ci   struct lower_pipeline_layout_state state = {
960bf215546Sopenharmony_ci      .pipeline = pipeline,
961bf215546Sopenharmony_ci      .layout = layout,
962bf215546Sopenharmony_ci      .needs_default_sampler_state = false,
963bf215546Sopenharmony_ci   };
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci   progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
966bf215546Sopenharmony_ci                                           nir_metadata_block_index |
967bf215546Sopenharmony_ci                                           nir_metadata_dominance,
968bf215546Sopenharmony_ci                                           &state);
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   *needs_default_sampler_state = state.needs_default_sampler_state;
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   return progress;
973bf215546Sopenharmony_ci}
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_cistatic void
977bf215546Sopenharmony_cilower_fs_io(nir_shader *nir)
978bf215546Sopenharmony_ci{
979bf215546Sopenharmony_ci   /* Our backend doesn't handle array fragment shader outputs */
980bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
981bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
982bf215546Sopenharmony_ci
983bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
984bf215546Sopenharmony_ci                               MESA_SHADER_FRAGMENT);
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
987bf215546Sopenharmony_ci                               MESA_SHADER_FRAGMENT);
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
990bf215546Sopenharmony_ci            type_size_vec4, 0);
991bf215546Sopenharmony_ci}
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_cistatic void
994bf215546Sopenharmony_cilower_gs_io(struct nir_shader *nir)
995bf215546Sopenharmony_ci{
996bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
999bf215546Sopenharmony_ci                               MESA_SHADER_GEOMETRY);
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1002bf215546Sopenharmony_ci                               MESA_SHADER_GEOMETRY);
1003bf215546Sopenharmony_ci}
1004bf215546Sopenharmony_ci
1005bf215546Sopenharmony_cistatic void
1006bf215546Sopenharmony_cilower_vs_io(struct nir_shader *nir)
1007bf215546Sopenharmony_ci{
1008bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1011bf215546Sopenharmony_ci                               MESA_SHADER_VERTEX);
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1014bf215546Sopenharmony_ci                               MESA_SHADER_VERTEX);
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1017bf215546Sopenharmony_ci    * overlaps with v3d_nir_lower_io. Need further research though.
1018bf215546Sopenharmony_ci    */
1019bf215546Sopenharmony_ci}
1020bf215546Sopenharmony_ci
/* Debug-output callback that deliberately does nothing with the message. */
static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler is not using this callback too much, only as an alternative
    * way to debug out the shaderdb stats, that you can already get using
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler to remove that callback.
    */
   (void) message;
   (void) data;
}
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_cistatic void
1033bf215546Sopenharmony_cipipeline_populate_v3d_key(struct v3d_key *key,
1034bf215546Sopenharmony_ci                          const struct v3dv_pipeline_stage *p_stage,
1035bf215546Sopenharmony_ci                          uint32_t ucp_enables,
1036bf215546Sopenharmony_ci                          bool robust_buffer_access)
1037bf215546Sopenharmony_ci{
1038bf215546Sopenharmony_ci   assert(p_stage->pipeline->shared_data &&
1039bf215546Sopenharmony_ci          p_stage->pipeline->shared_data->maps[p_stage->stage]);
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci   /* The following values are default values used at pipeline create. We use
1042bf215546Sopenharmony_ci    * there 32 bit as default return size.
1043bf215546Sopenharmony_ci    */
1044bf215546Sopenharmony_ci   struct v3dv_descriptor_map *sampler_map =
1045bf215546Sopenharmony_ci      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1046bf215546Sopenharmony_ci   struct v3dv_descriptor_map *texture_map =
1047bf215546Sopenharmony_ci      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci   key->num_tex_used = texture_map->num_desc;
1050bf215546Sopenharmony_ci   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1051bf215546Sopenharmony_ci   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1052bf215546Sopenharmony_ci      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1053bf215546Sopenharmony_ci      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1054bf215546Sopenharmony_ci      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1055bf215546Sopenharmony_ci      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1056bf215546Sopenharmony_ci   }
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci   key->num_samplers_used = sampler_map->num_desc;
1059bf215546Sopenharmony_ci   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1060bf215546Sopenharmony_ci   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1061bf215546Sopenharmony_ci        sampler_idx++) {
1062bf215546Sopenharmony_ci      key->sampler[sampler_idx].return_size =
1063bf215546Sopenharmony_ci         sampler_map->return_size[sampler_idx];
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci      key->sampler[sampler_idx].return_channels =
1066bf215546Sopenharmony_ci         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1067bf215546Sopenharmony_ci   }
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci   switch (p_stage->stage) {
1070bf215546Sopenharmony_ci   case BROADCOM_SHADER_VERTEX:
1071bf215546Sopenharmony_ci   case BROADCOM_SHADER_VERTEX_BIN:
1072bf215546Sopenharmony_ci      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1073bf215546Sopenharmony_ci      break;
1074bf215546Sopenharmony_ci   case BROADCOM_SHADER_GEOMETRY:
1075bf215546Sopenharmony_ci   case BROADCOM_SHADER_GEOMETRY_BIN:
1076bf215546Sopenharmony_ci      /* FIXME: while we don't implement tessellation shaders */
1077bf215546Sopenharmony_ci      key->is_last_geometry_stage = true;
1078bf215546Sopenharmony_ci      break;
1079bf215546Sopenharmony_ci   case BROADCOM_SHADER_FRAGMENT:
1080bf215546Sopenharmony_ci   case BROADCOM_SHADER_COMPUTE:
1081bf215546Sopenharmony_ci      key->is_last_geometry_stage = false;
1082bf215546Sopenharmony_ci      break;
1083bf215546Sopenharmony_ci   default:
1084bf215546Sopenharmony_ci      unreachable("unsupported shader stage");
1085bf215546Sopenharmony_ci   }
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1088bf215546Sopenharmony_ci    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1089bf215546Sopenharmony_ci    * takes care of adding a single compact array variable at
1090bf215546Sopenharmony_ci    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1091bf215546Sopenharmony_ci    *
1092bf215546Sopenharmony_ci    * The only lowering we are interested is specific to the fragment shader,
1093bf215546Sopenharmony_ci    * where we want to emit discards to honor writes to gl_ClipDistance[] in
1094bf215546Sopenharmony_ci    * previous stages. This is done via nir_lower_clip_fs() so we only set up
1095bf215546Sopenharmony_ci    * the ucp enable mask for that stage.
1096bf215546Sopenharmony_ci    */
1097bf215546Sopenharmony_ci   key->ucp_enables = ucp_enables;
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci   key->robust_buffer_access = robust_buffer_access;
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci   key->environment = V3D_ENVIRONMENT_VULKAN;
1102bf215546Sopenharmony_ci}
1103bf215546Sopenharmony_ci
/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
 * same. For now using prim_mode, which is the one already used on v3d.
 *
 * Lookup table indexed by VkPrimitiveTopology that yields the matching
 * pipe_prim_type. Only the topologies listed below have an entry, so the
 * topology used as an index must be one of them.
 */
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
};
1119bf215546Sopenharmony_ci
/* Lookup table indexed by VkLogicOp that yields the matching pipe_logicop
 * value.
 */
static const enum pipe_logicop vk_to_pipe_logicop[] = {
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
};
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_cistatic void
1140bf215546Sopenharmony_cipipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1141bf215546Sopenharmony_ci                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1142bf215546Sopenharmony_ci                             const struct v3dv_pipeline_stage *p_stage,
1143bf215546Sopenharmony_ci                             bool has_geometry_shader,
1144bf215546Sopenharmony_ci                             uint32_t ucp_enables)
1145bf215546Sopenharmony_ci{
1146bf215546Sopenharmony_ci   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   memset(key, 0, sizeof(*key));
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1151bf215546Sopenharmony_ci   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_ci   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1154bf215546Sopenharmony_ci      pCreateInfo->pInputAssemblyState;
1155bf215546Sopenharmony_ci   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_ci   key->is_points = (topology == PIPE_PRIM_POINTS);
1158bf215546Sopenharmony_ci   key->is_lines = (topology >= PIPE_PRIM_LINES &&
1159bf215546Sopenharmony_ci                    topology <= PIPE_PRIM_LINE_STRIP);
1160bf215546Sopenharmony_ci   key->has_gs = has_geometry_shader;
1161bf215546Sopenharmony_ci
1162bf215546Sopenharmony_ci   const VkPipelineColorBlendStateCreateInfo *cb_info =
1163bf215546Sopenharmony_ci      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1164bf215546Sopenharmony_ci      pCreateInfo->pColorBlendState : NULL;
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1167bf215546Sopenharmony_ci                       vk_to_pipe_logicop[cb_info->logicOp] :
1168bf215546Sopenharmony_ci                       PIPE_LOGICOP_COPY;
1169bf215546Sopenharmony_ci
1170bf215546Sopenharmony_ci   const bool raster_enabled =
1171bf215546Sopenharmony_ci      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci   /* Multisample rasterization state must be ignored if rasterization
1174bf215546Sopenharmony_ci    * is disabled.
1175bf215546Sopenharmony_ci    */
1176bf215546Sopenharmony_ci   const VkPipelineMultisampleStateCreateInfo *ms_info =
1177bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1178bf215546Sopenharmony_ci   if (ms_info) {
1179bf215546Sopenharmony_ci      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1180bf215546Sopenharmony_ci             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1181bf215546Sopenharmony_ci      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1182bf215546Sopenharmony_ci
1183bf215546Sopenharmony_ci      if (key->msaa) {
1184bf215546Sopenharmony_ci         key->sample_coverage =
1185bf215546Sopenharmony_ci            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1186bf215546Sopenharmony_ci         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1187bf215546Sopenharmony_ci         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1188bf215546Sopenharmony_ci      }
1189bf215546Sopenharmony_ci   }
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_ci   /* This is intended for V3D versions before 4.1, otherwise we just use the
1192bf215546Sopenharmony_ci    * tile buffer load/store swap R/B bit.
1193bf215546Sopenharmony_ci    */
1194bf215546Sopenharmony_ci   key->swap_color_rb = 0;
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci   const struct v3dv_render_pass *pass =
1197bf215546Sopenharmony_ci      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1198bf215546Sopenharmony_ci   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1199bf215546Sopenharmony_ci   for (uint32_t i = 0; i < subpass->color_count; i++) {
1200bf215546Sopenharmony_ci      const uint32_t att_idx = subpass->color_attachments[i].attachment;
1201bf215546Sopenharmony_ci      if (att_idx == VK_ATTACHMENT_UNUSED)
1202bf215546Sopenharmony_ci         continue;
1203bf215546Sopenharmony_ci
1204bf215546Sopenharmony_ci      key->cbufs |= 1 << i;
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci      VkFormat fb_format = pass->attachments[att_idx].desc.format;
1207bf215546Sopenharmony_ci      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1208bf215546Sopenharmony_ci
1209bf215546Sopenharmony_ci      /* If logic operations are enabled then we might emit color reads and we
1210bf215546Sopenharmony_ci       * need to know the color buffer format and swizzle for that
1211bf215546Sopenharmony_ci       */
1212bf215546Sopenharmony_ci      if (key->logicop_func != PIPE_LOGICOP_COPY) {
1213bf215546Sopenharmony_ci         key->color_fmt[i].format = fb_pipe_format;
1214bf215546Sopenharmony_ci         memcpy(key->color_fmt[i].swizzle,
1215bf215546Sopenharmony_ci                v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format),
1216bf215546Sopenharmony_ci                sizeof(key->color_fmt[i].swizzle));
1217bf215546Sopenharmony_ci      }
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci      const struct util_format_description *desc =
1220bf215546Sopenharmony_ci         vk_format_description(fb_format);
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_ci      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1223bf215546Sopenharmony_ci          desc->channel[0].size == 32) {
1224bf215546Sopenharmony_ci         key->f32_color_rb |= 1 << i;
1225bf215546Sopenharmony_ci      }
1226bf215546Sopenharmony_ci
1227bf215546Sopenharmony_ci      if (p_stage->nir->info.fs.untyped_color_outputs) {
1228bf215546Sopenharmony_ci         if (util_format_is_pure_uint(fb_pipe_format))
1229bf215546Sopenharmony_ci            key->uint_color_rb |= 1 << i;
1230bf215546Sopenharmony_ci         else if (util_format_is_pure_sint(fb_pipe_format))
1231bf215546Sopenharmony_ci            key->int_color_rb |= 1 << i;
1232bf215546Sopenharmony_ci      }
1233bf215546Sopenharmony_ci
1234bf215546Sopenharmony_ci      if (key->is_points) {
1235bf215546Sopenharmony_ci         /* This mask represents state for GL_ARB_point_sprite which is not
1236bf215546Sopenharmony_ci          * relevant to Vulkan.
1237bf215546Sopenharmony_ci          */
1238bf215546Sopenharmony_ci         key->point_sprite_mask = 0;
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ci         /* Vulkan mandates upper left. */
1241bf215546Sopenharmony_ci         key->point_coord_upper_left = true;
1242bf215546Sopenharmony_ci      }
1243bf215546Sopenharmony_ci   }
1244bf215546Sopenharmony_ci}
1245bf215546Sopenharmony_ci
/**
 * Makes the outputs used by a stage mirror the inputs of the stage that
 * follows it.
 *
 * Copies size_of_used_output_slots bytes from next_stage_input_slots into
 * used_output_slots and stores next_stage_num_inputs through
 * num_used_outputs.
 */
static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
   memcpy(used_output_slots,
          next_stage_input_slots,
          size_of_used_output_slots);

   *num_used_outputs = next_stage_num_inputs;
}
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_cistatic void
1259bf215546Sopenharmony_cipipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1260bf215546Sopenharmony_ci                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1261bf215546Sopenharmony_ci                             const struct v3dv_pipeline_stage *p_stage)
1262bf215546Sopenharmony_ci{
1263bf215546Sopenharmony_ci   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1264bf215546Sopenharmony_ci          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_ci   memset(key, 0, sizeof(*key));
1267bf215546Sopenharmony_ci
1268bf215546Sopenharmony_ci   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1269bf215546Sopenharmony_ci   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1270bf215546Sopenharmony_ci
1271bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1272bf215546Sopenharmony_ci
1273bf215546Sopenharmony_ci   key->per_vertex_point_size =
1274bf215546Sopenharmony_ci      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci   assert(key->base.is_last_geometry_stage);
1279bf215546Sopenharmony_ci   if (key->is_coord) {
1280bf215546Sopenharmony_ci      /* Output varyings in the last binning shader are only used for transform
1281bf215546Sopenharmony_ci       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1282bf215546Sopenharmony_ci       */
1283bf215546Sopenharmony_ci      key->num_used_outputs = 0;
1284bf215546Sopenharmony_ci   } else {
1285bf215546Sopenharmony_ci      struct v3dv_shader_variant *fs_variant =
1286bf215546Sopenharmony_ci         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1287bf215546Sopenharmony_ci
1288bf215546Sopenharmony_ci      STATIC_ASSERT(sizeof(key->used_outputs) ==
1289bf215546Sopenharmony_ci                    sizeof(fs_variant->prog_data.fs->input_slots));
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci      setup_stage_outputs_from_next_stage_inputs(
1292bf215546Sopenharmony_ci         fs_variant->prog_data.fs->num_inputs,
1293bf215546Sopenharmony_ci         fs_variant->prog_data.fs->input_slots,
1294bf215546Sopenharmony_ci         &key->num_used_outputs,
1295bf215546Sopenharmony_ci         key->used_outputs,
1296bf215546Sopenharmony_ci         sizeof(key->used_outputs));
1297bf215546Sopenharmony_ci   }
1298bf215546Sopenharmony_ci}
1299bf215546Sopenharmony_ci
1300bf215546Sopenharmony_cistatic void
1301bf215546Sopenharmony_cipipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1302bf215546Sopenharmony_ci                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1303bf215546Sopenharmony_ci                             const struct v3dv_pipeline_stage *p_stage)
1304bf215546Sopenharmony_ci{
1305bf215546Sopenharmony_ci   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1306bf215546Sopenharmony_ci          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci   memset(key, 0, sizeof(*key));
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1311bf215546Sopenharmony_ci   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1312bf215546Sopenharmony_ci
1313bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_ci   /* Vulkan specifies a point size per vertex, so true for if the prim are
1316bf215546Sopenharmony_ci    * points, like on ES2)
1317bf215546Sopenharmony_ci    */
1318bf215546Sopenharmony_ci   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1319bf215546Sopenharmony_ci      pCreateInfo->pInputAssemblyState;
1320bf215546Sopenharmony_ci   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1321bf215546Sopenharmony_ci
1322bf215546Sopenharmony_ci   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1323bf215546Sopenharmony_ci    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1324bf215546Sopenharmony_ci   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1325bf215546Sopenharmony_ci
1326bf215546Sopenharmony_ci   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1327bf215546Sopenharmony_ci
1328bf215546Sopenharmony_ci   if (key->is_coord) { /* Binning VS*/
1329bf215546Sopenharmony_ci      if (key->base.is_last_geometry_stage) {
1330bf215546Sopenharmony_ci         /* Output varyings in the last binning shader are only used for
1331bf215546Sopenharmony_ci          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1332bf215546Sopenharmony_ci          * supported.
1333bf215546Sopenharmony_ci          */
1334bf215546Sopenharmony_ci         key->num_used_outputs = 0;
1335bf215546Sopenharmony_ci      } else {
1336bf215546Sopenharmony_ci         /* Linking against GS binning program */
1337bf215546Sopenharmony_ci         assert(pipeline->gs);
1338bf215546Sopenharmony_ci         struct v3dv_shader_variant *gs_bin_variant =
1339bf215546Sopenharmony_ci            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_ci         STATIC_ASSERT(sizeof(key->used_outputs) ==
1342bf215546Sopenharmony_ci                       sizeof(gs_bin_variant->prog_data.gs->input_slots));
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci         setup_stage_outputs_from_next_stage_inputs(
1345bf215546Sopenharmony_ci            gs_bin_variant->prog_data.gs->num_inputs,
1346bf215546Sopenharmony_ci            gs_bin_variant->prog_data.gs->input_slots,
1347bf215546Sopenharmony_ci            &key->num_used_outputs,
1348bf215546Sopenharmony_ci            key->used_outputs,
1349bf215546Sopenharmony_ci            sizeof(key->used_outputs));
1350bf215546Sopenharmony_ci      }
1351bf215546Sopenharmony_ci   } else { /* Render VS */
1352bf215546Sopenharmony_ci      if (pipeline->gs) {
1353bf215546Sopenharmony_ci         /* Linking against GS render program */
1354bf215546Sopenharmony_ci         struct v3dv_shader_variant *gs_variant =
1355bf215546Sopenharmony_ci            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1356bf215546Sopenharmony_ci
1357bf215546Sopenharmony_ci         STATIC_ASSERT(sizeof(key->used_outputs) ==
1358bf215546Sopenharmony_ci                       sizeof(gs_variant->prog_data.gs->input_slots));
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_ci         setup_stage_outputs_from_next_stage_inputs(
1361bf215546Sopenharmony_ci            gs_variant->prog_data.gs->num_inputs,
1362bf215546Sopenharmony_ci            gs_variant->prog_data.gs->input_slots,
1363bf215546Sopenharmony_ci            &key->num_used_outputs,
1364bf215546Sopenharmony_ci            key->used_outputs,
1365bf215546Sopenharmony_ci            sizeof(key->used_outputs));
1366bf215546Sopenharmony_ci      } else {
1367bf215546Sopenharmony_ci         /* Linking against FS program */
1368bf215546Sopenharmony_ci         struct v3dv_shader_variant *fs_variant =
1369bf215546Sopenharmony_ci            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1370bf215546Sopenharmony_ci
1371bf215546Sopenharmony_ci         STATIC_ASSERT(sizeof(key->used_outputs) ==
1372bf215546Sopenharmony_ci                       sizeof(fs_variant->prog_data.fs->input_slots));
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci         setup_stage_outputs_from_next_stage_inputs(
1375bf215546Sopenharmony_ci            fs_variant->prog_data.fs->num_inputs,
1376bf215546Sopenharmony_ci            fs_variant->prog_data.fs->input_slots,
1377bf215546Sopenharmony_ci            &key->num_used_outputs,
1378bf215546Sopenharmony_ci            key->used_outputs,
1379bf215546Sopenharmony_ci            sizeof(key->used_outputs));
1380bf215546Sopenharmony_ci      }
1381bf215546Sopenharmony_ci   }
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci   const VkPipelineVertexInputStateCreateInfo *vi_info =
1384bf215546Sopenharmony_ci      pCreateInfo->pVertexInputState;
1385bf215546Sopenharmony_ci   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1386bf215546Sopenharmony_ci      const VkVertexInputAttributeDescription *desc =
1387bf215546Sopenharmony_ci         &vi_info->pVertexAttributeDescriptions[i];
1388bf215546Sopenharmony_ci      assert(desc->location < MAX_VERTEX_ATTRIBS);
1389bf215546Sopenharmony_ci      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1390bf215546Sopenharmony_ci         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1391bf215546Sopenharmony_ci   }
1392bf215546Sopenharmony_ci}
1393bf215546Sopenharmony_ci
1394bf215546Sopenharmony_ci/**
1395bf215546Sopenharmony_ci * Creates the initial form of the pipeline stage for a binning shader by
1396bf215546Sopenharmony_ci * cloning the render shader and flagging it as a coordinate shader.
1397bf215546Sopenharmony_ci *
1398bf215546Sopenharmony_ci * Returns NULL if it was not able to allocate the object, so it should be
1399bf215546Sopenharmony_ci * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1400bf215546Sopenharmony_ci */
1401bf215546Sopenharmony_cistatic struct v3dv_pipeline_stage *
1402bf215546Sopenharmony_cipipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1403bf215546Sopenharmony_ci                              const VkAllocationCallbacks *pAllocator)
1404bf215546Sopenharmony_ci{
1405bf215546Sopenharmony_ci   struct v3dv_device *device = src->pipeline->device;
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci   struct v3dv_pipeline_stage *p_stage =
1408bf215546Sopenharmony_ci      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1409bf215546Sopenharmony_ci                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1410bf215546Sopenharmony_ci
1411bf215546Sopenharmony_ci   if (p_stage == NULL)
1412bf215546Sopenharmony_ci      return NULL;
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_ci   assert(src->stage == BROADCOM_SHADER_VERTEX ||
1415bf215546Sopenharmony_ci          src->stage == BROADCOM_SHADER_GEOMETRY);
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_ci   enum broadcom_shader_stage bin_stage =
1418bf215546Sopenharmony_ci      src->stage == BROADCOM_SHADER_VERTEX ?
1419bf215546Sopenharmony_ci         BROADCOM_SHADER_VERTEX_BIN :
1420bf215546Sopenharmony_ci         BROADCOM_SHADER_GEOMETRY_BIN;
1421bf215546Sopenharmony_ci
1422bf215546Sopenharmony_ci   p_stage->pipeline = src->pipeline;
1423bf215546Sopenharmony_ci   p_stage->stage = bin_stage;
1424bf215546Sopenharmony_ci   p_stage->entrypoint = src->entrypoint;
1425bf215546Sopenharmony_ci   p_stage->module = src->module;
1426bf215546Sopenharmony_ci   /* For binning shaders we will clone the NIR code from the corresponding
1427bf215546Sopenharmony_ci    * render shader later, when we call pipeline_compile_xxx_shader. This way
1428bf215546Sopenharmony_ci    * we only have to run the relevant NIR lowerings once for render shaders
1429bf215546Sopenharmony_ci    */
1430bf215546Sopenharmony_ci   p_stage->nir = NULL;
1431bf215546Sopenharmony_ci   p_stage->spec_info = src->spec_info;
1432bf215546Sopenharmony_ci   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1433bf215546Sopenharmony_ci   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci   return p_stage;
1436bf215546Sopenharmony_ci}
1437bf215546Sopenharmony_ci
1438bf215546Sopenharmony_ci/**
1439bf215546Sopenharmony_ci * Returns false if it was not able to allocate or map the assembly bo memory.
1440bf215546Sopenharmony_ci */
1441bf215546Sopenharmony_cistatic bool
1442bf215546Sopenharmony_ciupload_assembly(struct v3dv_pipeline *pipeline)
1443bf215546Sopenharmony_ci{
1444bf215546Sopenharmony_ci   uint32_t total_size = 0;
1445bf215546Sopenharmony_ci   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1446bf215546Sopenharmony_ci      struct v3dv_shader_variant *variant =
1447bf215546Sopenharmony_ci         pipeline->shared_data->variants[stage];
1448bf215546Sopenharmony_ci
1449bf215546Sopenharmony_ci      if (variant != NULL)
1450bf215546Sopenharmony_ci         total_size += variant->qpu_insts_size;
1451bf215546Sopenharmony_ci   }
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_ci   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1454bf215546Sopenharmony_ci                                      "pipeline shader assembly", true);
1455bf215546Sopenharmony_ci   if (!bo) {
1456bf215546Sopenharmony_ci      fprintf(stderr, "failed to allocate memory for shader\n");
1457bf215546Sopenharmony_ci      return false;
1458bf215546Sopenharmony_ci   }
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1461bf215546Sopenharmony_ci   if (!ok) {
1462bf215546Sopenharmony_ci      fprintf(stderr, "failed to map source shader buffer\n");
1463bf215546Sopenharmony_ci      return false;
1464bf215546Sopenharmony_ci   }
1465bf215546Sopenharmony_ci
1466bf215546Sopenharmony_ci   uint32_t offset = 0;
1467bf215546Sopenharmony_ci   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1468bf215546Sopenharmony_ci      struct v3dv_shader_variant *variant =
1469bf215546Sopenharmony_ci         pipeline->shared_data->variants[stage];
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_ci      if (variant != NULL) {
1472bf215546Sopenharmony_ci         variant->assembly_offset = offset;
1473bf215546Sopenharmony_ci
1474bf215546Sopenharmony_ci         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1475bf215546Sopenharmony_ci         offset += variant->qpu_insts_size;
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci         /* We dont need qpu_insts anymore. */
1478bf215546Sopenharmony_ci         free(variant->qpu_insts);
1479bf215546Sopenharmony_ci         variant->qpu_insts = NULL;
1480bf215546Sopenharmony_ci      }
1481bf215546Sopenharmony_ci   }
1482bf215546Sopenharmony_ci   assert(total_size == offset);
1483bf215546Sopenharmony_ci
1484bf215546Sopenharmony_ci   pipeline->shared_data->assembly_bo = bo;
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci   return true;
1487bf215546Sopenharmony_ci}
1488bf215546Sopenharmony_ci
1489bf215546Sopenharmony_cistatic void
1490bf215546Sopenharmony_cipipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1491bf215546Sopenharmony_ci                       struct v3dv_pipeline_key *key,
1492bf215546Sopenharmony_ci                       unsigned char *sha1_out)
1493bf215546Sopenharmony_ci{
1494bf215546Sopenharmony_ci   struct mesa_sha1 ctx;
1495bf215546Sopenharmony_ci   _mesa_sha1_init(&ctx);
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci   if (pipeline->layout) {
1498bf215546Sopenharmony_ci      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1499bf215546Sopenharmony_ci                        sizeof(pipeline->layout->sha1));
1500bf215546Sopenharmony_ci   }
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci   /* We need to include all shader stages in the sha1 key as linking may modify
1503bf215546Sopenharmony_ci    * the shader code in any stage. An alternative would be to use the
1504bf215546Sopenharmony_ci    * serialized NIR, but that seems like an overkill.
1505bf215546Sopenharmony_ci    */
1506bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1507bf215546Sopenharmony_ci                     sizeof(pipeline->vs->shader_sha1));
1508bf215546Sopenharmony_ci
1509bf215546Sopenharmony_ci   if (pipeline->gs) {
1510bf215546Sopenharmony_ci      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1511bf215546Sopenharmony_ci                        sizeof(pipeline->gs->shader_sha1));
1512bf215546Sopenharmony_ci   }
1513bf215546Sopenharmony_ci
1514bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1515bf215546Sopenharmony_ci                     sizeof(pipeline->fs->shader_sha1));
1516bf215546Sopenharmony_ci
1517bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1518bf215546Sopenharmony_ci
1519bf215546Sopenharmony_ci   _mesa_sha1_final(&ctx, sha1_out);
1520bf215546Sopenharmony_ci}
1521bf215546Sopenharmony_ci
1522bf215546Sopenharmony_cistatic void
1523bf215546Sopenharmony_cipipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1524bf215546Sopenharmony_ci                      struct v3dv_pipeline_key *key,
1525bf215546Sopenharmony_ci                      unsigned char *sha1_out)
1526bf215546Sopenharmony_ci{
1527bf215546Sopenharmony_ci   struct mesa_sha1 ctx;
1528bf215546Sopenharmony_ci   _mesa_sha1_init(&ctx);
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci   if (pipeline->layout) {
1531bf215546Sopenharmony_ci      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1532bf215546Sopenharmony_ci                        sizeof(pipeline->layout->sha1));
1533bf215546Sopenharmony_ci   }
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1536bf215546Sopenharmony_ci                     sizeof(pipeline->cs->shader_sha1));
1537bf215546Sopenharmony_ci
1538bf215546Sopenharmony_ci   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci   _mesa_sha1_final(&ctx, sha1_out);
1541bf215546Sopenharmony_ci}
1542bf215546Sopenharmony_ci
1543bf215546Sopenharmony_ci/* Checks that the pipeline has enough spill size to use for any of their
1544bf215546Sopenharmony_ci * variants
1545bf215546Sopenharmony_ci */
1546bf215546Sopenharmony_cistatic void
1547bf215546Sopenharmony_cipipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1548bf215546Sopenharmony_ci{
1549bf215546Sopenharmony_ci   uint32_t max_spill_size = 0;
1550bf215546Sopenharmony_ci
1551bf215546Sopenharmony_ci   for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1552bf215546Sopenharmony_ci      struct v3dv_shader_variant *variant =
1553bf215546Sopenharmony_ci         pipeline->shared_data->variants[stage];
1554bf215546Sopenharmony_ci
1555bf215546Sopenharmony_ci      if (variant != NULL) {
1556bf215546Sopenharmony_ci         max_spill_size = MAX2(variant->prog_data.base->spill_size,
1557bf215546Sopenharmony_ci                               max_spill_size);
1558bf215546Sopenharmony_ci      }
1559bf215546Sopenharmony_ci   }
1560bf215546Sopenharmony_ci
1561bf215546Sopenharmony_ci   if (max_spill_size > 0) {
1562bf215546Sopenharmony_ci      struct v3dv_device *device = pipeline->device;
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_ci      /* The TIDX register we use for choosing the area to access
1565bf215546Sopenharmony_ci       * for scratch space is: (core << 6) | (qpu << 2) | thread.
1566bf215546Sopenharmony_ci       * Even at minimum threadcount in a particular shader, that
1567bf215546Sopenharmony_ci       * means we still multiply by qpus by 4.
1568bf215546Sopenharmony_ci       */
1569bf215546Sopenharmony_ci      const uint32_t total_spill_size =
1570bf215546Sopenharmony_ci         4 * device->devinfo.qpu_count * max_spill_size;
1571bf215546Sopenharmony_ci      if (pipeline->spill.bo) {
1572bf215546Sopenharmony_ci         assert(pipeline->spill.size_per_thread > 0);
1573bf215546Sopenharmony_ci         v3dv_bo_free(device, pipeline->spill.bo);
1574bf215546Sopenharmony_ci      }
1575bf215546Sopenharmony_ci      pipeline->spill.bo =
1576bf215546Sopenharmony_ci         v3dv_bo_alloc(device, total_spill_size, "spill", true);
1577bf215546Sopenharmony_ci      pipeline->spill.size_per_thread = max_spill_size;
1578bf215546Sopenharmony_ci   }
1579bf215546Sopenharmony_ci}
1580bf215546Sopenharmony_ci
1581bf215546Sopenharmony_ci/**
1582bf215546Sopenharmony_ci * Creates a new shader_variant_create. Note that for prog_data is not const,
1583bf215546Sopenharmony_ci * so it is assumed that the caller will prove a pointer that the
1584bf215546Sopenharmony_ci * shader_variant will own.
1585bf215546Sopenharmony_ci *
1586bf215546Sopenharmony_ci * Creation doesn't include allocate a BO to store the content of qpu_insts,
1587bf215546Sopenharmony_ci * as we will try to share the same bo for several shader variants. Also note
1588bf215546Sopenharmony_ci * that qpu_ints being NULL is valid, for example if we are creating the
1589bf215546Sopenharmony_ci * shader_variants from the cache, so we can just upload the assembly of all
1590bf215546Sopenharmony_ci * the shader stages at once.
1591bf215546Sopenharmony_ci */
1592bf215546Sopenharmony_cistruct v3dv_shader_variant *
1593bf215546Sopenharmony_civ3dv_shader_variant_create(struct v3dv_device *device,
1594bf215546Sopenharmony_ci                           enum broadcom_shader_stage stage,
1595bf215546Sopenharmony_ci                           struct v3d_prog_data *prog_data,
1596bf215546Sopenharmony_ci                           uint32_t prog_data_size,
1597bf215546Sopenharmony_ci                           uint32_t assembly_offset,
1598bf215546Sopenharmony_ci                           uint64_t *qpu_insts,
1599bf215546Sopenharmony_ci                           uint32_t qpu_insts_size,
1600bf215546Sopenharmony_ci                           VkResult *out_vk_result)
1601bf215546Sopenharmony_ci{
1602bf215546Sopenharmony_ci   struct v3dv_shader_variant *variant =
1603bf215546Sopenharmony_ci      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1604bf215546Sopenharmony_ci                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1605bf215546Sopenharmony_ci
1606bf215546Sopenharmony_ci   if (variant == NULL) {
1607bf215546Sopenharmony_ci      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1608bf215546Sopenharmony_ci      return NULL;
1609bf215546Sopenharmony_ci   }
1610bf215546Sopenharmony_ci
1611bf215546Sopenharmony_ci   variant->stage = stage;
1612bf215546Sopenharmony_ci   variant->prog_data_size = prog_data_size;
1613bf215546Sopenharmony_ci   variant->prog_data.base = prog_data;
1614bf215546Sopenharmony_ci
1615bf215546Sopenharmony_ci   variant->assembly_offset = assembly_offset;
1616bf215546Sopenharmony_ci   variant->qpu_insts_size = qpu_insts_size;
1617bf215546Sopenharmony_ci   variant->qpu_insts = qpu_insts;
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_ci   *out_vk_result = VK_SUCCESS;
1620bf215546Sopenharmony_ci
1621bf215546Sopenharmony_ci   return variant;
1622bf215546Sopenharmony_ci}
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci/* For a given key, it returns the compiled version of the shader.  Returns a
1625bf215546Sopenharmony_ci * new reference to the shader_variant to the caller, or NULL.
1626bf215546Sopenharmony_ci *
1627bf215546Sopenharmony_ci * If the method returns NULL it means that something wrong happened:
1628bf215546Sopenharmony_ci *   * Not enough memory: this is one of the possible outcomes defined by
1629bf215546Sopenharmony_ci *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1630bf215546Sopenharmony_ci *   * Compilation error: hypothetically this shouldn't happen, as the spec
1631bf215546Sopenharmony_ci *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1632bf215546Sopenharmony_ci *     any compilation failure is a driver bug. In the practice, something as
1633bf215546Sopenharmony_ci *     common as failing to register allocate can lead to a compilation
1634bf215546Sopenharmony_ci *     failure. In that case the only option (for any driver) is
1635bf215546Sopenharmony_ci *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1636bf215546Sopenharmony_ci *     error.
1637bf215546Sopenharmony_ci */
1638bf215546Sopenharmony_cistatic struct v3dv_shader_variant *
1639bf215546Sopenharmony_cipipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1640bf215546Sopenharmony_ci                                struct v3d_key *key,
1641bf215546Sopenharmony_ci                                size_t key_size,
1642bf215546Sopenharmony_ci                                const VkAllocationCallbacks *pAllocator,
1643bf215546Sopenharmony_ci                                VkResult *out_vk_result)
1644bf215546Sopenharmony_ci{
1645bf215546Sopenharmony_ci   int64_t stage_start = os_time_get_nano();
1646bf215546Sopenharmony_ci
1647bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1648bf215546Sopenharmony_ci   struct v3dv_physical_device *physical_device =
1649bf215546Sopenharmony_ci      &pipeline->device->instance->physicalDevice;
1650bf215546Sopenharmony_ci   const struct v3d_compiler *compiler = physical_device->compiler;
1651bf215546Sopenharmony_ci
1652bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1653bf215546Sopenharmony_ci                             v3d_debug_flag_for_shader_stage
1654bf215546Sopenharmony_ci                             (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1655bf215546Sopenharmony_ci      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1656bf215546Sopenharmony_ci              broadcom_shader_stage_name(p_stage->stage),
1657bf215546Sopenharmony_ci              p_stage->program_id);
1658bf215546Sopenharmony_ci      nir_print_shader(p_stage->nir, stderr);
1659bf215546Sopenharmony_ci      fprintf(stderr, "\n");
1660bf215546Sopenharmony_ci   }
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_ci   uint64_t *qpu_insts;
1663bf215546Sopenharmony_ci   uint32_t qpu_insts_size;
1664bf215546Sopenharmony_ci   struct v3d_prog_data *prog_data;
1665bf215546Sopenharmony_ci   uint32_t prog_data_size =
1666bf215546Sopenharmony_ci      v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1667bf215546Sopenharmony_ci
1668bf215546Sopenharmony_ci   qpu_insts = v3d_compile(compiler,
1669bf215546Sopenharmony_ci                           key, &prog_data,
1670bf215546Sopenharmony_ci                           p_stage->nir,
1671bf215546Sopenharmony_ci                           shader_debug_output, NULL,
1672bf215546Sopenharmony_ci                           p_stage->program_id, 0,
1673bf215546Sopenharmony_ci                           &qpu_insts_size);
1674bf215546Sopenharmony_ci
1675bf215546Sopenharmony_ci   struct v3dv_shader_variant *variant = NULL;
1676bf215546Sopenharmony_ci
1677bf215546Sopenharmony_ci   if (!qpu_insts) {
1678bf215546Sopenharmony_ci      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1679bf215546Sopenharmony_ci              gl_shader_stage_name(p_stage->stage),
1680bf215546Sopenharmony_ci              p_stage->program_id);
1681bf215546Sopenharmony_ci      *out_vk_result = VK_ERROR_UNKNOWN;
1682bf215546Sopenharmony_ci   } else {
1683bf215546Sopenharmony_ci      variant =
1684bf215546Sopenharmony_ci         v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1685bf215546Sopenharmony_ci                                    prog_data, prog_data_size,
1686bf215546Sopenharmony_ci                                    0, /* assembly_offset, no final value yet */
1687bf215546Sopenharmony_ci                                    qpu_insts, qpu_insts_size,
1688bf215546Sopenharmony_ci                                    out_vk_result);
1689bf215546Sopenharmony_ci   }
1690bf215546Sopenharmony_ci   /* At this point we don't need anymore the nir shader, but we are freeing
1691bf215546Sopenharmony_ci    * all the temporary p_stage structs used during the pipeline creation when
1692bf215546Sopenharmony_ci    * we finish it, so let's not worry about freeing the nir here.
1693bf215546Sopenharmony_ci    */
1694bf215546Sopenharmony_ci
1695bf215546Sopenharmony_ci   p_stage->feedback.duration += os_time_get_nano() - stage_start;
1696bf215546Sopenharmony_ci
1697bf215546Sopenharmony_ci   return variant;
1698bf215546Sopenharmony_ci}
1699bf215546Sopenharmony_ci
1700bf215546Sopenharmony_cistatic void
1701bf215546Sopenharmony_cilink_shaders(nir_shader *producer, nir_shader *consumer)
1702bf215546Sopenharmony_ci{
1703bf215546Sopenharmony_ci   assert(producer);
1704bf215546Sopenharmony_ci   assert(consumer);
1705bf215546Sopenharmony_ci
1706bf215546Sopenharmony_ci   if (producer->options->lower_to_scalar) {
1707bf215546Sopenharmony_ci      NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1708bf215546Sopenharmony_ci      NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1709bf215546Sopenharmony_ci   }
1710bf215546Sopenharmony_ci
1711bf215546Sopenharmony_ci   nir_lower_io_arrays_to_elements(producer, consumer);
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_ci   nir_optimize(producer, false);
1714bf215546Sopenharmony_ci   nir_optimize(consumer, false);
1715bf215546Sopenharmony_ci
1716bf215546Sopenharmony_ci   if (nir_link_opt_varyings(producer, consumer))
1717bf215546Sopenharmony_ci      nir_optimize(consumer, false);
1718bf215546Sopenharmony_ci
1719bf215546Sopenharmony_ci   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1720bf215546Sopenharmony_ci   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci   if (nir_remove_unused_varyings(producer, consumer)) {
1723bf215546Sopenharmony_ci      NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1724bf215546Sopenharmony_ci      NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1725bf215546Sopenharmony_ci
1726bf215546Sopenharmony_ci      nir_optimize(producer, false);
1727bf215546Sopenharmony_ci      nir_optimize(consumer, false);
1728bf215546Sopenharmony_ci
1729bf215546Sopenharmony_ci      /* Optimizations can cause varyings to become unused.
1730bf215546Sopenharmony_ci       * nir_compact_varyings() depends on all dead varyings being removed so
1731bf215546Sopenharmony_ci       * we need to call nir_remove_dead_variables() again here.
1732bf215546Sopenharmony_ci       */
1733bf215546Sopenharmony_ci      NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1734bf215546Sopenharmony_ci      NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1735bf215546Sopenharmony_ci   }
1736bf215546Sopenharmony_ci}
1737bf215546Sopenharmony_ci
1738bf215546Sopenharmony_cistatic void
1739bf215546Sopenharmony_cipipeline_lower_nir(struct v3dv_pipeline *pipeline,
1740bf215546Sopenharmony_ci                   struct v3dv_pipeline_stage *p_stage,
1741bf215546Sopenharmony_ci                   struct v3dv_pipeline_layout *layout)
1742bf215546Sopenharmony_ci{
1743bf215546Sopenharmony_ci   int64_t stage_start = os_time_get_nano();
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci   assert(pipeline->shared_data &&
1746bf215546Sopenharmony_ci          pipeline->shared_data->maps[p_stage->stage]);
1747bf215546Sopenharmony_ci
1748bf215546Sopenharmony_ci   nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1749bf215546Sopenharmony_ci
1750bf215546Sopenharmony_ci   /* We add this because we need a valid sampler for nir_lower_tex to do
1751bf215546Sopenharmony_ci    * unpacking of the texture operation result, even for the case where there
1752bf215546Sopenharmony_ci    * is no sampler state.
1753bf215546Sopenharmony_ci    *
1754bf215546Sopenharmony_ci    * We add two of those, one for the case we need a 16bit return_size, and
1755bf215546Sopenharmony_ci    * another for the case we need a 32bit return size.
1756bf215546Sopenharmony_ci    */
1757bf215546Sopenharmony_ci   struct v3dv_descriptor_maps *maps =
1758bf215546Sopenharmony_ci      pipeline->shared_data->maps[p_stage->stage];
1759bf215546Sopenharmony_ci
1760bf215546Sopenharmony_ci   UNUSED unsigned index;
1761bf215546Sopenharmony_ci   index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16);
1762bf215546Sopenharmony_ci   assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1763bf215546Sopenharmony_ci
1764bf215546Sopenharmony_ci   index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32);
1765bf215546Sopenharmony_ci   assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1768bf215546Sopenharmony_ci   bool needs_default_sampler_state = false;
1769bf215546Sopenharmony_ci   NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1770bf215546Sopenharmony_ci            &needs_default_sampler_state);
1771bf215546Sopenharmony_ci
1772bf215546Sopenharmony_ci   /* If in the end we didn't need to use the default sampler states and the
1773bf215546Sopenharmony_ci    * shader doesn't need any other samplers, get rid of them so we can
1774bf215546Sopenharmony_ci    * recognize that this program doesn't use any samplers at all.
1775bf215546Sopenharmony_ci    */
1776bf215546Sopenharmony_ci   if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1777bf215546Sopenharmony_ci      maps->sampler_map.num_desc = 0;
1778bf215546Sopenharmony_ci
1779bf215546Sopenharmony_ci   p_stage->feedback.duration += os_time_get_nano() - stage_start;
1780bf215546Sopenharmony_ci}
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci/**
1783bf215546Sopenharmony_ci * The SPIR-V compiler will insert a sized compact array for
1784bf215546Sopenharmony_ci * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1785bf215546Sopenharmony_ci * where the size of the array determines the number of active clip planes.
1786bf215546Sopenharmony_ci */
1787bf215546Sopenharmony_cistatic uint32_t
1788bf215546Sopenharmony_ciget_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1789bf215546Sopenharmony_ci{
1790bf215546Sopenharmony_ci   assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1791bf215546Sopenharmony_ci   const nir_shader *shader = p_stage->nir;
1792bf215546Sopenharmony_ci   assert(shader);
1793bf215546Sopenharmony_ci
1794bf215546Sopenharmony_ci   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1795bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1796bf215546Sopenharmony_ci         assert(var->data.compact);
1797bf215546Sopenharmony_ci         return (1 << glsl_get_length(var->type)) - 1;
1798bf215546Sopenharmony_ci      }
1799bf215546Sopenharmony_ci   }
1800bf215546Sopenharmony_ci   return 0;
1801bf215546Sopenharmony_ci}
1802bf215546Sopenharmony_ci
1803bf215546Sopenharmony_cistatic nir_shader *
1804bf215546Sopenharmony_cipipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1805bf215546Sopenharmony_ci                       struct v3dv_pipeline *pipeline,
1806bf215546Sopenharmony_ci                       struct v3dv_pipeline_cache *cache)
1807bf215546Sopenharmony_ci{
1808bf215546Sopenharmony_ci   int64_t stage_start = os_time_get_nano();
1809bf215546Sopenharmony_ci
1810bf215546Sopenharmony_ci   nir_shader *nir = NULL;
1811bf215546Sopenharmony_ci
1812bf215546Sopenharmony_ci   nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1813bf215546Sopenharmony_ci                                            &v3dv_nir_options,
1814bf215546Sopenharmony_ci                                            p_stage->shader_sha1);
1815bf215546Sopenharmony_ci
1816bf215546Sopenharmony_ci   if (nir) {
1817bf215546Sopenharmony_ci      assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1818bf215546Sopenharmony_ci
1819bf215546Sopenharmony_ci      /* A NIR cach hit doesn't avoid the large majority of pipeline stage
1820bf215546Sopenharmony_ci       * creation so the cache hit is not recorded in the pipeline feedback
1821bf215546Sopenharmony_ci       * flags
1822bf215546Sopenharmony_ci       */
1823bf215546Sopenharmony_ci
1824bf215546Sopenharmony_ci      p_stage->feedback.duration += os_time_get_nano() - stage_start;
1825bf215546Sopenharmony_ci
1826bf215546Sopenharmony_ci      return nir;
1827bf215546Sopenharmony_ci   }
1828bf215546Sopenharmony_ci
1829bf215546Sopenharmony_ci   nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci   if (nir) {
1832bf215546Sopenharmony_ci      struct v3dv_pipeline_cache *default_cache =
1833bf215546Sopenharmony_ci         &pipeline->device->default_pipeline_cache;
1834bf215546Sopenharmony_ci
1835bf215546Sopenharmony_ci      v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1836bf215546Sopenharmony_ci                                     p_stage->shader_sha1);
1837bf215546Sopenharmony_ci
1838bf215546Sopenharmony_ci      /* Ensure that the variant is on the default cache, as cmd_buffer could
1839bf215546Sopenharmony_ci       * need to change the current variant
1840bf215546Sopenharmony_ci       */
1841bf215546Sopenharmony_ci      if (default_cache != cache) {
1842bf215546Sopenharmony_ci         v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1843bf215546Sopenharmony_ci                                        p_stage->shader_sha1);
1844bf215546Sopenharmony_ci      }
1845bf215546Sopenharmony_ci
1846bf215546Sopenharmony_ci      p_stage->feedback.duration += os_time_get_nano() - stage_start;
1847bf215546Sopenharmony_ci
1848bf215546Sopenharmony_ci      return nir;
1849bf215546Sopenharmony_ci   }
1850bf215546Sopenharmony_ci
1851bf215546Sopenharmony_ci   /* FIXME: this shouldn't happen, raise error? */
1852bf215546Sopenharmony_ci   return NULL;
1853bf215546Sopenharmony_ci}
1854bf215546Sopenharmony_ci
1855bf215546Sopenharmony_cistatic VkResult
1856bf215546Sopenharmony_cipipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1857bf215546Sopenharmony_ci                               const VkAllocationCallbacks *pAllocator,
1858bf215546Sopenharmony_ci                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1859bf215546Sopenharmony_ci{
1860bf215546Sopenharmony_ci   assert(pipeline->vs_bin != NULL);
1861bf215546Sopenharmony_ci   if (pipeline->vs_bin->nir == NULL) {
1862bf215546Sopenharmony_ci      assert(pipeline->vs->nir);
1863bf215546Sopenharmony_ci      pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1864bf215546Sopenharmony_ci   }
1865bf215546Sopenharmony_ci
1866bf215546Sopenharmony_ci   VkResult vk_result;
1867bf215546Sopenharmony_ci   struct v3d_vs_key key;
1868bf215546Sopenharmony_ci   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1869bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1870bf215546Sopenharmony_ci      pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1871bf215546Sopenharmony_ci                                      pAllocator, &vk_result);
1872bf215546Sopenharmony_ci   if (vk_result != VK_SUCCESS)
1873bf215546Sopenharmony_ci      return vk_result;
1874bf215546Sopenharmony_ci
1875bf215546Sopenharmony_ci   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1876bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1877bf215546Sopenharmony_ci      pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1878bf215546Sopenharmony_ci                                      pAllocator, &vk_result);
1879bf215546Sopenharmony_ci
1880bf215546Sopenharmony_ci   return vk_result;
1881bf215546Sopenharmony_ci}
1882bf215546Sopenharmony_ci
1883bf215546Sopenharmony_cistatic VkResult
1884bf215546Sopenharmony_cipipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1885bf215546Sopenharmony_ci                                 const VkAllocationCallbacks *pAllocator,
1886bf215546Sopenharmony_ci                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1887bf215546Sopenharmony_ci{
1888bf215546Sopenharmony_ci   assert(pipeline->gs);
1889bf215546Sopenharmony_ci
1890bf215546Sopenharmony_ci   assert(pipeline->gs_bin != NULL);
1891bf215546Sopenharmony_ci   if (pipeline->gs_bin->nir == NULL) {
1892bf215546Sopenharmony_ci      assert(pipeline->gs->nir);
1893bf215546Sopenharmony_ci      pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1894bf215546Sopenharmony_ci   }
1895bf215546Sopenharmony_ci
1896bf215546Sopenharmony_ci   VkResult vk_result;
1897bf215546Sopenharmony_ci   struct v3d_gs_key key;
1898bf215546Sopenharmony_ci   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1899bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1900bf215546Sopenharmony_ci      pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1901bf215546Sopenharmony_ci                                      pAllocator, &vk_result);
1902bf215546Sopenharmony_ci   if (vk_result != VK_SUCCESS)
1903bf215546Sopenharmony_ci      return vk_result;
1904bf215546Sopenharmony_ci
1905bf215546Sopenharmony_ci   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1906bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1907bf215546Sopenharmony_ci      pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1908bf215546Sopenharmony_ci                                      pAllocator, &vk_result);
1909bf215546Sopenharmony_ci
1910bf215546Sopenharmony_ci   return vk_result;
1911bf215546Sopenharmony_ci}
1912bf215546Sopenharmony_ci
1913bf215546Sopenharmony_cistatic VkResult
1914bf215546Sopenharmony_cipipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1915bf215546Sopenharmony_ci                                 const VkAllocationCallbacks *pAllocator,
1916bf215546Sopenharmony_ci                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1917bf215546Sopenharmony_ci{
1918bf215546Sopenharmony_ci   struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1919bf215546Sopenharmony_ci
1920bf215546Sopenharmony_ci   p_stage = pipeline->fs;
1921bf215546Sopenharmony_ci
1922bf215546Sopenharmony_ci   struct v3d_fs_key key;
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_ci   pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1925bf215546Sopenharmony_ci                                pipeline->gs != NULL,
1926bf215546Sopenharmony_ci                                get_ucp_enable_mask(pipeline->vs));
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci   VkResult vk_result;
1929bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1930bf215546Sopenharmony_ci      pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1931bf215546Sopenharmony_ci                                      pAllocator, &vk_result);
1932bf215546Sopenharmony_ci
1933bf215546Sopenharmony_ci   return vk_result;
1934bf215546Sopenharmony_ci}
1935bf215546Sopenharmony_ci
1936bf215546Sopenharmony_cistatic void
1937bf215546Sopenharmony_cipipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1938bf215546Sopenharmony_ci                               struct v3dv_pipeline_key *key,
1939bf215546Sopenharmony_ci                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1940bf215546Sopenharmony_ci{
1941bf215546Sopenharmony_ci   memset(key, 0, sizeof(*key));
1942bf215546Sopenharmony_ci   key->robust_buffer_access =
1943bf215546Sopenharmony_ci      pipeline->device->features.robustBufferAccess;
1944bf215546Sopenharmony_ci
1945bf215546Sopenharmony_ci   const bool raster_enabled =
1946bf215546Sopenharmony_ci      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1947bf215546Sopenharmony_ci
1948bf215546Sopenharmony_ci   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1949bf215546Sopenharmony_ci      pCreateInfo->pInputAssemblyState;
1950bf215546Sopenharmony_ci   key->topology = vk_to_pipe_prim_type[ia_info->topology];
1951bf215546Sopenharmony_ci
1952bf215546Sopenharmony_ci   const VkPipelineColorBlendStateCreateInfo *cb_info =
1953bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_ci   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1956bf215546Sopenharmony_ci      vk_to_pipe_logicop[cb_info->logicOp] :
1957bf215546Sopenharmony_ci      PIPE_LOGICOP_COPY;
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_ci   /* Multisample rasterization state must be ignored if rasterization
1960bf215546Sopenharmony_ci    * is disabled.
1961bf215546Sopenharmony_ci    */
1962bf215546Sopenharmony_ci   const VkPipelineMultisampleStateCreateInfo *ms_info =
1963bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1964bf215546Sopenharmony_ci   if (ms_info) {
1965bf215546Sopenharmony_ci      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1966bf215546Sopenharmony_ci             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1967bf215546Sopenharmony_ci      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1968bf215546Sopenharmony_ci
1969bf215546Sopenharmony_ci      if (key->msaa) {
1970bf215546Sopenharmony_ci         key->sample_coverage =
1971bf215546Sopenharmony_ci            pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1972bf215546Sopenharmony_ci         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1973bf215546Sopenharmony_ci         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1974bf215546Sopenharmony_ci      }
1975bf215546Sopenharmony_ci   }
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_ci   const struct v3dv_render_pass *pass =
1978bf215546Sopenharmony_ci      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1979bf215546Sopenharmony_ci   const struct v3dv_subpass *subpass = pipeline->subpass;
1980bf215546Sopenharmony_ci   for (uint32_t i = 0; i < subpass->color_count; i++) {
1981bf215546Sopenharmony_ci      const uint32_t att_idx = subpass->color_attachments[i].attachment;
1982bf215546Sopenharmony_ci      if (att_idx == VK_ATTACHMENT_UNUSED)
1983bf215546Sopenharmony_ci         continue;
1984bf215546Sopenharmony_ci
1985bf215546Sopenharmony_ci      key->cbufs |= 1 << i;
1986bf215546Sopenharmony_ci
1987bf215546Sopenharmony_ci      VkFormat fb_format = pass->attachments[att_idx].desc.format;
1988bf215546Sopenharmony_ci      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1989bf215546Sopenharmony_ci
1990bf215546Sopenharmony_ci      /* If logic operations are enabled then we might emit color reads and we
1991bf215546Sopenharmony_ci       * need to know the color buffer format and swizzle for that
1992bf215546Sopenharmony_ci       */
1993bf215546Sopenharmony_ci      if (key->logicop_func != PIPE_LOGICOP_COPY) {
1994bf215546Sopenharmony_ci         key->color_fmt[i].format = fb_pipe_format;
1995bf215546Sopenharmony_ci         memcpy(key->color_fmt[i].swizzle,
1996bf215546Sopenharmony_ci                v3dv_get_format_swizzle(pipeline->device, fb_format),
1997bf215546Sopenharmony_ci                sizeof(key->color_fmt[i].swizzle));
1998bf215546Sopenharmony_ci      }
1999bf215546Sopenharmony_ci
2000bf215546Sopenharmony_ci      const struct util_format_description *desc =
2001bf215546Sopenharmony_ci         vk_format_description(fb_format);
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ci      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2004bf215546Sopenharmony_ci          desc->channel[0].size == 32) {
2005bf215546Sopenharmony_ci         key->f32_color_rb |= 1 << i;
2006bf215546Sopenharmony_ci      }
2007bf215546Sopenharmony_ci   }
2008bf215546Sopenharmony_ci
2009bf215546Sopenharmony_ci   const VkPipelineVertexInputStateCreateInfo *vi_info =
2010bf215546Sopenharmony_ci      pCreateInfo->pVertexInputState;
2011bf215546Sopenharmony_ci   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2012bf215546Sopenharmony_ci      const VkVertexInputAttributeDescription *desc =
2013bf215546Sopenharmony_ci         &vi_info->pVertexAttributeDescriptions[i];
2014bf215546Sopenharmony_ci      assert(desc->location < MAX_VERTEX_ATTRIBS);
2015bf215546Sopenharmony_ci      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2016bf215546Sopenharmony_ci         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2017bf215546Sopenharmony_ci   }
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_ci   assert(pipeline->subpass);
2020bf215546Sopenharmony_ci   key->has_multiview = pipeline->subpass->view_mask != 0;
2021bf215546Sopenharmony_ci}
2022bf215546Sopenharmony_ci
2023bf215546Sopenharmony_cistatic void
2024bf215546Sopenharmony_cipipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2025bf215546Sopenharmony_ci                              struct v3dv_pipeline_key *key,
2026bf215546Sopenharmony_ci                              const VkComputePipelineCreateInfo *pCreateInfo)
2027bf215546Sopenharmony_ci{
2028bf215546Sopenharmony_ci   /* We use the same pipeline key for graphics and compute, but we don't need
2029bf215546Sopenharmony_ci    * to add a field to flag compute keys because this key is not used alone
2030bf215546Sopenharmony_ci    * to search in the cache, we also use the SPIR-V or the serialized NIR for
2031bf215546Sopenharmony_ci    * example, which already flags compute shaders.
2032bf215546Sopenharmony_ci    */
2033bf215546Sopenharmony_ci   memset(key, 0, sizeof(*key));
2034bf215546Sopenharmony_ci   key->robust_buffer_access =
2035bf215546Sopenharmony_ci      pipeline->device->features.robustBufferAccess;
2036bf215546Sopenharmony_ci}
2037bf215546Sopenharmony_ci
2038bf215546Sopenharmony_cistatic struct v3dv_pipeline_shared_data *
2039bf215546Sopenharmony_civ3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2040bf215546Sopenharmony_ci                                    struct v3dv_pipeline *pipeline,
2041bf215546Sopenharmony_ci                                    bool is_graphics_pipeline)
2042bf215546Sopenharmony_ci{
2043bf215546Sopenharmony_ci   /* We create new_entry using the device alloc. Right now shared_data is ref
2044bf215546Sopenharmony_ci    * and unref by both the pipeline and the pipeline cache, so we can't
2045bf215546Sopenharmony_ci    * ensure that the cache or pipeline alloc will be available on the last
2046bf215546Sopenharmony_ci    * unref.
2047bf215546Sopenharmony_ci    */
2048bf215546Sopenharmony_ci   struct v3dv_pipeline_shared_data *new_entry =
2049bf215546Sopenharmony_ci      vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2050bf215546Sopenharmony_ci                 sizeof(struct v3dv_pipeline_shared_data), 8,
2051bf215546Sopenharmony_ci                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2052bf215546Sopenharmony_ci
2053bf215546Sopenharmony_ci   if (new_entry == NULL)
2054bf215546Sopenharmony_ci      return NULL;
2055bf215546Sopenharmony_ci
2056bf215546Sopenharmony_ci   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2057bf215546Sopenharmony_ci      /* We don't need specific descriptor maps for binning stages we use the
2058bf215546Sopenharmony_ci       * map for the render stage.
2059bf215546Sopenharmony_ci       */
2060bf215546Sopenharmony_ci      if (broadcom_shader_stage_is_binning(stage))
2061bf215546Sopenharmony_ci         continue;
2062bf215546Sopenharmony_ci
2063bf215546Sopenharmony_ci      if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2064bf215546Sopenharmony_ci          (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2065bf215546Sopenharmony_ci         continue;
2066bf215546Sopenharmony_ci      }
2067bf215546Sopenharmony_ci
2068bf215546Sopenharmony_ci      if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2069bf215546Sopenharmony_ci         /* We always inject a custom GS if we have multiview */
2070bf215546Sopenharmony_ci         if (!pipeline->subpass->view_mask)
2071bf215546Sopenharmony_ci            continue;
2072bf215546Sopenharmony_ci      }
2073bf215546Sopenharmony_ci
2074bf215546Sopenharmony_ci      struct v3dv_descriptor_maps *new_maps =
2075bf215546Sopenharmony_ci         vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2076bf215546Sopenharmony_ci                    sizeof(struct v3dv_descriptor_maps), 8,
2077bf215546Sopenharmony_ci                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2078bf215546Sopenharmony_ci
2079bf215546Sopenharmony_ci      if (new_maps == NULL)
2080bf215546Sopenharmony_ci         goto fail;
2081bf215546Sopenharmony_ci
2082bf215546Sopenharmony_ci      new_entry->maps[stage] = new_maps;
2083bf215546Sopenharmony_ci   }
2084bf215546Sopenharmony_ci
2085bf215546Sopenharmony_ci   new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2086bf215546Sopenharmony_ci      new_entry->maps[BROADCOM_SHADER_VERTEX];
2087bf215546Sopenharmony_ci
2088bf215546Sopenharmony_ci   new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2089bf215546Sopenharmony_ci      new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_ci   new_entry->ref_cnt = 1;
2092bf215546Sopenharmony_ci   memcpy(new_entry->sha1_key, sha1_key, 20);
2093bf215546Sopenharmony_ci
2094bf215546Sopenharmony_ci   return new_entry;
2095bf215546Sopenharmony_ci
2096bf215546Sopenharmony_cifail:
2097bf215546Sopenharmony_ci   if (new_entry != NULL) {
2098bf215546Sopenharmony_ci      for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2099bf215546Sopenharmony_ci         if (new_entry->maps[stage] != NULL)
2100bf215546Sopenharmony_ci            vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2101bf215546Sopenharmony_ci      }
2102bf215546Sopenharmony_ci   }
2103bf215546Sopenharmony_ci
2104bf215546Sopenharmony_ci   vk_free(&pipeline->device->vk.alloc, new_entry);
2105bf215546Sopenharmony_ci
2106bf215546Sopenharmony_ci   return NULL;
2107bf215546Sopenharmony_ci}
2108bf215546Sopenharmony_ci
2109bf215546Sopenharmony_cistatic void
2110bf215546Sopenharmony_ciwrite_creation_feedback(struct v3dv_pipeline *pipeline,
2111bf215546Sopenharmony_ci                        const void *next,
2112bf215546Sopenharmony_ci                        const VkPipelineCreationFeedback *pipeline_feedback,
2113bf215546Sopenharmony_ci                        uint32_t stage_count,
2114bf215546Sopenharmony_ci                        const VkPipelineShaderStageCreateInfo *stages)
2115bf215546Sopenharmony_ci{
2116bf215546Sopenharmony_ci   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2117bf215546Sopenharmony_ci      vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2118bf215546Sopenharmony_ci
2119bf215546Sopenharmony_ci   if (create_feedback) {
2120bf215546Sopenharmony_ci      typed_memcpy(create_feedback->pPipelineCreationFeedback,
2121bf215546Sopenharmony_ci             pipeline_feedback,
2122bf215546Sopenharmony_ci             1);
2123bf215546Sopenharmony_ci
2124bf215546Sopenharmony_ci      assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2125bf215546Sopenharmony_ci
2126bf215546Sopenharmony_ci      for (uint32_t i = 0; i < stage_count; i++) {
2127bf215546Sopenharmony_ci         gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2128bf215546Sopenharmony_ci         switch (s) {
2129bf215546Sopenharmony_ci         case MESA_SHADER_VERTEX:
2130bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i] =
2131bf215546Sopenharmony_ci               pipeline->vs->feedback;
2132bf215546Sopenharmony_ci
2133bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2134bf215546Sopenharmony_ci               pipeline->vs_bin->feedback.duration;
2135bf215546Sopenharmony_ci            break;
2136bf215546Sopenharmony_ci
2137bf215546Sopenharmony_ci         case MESA_SHADER_GEOMETRY:
2138bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i] =
2139bf215546Sopenharmony_ci               pipeline->gs->feedback;
2140bf215546Sopenharmony_ci
2141bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2142bf215546Sopenharmony_ci               pipeline->gs_bin->feedback.duration;
2143bf215546Sopenharmony_ci            break;
2144bf215546Sopenharmony_ci
2145bf215546Sopenharmony_ci         case MESA_SHADER_FRAGMENT:
2146bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i] =
2147bf215546Sopenharmony_ci               pipeline->fs->feedback;
2148bf215546Sopenharmony_ci            break;
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci         case MESA_SHADER_COMPUTE:
2151bf215546Sopenharmony_ci            create_feedback->pPipelineStageCreationFeedbacks[i] =
2152bf215546Sopenharmony_ci               pipeline->cs->feedback;
2153bf215546Sopenharmony_ci            break;
2154bf215546Sopenharmony_ci
2155bf215546Sopenharmony_ci         default:
2156bf215546Sopenharmony_ci            unreachable("not supported shader stage");
2157bf215546Sopenharmony_ci         }
2158bf215546Sopenharmony_ci      }
2159bf215546Sopenharmony_ci   }
2160bf215546Sopenharmony_ci}
2161bf215546Sopenharmony_ci
2162bf215546Sopenharmony_cistatic enum shader_prim
2163bf215546Sopenharmony_cimultiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2164bf215546Sopenharmony_ci{
2165bf215546Sopenharmony_ci   switch (pipeline->topology) {
2166bf215546Sopenharmony_ci   case PIPE_PRIM_POINTS:
2167bf215546Sopenharmony_ci      return SHADER_PRIM_POINTS;
2168bf215546Sopenharmony_ci   case PIPE_PRIM_LINES:
2169bf215546Sopenharmony_ci   case PIPE_PRIM_LINE_STRIP:
2170bf215546Sopenharmony_ci      return SHADER_PRIM_LINES;
2171bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLES:
2172bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLE_STRIP:
2173bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLE_FAN:
2174bf215546Sopenharmony_ci      return SHADER_PRIM_TRIANGLES;
2175bf215546Sopenharmony_ci   default:
2176bf215546Sopenharmony_ci      /* Since we don't allow GS with multiview, we can only see non-adjacency
2177bf215546Sopenharmony_ci       * primitives.
2178bf215546Sopenharmony_ci       */
2179bf215546Sopenharmony_ci      unreachable("Unexpected pipeline primitive type");
2180bf215546Sopenharmony_ci   }
2181bf215546Sopenharmony_ci}
2182bf215546Sopenharmony_ci
2183bf215546Sopenharmony_cistatic enum shader_prim
2184bf215546Sopenharmony_cimultiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2185bf215546Sopenharmony_ci{
2186bf215546Sopenharmony_ci   switch (pipeline->topology) {
2187bf215546Sopenharmony_ci   case PIPE_PRIM_POINTS:
2188bf215546Sopenharmony_ci      return SHADER_PRIM_POINTS;
2189bf215546Sopenharmony_ci   case PIPE_PRIM_LINES:
2190bf215546Sopenharmony_ci   case PIPE_PRIM_LINE_STRIP:
2191bf215546Sopenharmony_ci      return SHADER_PRIM_LINE_STRIP;
2192bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLES:
2193bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLE_STRIP:
2194bf215546Sopenharmony_ci   case PIPE_PRIM_TRIANGLE_FAN:
2195bf215546Sopenharmony_ci      return SHADER_PRIM_TRIANGLE_STRIP;
2196bf215546Sopenharmony_ci   default:
2197bf215546Sopenharmony_ci      /* Since we don't allow GS with multiview, we can only see non-adjacency
2198bf215546Sopenharmony_ci       * primitives.
2199bf215546Sopenharmony_ci       */
2200bf215546Sopenharmony_ci      unreachable("Unexpected pipeline primitive type");
2201bf215546Sopenharmony_ci   }
2202bf215546Sopenharmony_ci}
2203bf215546Sopenharmony_ci
2204bf215546Sopenharmony_cistatic bool
2205bf215546Sopenharmony_cipipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2206bf215546Sopenharmony_ci                          struct v3dv_pipeline_cache *cache,
2207bf215546Sopenharmony_ci                          const VkAllocationCallbacks *pAllocator)
2208bf215546Sopenharmony_ci{
2209bf215546Sopenharmony_ci   /* Create the passthrough GS from the VS output interface */
2210bf215546Sopenharmony_ci   pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2211bf215546Sopenharmony_ci   nir_shader *vs_nir = pipeline->vs->nir;
2212bf215546Sopenharmony_ci
2213bf215546Sopenharmony_ci   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2214bf215546Sopenharmony_ci   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2215bf215546Sopenharmony_ci                                                  "multiview broadcast gs");
2216bf215546Sopenharmony_ci   nir_shader *nir = b.shader;
2217bf215546Sopenharmony_ci   nir->info.inputs_read = vs_nir->info.outputs_written;
2218bf215546Sopenharmony_ci   nir->info.outputs_written = vs_nir->info.outputs_written |
2219bf215546Sopenharmony_ci                               (1ull << VARYING_SLOT_LAYER);
2220bf215546Sopenharmony_ci
2221bf215546Sopenharmony_ci   uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2222bf215546Sopenharmony_ci   nir->info.gs.input_primitive =
2223bf215546Sopenharmony_ci      multiview_gs_input_primitive_from_pipeline(pipeline);
2224bf215546Sopenharmony_ci   nir->info.gs.output_primitive =
2225bf215546Sopenharmony_ci      multiview_gs_output_primitive_from_pipeline(pipeline);
2226bf215546Sopenharmony_ci   nir->info.gs.vertices_in = vertex_count;
2227bf215546Sopenharmony_ci   nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2228bf215546Sopenharmony_ci   nir->info.gs.invocations = 1;
2229bf215546Sopenharmony_ci   nir->info.gs.active_stream_mask = 0x1;
2230bf215546Sopenharmony_ci
2231bf215546Sopenharmony_ci   /* Make a list of GS input/output variables from the VS outputs */
2232bf215546Sopenharmony_ci   nir_variable *in_vars[100];
2233bf215546Sopenharmony_ci   nir_variable *out_vars[100];
2234bf215546Sopenharmony_ci   uint32_t var_count = 0;
2235bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2236bf215546Sopenharmony_ci      char name[8];
2237bf215546Sopenharmony_ci      snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2238bf215546Sopenharmony_ci
2239bf215546Sopenharmony_ci      in_vars[var_count] =
2240bf215546Sopenharmony_ci         nir_variable_create(nir, nir_var_shader_in,
2241bf215546Sopenharmony_ci                             glsl_array_type(out_vs_var->type, vertex_count, 0),
2242bf215546Sopenharmony_ci                             name);
2243bf215546Sopenharmony_ci      in_vars[var_count]->data.location = out_vs_var->data.location;
2244bf215546Sopenharmony_ci      in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2245bf215546Sopenharmony_ci      in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2246bf215546Sopenharmony_ci
2247bf215546Sopenharmony_ci      snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2248bf215546Sopenharmony_ci      out_vars[var_count] =
2249bf215546Sopenharmony_ci         nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2250bf215546Sopenharmony_ci      out_vars[var_count]->data.location = out_vs_var->data.location;
2251bf215546Sopenharmony_ci      out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2252bf215546Sopenharmony_ci
2253bf215546Sopenharmony_ci      var_count++;
2254bf215546Sopenharmony_ci   }
2255bf215546Sopenharmony_ci
2256bf215546Sopenharmony_ci   /* Add the gl_Layer output variable */
2257bf215546Sopenharmony_ci   nir_variable *out_layer =
2258bf215546Sopenharmony_ci      nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2259bf215546Sopenharmony_ci                          "out_Layer");
2260bf215546Sopenharmony_ci   out_layer->data.location = VARYING_SLOT_LAYER;
2261bf215546Sopenharmony_ci
2262bf215546Sopenharmony_ci   /* Get the view index value that we will write to gl_Layer */
2263bf215546Sopenharmony_ci   nir_ssa_def *layer =
2264bf215546Sopenharmony_ci      nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2265bf215546Sopenharmony_ci
2266bf215546Sopenharmony_ci   /* Emit all output vertices */
2267bf215546Sopenharmony_ci   for (uint32_t vi = 0; vi < vertex_count; vi++) {
2268bf215546Sopenharmony_ci      /* Emit all output varyings */
2269bf215546Sopenharmony_ci      for (uint32_t i = 0; i < var_count; i++) {
2270bf215546Sopenharmony_ci         nir_deref_instr *in_value =
2271bf215546Sopenharmony_ci            nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2272bf215546Sopenharmony_ci         nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2273bf215546Sopenharmony_ci      }
2274bf215546Sopenharmony_ci
2275bf215546Sopenharmony_ci      /* Emit gl_Layer write */
2276bf215546Sopenharmony_ci      nir_store_var(&b, out_layer, layer, 0x1);
2277bf215546Sopenharmony_ci
2278bf215546Sopenharmony_ci      nir_emit_vertex(&b, 0);
2279bf215546Sopenharmony_ci   }
2280bf215546Sopenharmony_ci   nir_end_primitive(&b, 0);
2281bf215546Sopenharmony_ci
2282bf215546Sopenharmony_ci   /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2283bf215546Sopenharmony_ci    * with what we expect from SPIR-V modules.
2284bf215546Sopenharmony_ci    */
2285bf215546Sopenharmony_ci   preprocess_nir(nir);
2286bf215546Sopenharmony_ci
2287bf215546Sopenharmony_ci   /* Attach the geometry shader to the  pipeline */
2288bf215546Sopenharmony_ci   struct v3dv_device *device = pipeline->device;
2289bf215546Sopenharmony_ci   struct v3dv_physical_device *physical_device =
2290bf215546Sopenharmony_ci      &device->instance->physicalDevice;
2291bf215546Sopenharmony_ci
2292bf215546Sopenharmony_ci   struct v3dv_pipeline_stage *p_stage =
2293bf215546Sopenharmony_ci      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2294bf215546Sopenharmony_ci                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2295bf215546Sopenharmony_ci
2296bf215546Sopenharmony_ci   if (p_stage == NULL) {
2297bf215546Sopenharmony_ci      ralloc_free(nir);
2298bf215546Sopenharmony_ci      return false;
2299bf215546Sopenharmony_ci   }
2300bf215546Sopenharmony_ci
2301bf215546Sopenharmony_ci   p_stage->pipeline = pipeline;
2302bf215546Sopenharmony_ci   p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2303bf215546Sopenharmony_ci   p_stage->entrypoint = "main";
2304bf215546Sopenharmony_ci   p_stage->module = 0;
2305bf215546Sopenharmony_ci   p_stage->nir = nir;
2306bf215546Sopenharmony_ci   pipeline_compute_sha1_from_nir(p_stage);
2307bf215546Sopenharmony_ci   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2308bf215546Sopenharmony_ci
2309bf215546Sopenharmony_ci   pipeline->has_gs = true;
2310bf215546Sopenharmony_ci   pipeline->gs = p_stage;
2311bf215546Sopenharmony_ci   pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2312bf215546Sopenharmony_ci
2313bf215546Sopenharmony_ci   pipeline->gs_bin =
2314bf215546Sopenharmony_ci      pipeline_stage_create_binning(pipeline->gs, pAllocator);
2315bf215546Sopenharmony_ci      if (pipeline->gs_bin == NULL)
2316bf215546Sopenharmony_ci         return false;
2317bf215546Sopenharmony_ci
2318bf215546Sopenharmony_ci   return true;
2319bf215546Sopenharmony_ci}
2320bf215546Sopenharmony_ci
2321bf215546Sopenharmony_cistatic void
2322bf215546Sopenharmony_cipipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2323bf215546Sopenharmony_ci{
2324bf215546Sopenharmony_ci   for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2325bf215546Sopenharmony_ci      struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2326bf215546Sopenharmony_ci      if (variant && variant->prog_data.base->has_global_address) {
2327bf215546Sopenharmony_ci         pipeline->uses_buffer_device_address = true;
2328bf215546Sopenharmony_ci         return;
2329bf215546Sopenharmony_ci      }
2330bf215546Sopenharmony_ci   }
2331bf215546Sopenharmony_ci
2332bf215546Sopenharmony_ci   pipeline->uses_buffer_device_address = false;
2333bf215546Sopenharmony_ci}
2334bf215546Sopenharmony_ci
2335bf215546Sopenharmony_ci/*
2336bf215546Sopenharmony_ci * It compiles a pipeline. Note that it also allocate internal object, but if
2337bf215546Sopenharmony_ci * some allocations success, but other fails, the method is not freeing the
2338bf215546Sopenharmony_ci * successful ones.
2339bf215546Sopenharmony_ci *
2340bf215546Sopenharmony_ci * This is done to simplify the code, as what we do in this case is just call
2341bf215546Sopenharmony_ci * the pipeline destroy method, and this would handle freeing the internal
2342bf215546Sopenharmony_ci * objects allocated. We just need to be careful setting to NULL the objects
2343bf215546Sopenharmony_ci * not allocated.
2344bf215546Sopenharmony_ci */
2345bf215546Sopenharmony_cistatic VkResult
2346bf215546Sopenharmony_cipipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2347bf215546Sopenharmony_ci                          struct v3dv_pipeline_cache *cache,
2348bf215546Sopenharmony_ci                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
2349bf215546Sopenharmony_ci                          const VkAllocationCallbacks *pAllocator)
2350bf215546Sopenharmony_ci{
2351bf215546Sopenharmony_ci   VkPipelineCreationFeedback pipeline_feedback = {
2352bf215546Sopenharmony_ci      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2353bf215546Sopenharmony_ci   };
2354bf215546Sopenharmony_ci   int64_t pipeline_start = os_time_get_nano();
2355bf215546Sopenharmony_ci
2356bf215546Sopenharmony_ci   struct v3dv_device *device = pipeline->device;
2357bf215546Sopenharmony_ci   struct v3dv_physical_device *physical_device =
2358bf215546Sopenharmony_ci      &device->instance->physicalDevice;
2359bf215546Sopenharmony_ci
2360bf215546Sopenharmony_ci   /* First pass to get some common info from the shader, and create the
2361bf215546Sopenharmony_ci    * individual pipeline_stage objects
2362bf215546Sopenharmony_ci    */
2363bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2364bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2365bf215546Sopenharmony_ci      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2366bf215546Sopenharmony_ci
2367bf215546Sopenharmony_ci      struct v3dv_pipeline_stage *p_stage =
2368bf215546Sopenharmony_ci         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2369bf215546Sopenharmony_ci                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2370bf215546Sopenharmony_ci
2371bf215546Sopenharmony_ci      if (p_stage == NULL)
2372bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_HOST_MEMORY;
2373bf215546Sopenharmony_ci
2374bf215546Sopenharmony_ci      /* Note that we are assigning program_id slightly differently that
2375bf215546Sopenharmony_ci       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2376bf215546Sopenharmony_ci       * would have a different program_id, while v3d would have the same for
2377bf215546Sopenharmony_ci       * both. For the case of v3dv, it is more natural to have an id this way,
2378bf215546Sopenharmony_ci       * as right now we are using it for debugging, not for shader-db.
2379bf215546Sopenharmony_ci       */
2380bf215546Sopenharmony_ci      p_stage->program_id =
2381bf215546Sopenharmony_ci         p_atomic_inc_return(&physical_device->next_program_id);
2382bf215546Sopenharmony_ci
2383bf215546Sopenharmony_ci      p_stage->pipeline = pipeline;
2384bf215546Sopenharmony_ci      p_stage->stage = gl_shader_stage_to_broadcom(stage);
2385bf215546Sopenharmony_ci      p_stage->entrypoint = sinfo->pName;
2386bf215546Sopenharmony_ci      p_stage->module = vk_shader_module_from_handle(sinfo->module);
2387bf215546Sopenharmony_ci      p_stage->spec_info = sinfo->pSpecializationInfo;
2388bf215546Sopenharmony_ci
2389bf215546Sopenharmony_ci      vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i], p_stage->shader_sha1);
2390bf215546Sopenharmony_ci
2391bf215546Sopenharmony_ci      pipeline->active_stages |= sinfo->stage;
2392bf215546Sopenharmony_ci
2393bf215546Sopenharmony_ci      /* We will try to get directly the compiled shader variant, so let's not
2394bf215546Sopenharmony_ci       * worry about getting the nir shader for now.
2395bf215546Sopenharmony_ci       */
2396bf215546Sopenharmony_ci      p_stage->nir = NULL;
2397bf215546Sopenharmony_ci
2398bf215546Sopenharmony_ci      switch(stage) {
2399bf215546Sopenharmony_ci      case MESA_SHADER_VERTEX:
2400bf215546Sopenharmony_ci         pipeline->vs = p_stage;
2401bf215546Sopenharmony_ci         pipeline->vs_bin =
2402bf215546Sopenharmony_ci            pipeline_stage_create_binning(pipeline->vs, pAllocator);
2403bf215546Sopenharmony_ci         if (pipeline->vs_bin == NULL)
2404bf215546Sopenharmony_ci            return VK_ERROR_OUT_OF_HOST_MEMORY;
2405bf215546Sopenharmony_ci         break;
2406bf215546Sopenharmony_ci
2407bf215546Sopenharmony_ci      case MESA_SHADER_GEOMETRY:
2408bf215546Sopenharmony_ci         pipeline->has_gs = true;
2409bf215546Sopenharmony_ci         pipeline->gs = p_stage;
2410bf215546Sopenharmony_ci         pipeline->gs_bin =
2411bf215546Sopenharmony_ci            pipeline_stage_create_binning(pipeline->gs, pAllocator);
2412bf215546Sopenharmony_ci         if (pipeline->gs_bin == NULL)
2413bf215546Sopenharmony_ci            return VK_ERROR_OUT_OF_HOST_MEMORY;
2414bf215546Sopenharmony_ci         break;
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci      case MESA_SHADER_FRAGMENT:
2417bf215546Sopenharmony_ci         pipeline->fs = p_stage;
2418bf215546Sopenharmony_ci         break;
2419bf215546Sopenharmony_ci
2420bf215546Sopenharmony_ci      default:
2421bf215546Sopenharmony_ci         unreachable("not supported shader stage");
2422bf215546Sopenharmony_ci      }
2423bf215546Sopenharmony_ci   }
2424bf215546Sopenharmony_ci
2425bf215546Sopenharmony_ci   /* Add a no-op fragment shader if needed */
2426bf215546Sopenharmony_ci   if (!pipeline->fs) {
2427bf215546Sopenharmony_ci      nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2428bf215546Sopenharmony_ci                                                     &v3dv_nir_options,
2429bf215546Sopenharmony_ci                                                     "noop_fs");
2430bf215546Sopenharmony_ci
2431bf215546Sopenharmony_ci      struct v3dv_pipeline_stage *p_stage =
2432bf215546Sopenharmony_ci         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2433bf215546Sopenharmony_ci                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2434bf215546Sopenharmony_ci
2435bf215546Sopenharmony_ci      if (p_stage == NULL)
2436bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_HOST_MEMORY;
2437bf215546Sopenharmony_ci
2438bf215546Sopenharmony_ci      p_stage->pipeline = pipeline;
2439bf215546Sopenharmony_ci      p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2440bf215546Sopenharmony_ci      p_stage->entrypoint = "main";
2441bf215546Sopenharmony_ci      p_stage->module = 0;
2442bf215546Sopenharmony_ci      p_stage->nir = b.shader;
2443bf215546Sopenharmony_ci      pipeline_compute_sha1_from_nir(p_stage);
2444bf215546Sopenharmony_ci      p_stage->program_id =
2445bf215546Sopenharmony_ci         p_atomic_inc_return(&physical_device->next_program_id);
2446bf215546Sopenharmony_ci
2447bf215546Sopenharmony_ci      pipeline->fs = p_stage;
2448bf215546Sopenharmony_ci      pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2449bf215546Sopenharmony_ci   }
2450bf215546Sopenharmony_ci
2451bf215546Sopenharmony_ci   /* If multiview is enabled, we inject a custom passthrough geometry shader
2452bf215546Sopenharmony_ci    * to broadcast draw calls to the appropriate views.
2453bf215546Sopenharmony_ci    */
2454bf215546Sopenharmony_ci   assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2455bf215546Sopenharmony_ci   if (pipeline->subpass->view_mask) {
2456bf215546Sopenharmony_ci      if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2457bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_HOST_MEMORY;
2458bf215546Sopenharmony_ci   }
2459bf215546Sopenharmony_ci
2460bf215546Sopenharmony_ci   /* First we try to get the variants from the pipeline cache (unless we are
2461bf215546Sopenharmony_ci    * required to capture internal representations, since in that case we need
2462bf215546Sopenharmony_ci    * compile).
2463bf215546Sopenharmony_ci    */
2464bf215546Sopenharmony_ci   bool needs_executable_info =
2465bf215546Sopenharmony_ci      pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2466bf215546Sopenharmony_ci   if (!needs_executable_info) {
2467bf215546Sopenharmony_ci      struct v3dv_pipeline_key pipeline_key;
2468bf215546Sopenharmony_ci      pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2469bf215546Sopenharmony_ci      pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2470bf215546Sopenharmony_ci
2471bf215546Sopenharmony_ci      bool cache_hit = false;
2472bf215546Sopenharmony_ci
2473bf215546Sopenharmony_ci      pipeline->shared_data =
2474bf215546Sopenharmony_ci         v3dv_pipeline_cache_search_for_pipeline(cache,
2475bf215546Sopenharmony_ci                                                 pipeline->sha1,
2476bf215546Sopenharmony_ci                                                 &cache_hit);
2477bf215546Sopenharmony_ci
2478bf215546Sopenharmony_ci      if (pipeline->shared_data != NULL) {
2479bf215546Sopenharmony_ci         /* A correct pipeline must have at least a VS and FS */
2480bf215546Sopenharmony_ci         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2481bf215546Sopenharmony_ci         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2482bf215546Sopenharmony_ci         assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2483bf215546Sopenharmony_ci         assert(!pipeline->gs ||
2484bf215546Sopenharmony_ci                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2485bf215546Sopenharmony_ci         assert(!pipeline->gs ||
2486bf215546Sopenharmony_ci                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2487bf215546Sopenharmony_ci
2488bf215546Sopenharmony_ci         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2489bf215546Sopenharmony_ci            pipeline_feedback.flags |=
2490bf215546Sopenharmony_ci               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2491bf215546Sopenharmony_ci
2492bf215546Sopenharmony_ci         goto success;
2493bf215546Sopenharmony_ci      }
2494bf215546Sopenharmony_ci   }
2495bf215546Sopenharmony_ci
2496bf215546Sopenharmony_ci   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2497bf215546Sopenharmony_ci      return VK_PIPELINE_COMPILE_REQUIRED;
2498bf215546Sopenharmony_ci
2499bf215546Sopenharmony_ci   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2500bf215546Sopenharmony_ci    * shader or the pipeline cache) and compile.
2501bf215546Sopenharmony_ci    */
2502bf215546Sopenharmony_ci   pipeline->shared_data =
2503bf215546Sopenharmony_ci      v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2504bf215546Sopenharmony_ci   if (!pipeline->shared_data)
2505bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
2506bf215546Sopenharmony_ci
2507bf215546Sopenharmony_ci   pipeline->vs->feedback.flags |=
2508bf215546Sopenharmony_ci      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2509bf215546Sopenharmony_ci   if (pipeline->gs)
2510bf215546Sopenharmony_ci      pipeline->gs->feedback.flags |=
2511bf215546Sopenharmony_ci         VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2512bf215546Sopenharmony_ci   pipeline->fs->feedback.flags |=
2513bf215546Sopenharmony_ci      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2514bf215546Sopenharmony_ci
2515bf215546Sopenharmony_ci   if (!pipeline->vs->nir)
2516bf215546Sopenharmony_ci      pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2517bf215546Sopenharmony_ci   if (pipeline->gs && !pipeline->gs->nir)
2518bf215546Sopenharmony_ci      pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2519bf215546Sopenharmony_ci   if (!pipeline->fs->nir)
2520bf215546Sopenharmony_ci      pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2521bf215546Sopenharmony_ci
2522bf215546Sopenharmony_ci   /* Linking + pipeline lowerings */
2523bf215546Sopenharmony_ci   if (pipeline->gs) {
2524bf215546Sopenharmony_ci      link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2525bf215546Sopenharmony_ci      link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2526bf215546Sopenharmony_ci   } else {
2527bf215546Sopenharmony_ci      link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2528bf215546Sopenharmony_ci   }
2529bf215546Sopenharmony_ci
2530bf215546Sopenharmony_ci   pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2531bf215546Sopenharmony_ci   lower_fs_io(pipeline->fs->nir);
2532bf215546Sopenharmony_ci
2533bf215546Sopenharmony_ci   if (pipeline->gs) {
2534bf215546Sopenharmony_ci      pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2535bf215546Sopenharmony_ci      lower_gs_io(pipeline->gs->nir);
2536bf215546Sopenharmony_ci   }
2537bf215546Sopenharmony_ci
2538bf215546Sopenharmony_ci   pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2539bf215546Sopenharmony_ci   lower_vs_io(pipeline->vs->nir);
2540bf215546Sopenharmony_ci
2541bf215546Sopenharmony_ci   /* Compiling to vir */
2542bf215546Sopenharmony_ci   VkResult vk_result;
2543bf215546Sopenharmony_ci
2544bf215546Sopenharmony_ci   /* We should have got all the variants or no variants from the cache */
2545bf215546Sopenharmony_ci   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2546bf215546Sopenharmony_ci   vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2547bf215546Sopenharmony_ci   if (vk_result != VK_SUCCESS)
2548bf215546Sopenharmony_ci      return vk_result;
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2551bf215546Sopenharmony_ci          !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2552bf215546Sopenharmony_ci
2553bf215546Sopenharmony_ci   if (pipeline->gs) {
2554bf215546Sopenharmony_ci      vk_result =
2555bf215546Sopenharmony_ci         pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2556bf215546Sopenharmony_ci      if (vk_result != VK_SUCCESS)
2557bf215546Sopenharmony_ci         return vk_result;
2558bf215546Sopenharmony_ci   }
2559bf215546Sopenharmony_ci
2560bf215546Sopenharmony_ci   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2561bf215546Sopenharmony_ci          !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci   vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2564bf215546Sopenharmony_ci   if (vk_result != VK_SUCCESS)
2565bf215546Sopenharmony_ci      return vk_result;
2566bf215546Sopenharmony_ci
2567bf215546Sopenharmony_ci   if (!upload_assembly(pipeline))
2568bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2569bf215546Sopenharmony_ci
2570bf215546Sopenharmony_ci   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci success:
2573bf215546Sopenharmony_ci
2574bf215546Sopenharmony_ci   pipeline_check_buffer_device_address(pipeline);
2575bf215546Sopenharmony_ci
2576bf215546Sopenharmony_ci   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2577bf215546Sopenharmony_ci   write_creation_feedback(pipeline,
2578bf215546Sopenharmony_ci                           pCreateInfo->pNext,
2579bf215546Sopenharmony_ci                           &pipeline_feedback,
2580bf215546Sopenharmony_ci                           pCreateInfo->stageCount,
2581bf215546Sopenharmony_ci                           pCreateInfo->pStages);
2582bf215546Sopenharmony_ci
2583bf215546Sopenharmony_ci   /* Since we have the variants in the pipeline shared data we can now free
2584bf215546Sopenharmony_ci    * the pipeline stages.
2585bf215546Sopenharmony_ci    */
2586bf215546Sopenharmony_ci   if (!needs_executable_info)
2587bf215546Sopenharmony_ci      pipeline_free_stages(device, pipeline, pAllocator);
2588bf215546Sopenharmony_ci
2589bf215546Sopenharmony_ci   pipeline_check_spill_size(pipeline);
2590bf215546Sopenharmony_ci
2591bf215546Sopenharmony_ci   return compute_vpm_config(pipeline);
2592bf215546Sopenharmony_ci}
2593bf215546Sopenharmony_ci
2594bf215546Sopenharmony_cistatic VkResult
2595bf215546Sopenharmony_cicompute_vpm_config(struct v3dv_pipeline *pipeline)
2596bf215546Sopenharmony_ci{
2597bf215546Sopenharmony_ci   struct v3dv_shader_variant *vs_variant =
2598bf215546Sopenharmony_ci      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2599bf215546Sopenharmony_ci   struct v3dv_shader_variant *vs_bin_variant =
2600bf215546Sopenharmony_ci      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2601bf215546Sopenharmony_ci   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2602bf215546Sopenharmony_ci   struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
2603bf215546Sopenharmony_ci
2604bf215546Sopenharmony_ci   struct v3d_gs_prog_data *gs = NULL;
2605bf215546Sopenharmony_ci   struct v3d_gs_prog_data *gs_bin = NULL;
2606bf215546Sopenharmony_ci   if (pipeline->has_gs) {
2607bf215546Sopenharmony_ci      struct v3dv_shader_variant *gs_variant =
2608bf215546Sopenharmony_ci         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2609bf215546Sopenharmony_ci      struct v3dv_shader_variant *gs_bin_variant =
2610bf215546Sopenharmony_ci         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2611bf215546Sopenharmony_ci      gs = gs_variant->prog_data.gs;
2612bf215546Sopenharmony_ci      gs_bin = gs_bin_variant->prog_data.gs;
2613bf215546Sopenharmony_ci   }
2614bf215546Sopenharmony_ci
2615bf215546Sopenharmony_ci   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2616bf215546Sopenharmony_ci                               vs_bin, vs, gs_bin, gs,
2617bf215546Sopenharmony_ci                               &pipeline->vpm_cfg_bin,
2618bf215546Sopenharmony_ci                               &pipeline->vpm_cfg)) {
2619bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2620bf215546Sopenharmony_ci   }
2621bf215546Sopenharmony_ci
2622bf215546Sopenharmony_ci   return VK_SUCCESS;
2623bf215546Sopenharmony_ci}
2624bf215546Sopenharmony_ci
2625bf215546Sopenharmony_cistatic unsigned
2626bf215546Sopenharmony_civ3dv_dynamic_state_mask(VkDynamicState state)
2627bf215546Sopenharmony_ci{
2628bf215546Sopenharmony_ci   switch(state) {
2629bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_VIEWPORT:
2630bf215546Sopenharmony_ci      return V3DV_DYNAMIC_VIEWPORT;
2631bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_SCISSOR:
2632bf215546Sopenharmony_ci      return V3DV_DYNAMIC_SCISSOR;
2633bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2634bf215546Sopenharmony_ci      return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2635bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2636bf215546Sopenharmony_ci      return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2637bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2638bf215546Sopenharmony_ci      return V3DV_DYNAMIC_STENCIL_REFERENCE;
2639bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2640bf215546Sopenharmony_ci      return V3DV_DYNAMIC_BLEND_CONSTANTS;
2641bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BIAS:
2642bf215546Sopenharmony_ci      return V3DV_DYNAMIC_DEPTH_BIAS;
2643bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_LINE_WIDTH:
2644bf215546Sopenharmony_ci      return V3DV_DYNAMIC_LINE_WIDTH;
2645bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2646bf215546Sopenharmony_ci      return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2647bf215546Sopenharmony_ci
2648bf215546Sopenharmony_ci   /* Depth bounds testing is not available in in V3D 4.2 so here we are just
2649bf215546Sopenharmony_ci    * ignoring this dynamic state. We are already asserting at pipeline creation
2650bf215546Sopenharmony_ci    * time that depth bounds testing is not enabled.
2651bf215546Sopenharmony_ci    */
2652bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2653bf215546Sopenharmony_ci      return 0;
2654bf215546Sopenharmony_ci
2655bf215546Sopenharmony_ci   default:
2656bf215546Sopenharmony_ci      unreachable("Unhandled dynamic state");
2657bf215546Sopenharmony_ci   }
2658bf215546Sopenharmony_ci}
2659bf215546Sopenharmony_ci
2660bf215546Sopenharmony_cistatic void
2661bf215546Sopenharmony_cipipeline_init_dynamic_state(
2662bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline,
2663bf215546Sopenharmony_ci   const VkPipelineDynamicStateCreateInfo *pDynamicState,
2664bf215546Sopenharmony_ci   const VkPipelineViewportStateCreateInfo *pViewportState,
2665bf215546Sopenharmony_ci   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2666bf215546Sopenharmony_ci   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2667bf215546Sopenharmony_ci   const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2668bf215546Sopenharmony_ci   const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2669bf215546Sopenharmony_ci{
2670bf215546Sopenharmony_ci   /* Initialize to default values */
2671bf215546Sopenharmony_ci   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2672bf215546Sopenharmony_ci   memset(dynamic, 0, sizeof(*dynamic));
2673bf215546Sopenharmony_ci   dynamic->stencil_compare_mask.front = ~0;
2674bf215546Sopenharmony_ci   dynamic->stencil_compare_mask.back = ~0;
2675bf215546Sopenharmony_ci   dynamic->stencil_write_mask.front = ~0;
2676bf215546Sopenharmony_ci   dynamic->stencil_write_mask.back = ~0;
2677bf215546Sopenharmony_ci   dynamic->line_width = 1.0f;
2678bf215546Sopenharmony_ci   dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
2679bf215546Sopenharmony_ci
2680bf215546Sopenharmony_ci   /* Create a mask of enabled dynamic states */
2681bf215546Sopenharmony_ci   uint32_t dynamic_states = 0;
2682bf215546Sopenharmony_ci   if (pDynamicState) {
2683bf215546Sopenharmony_ci      uint32_t count = pDynamicState->dynamicStateCount;
2684bf215546Sopenharmony_ci      for (uint32_t s = 0; s < count; s++) {
2685bf215546Sopenharmony_ci         dynamic_states |=
2686bf215546Sopenharmony_ci            v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2687bf215546Sopenharmony_ci      }
2688bf215546Sopenharmony_ci   }
2689bf215546Sopenharmony_ci
2690bf215546Sopenharmony_ci   /* For any pipeline states that are not dynamic, set the dynamic state
2691bf215546Sopenharmony_ci    * from the static pipeline state.
2692bf215546Sopenharmony_ci    */
2693bf215546Sopenharmony_ci   if (pViewportState) {
2694bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2695bf215546Sopenharmony_ci         dynamic->viewport.count = pViewportState->viewportCount;
2696bf215546Sopenharmony_ci         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2697bf215546Sopenharmony_ci                      pViewportState->viewportCount);
2698bf215546Sopenharmony_ci
2699bf215546Sopenharmony_ci         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2700bf215546Sopenharmony_ci            v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2701bf215546Sopenharmony_ci                                        dynamic->viewport.scale[i],
2702bf215546Sopenharmony_ci                                        dynamic->viewport.translate[i]);
2703bf215546Sopenharmony_ci         }
2704bf215546Sopenharmony_ci      }
2705bf215546Sopenharmony_ci
2706bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2707bf215546Sopenharmony_ci         dynamic->scissor.count = pViewportState->scissorCount;
2708bf215546Sopenharmony_ci         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2709bf215546Sopenharmony_ci                      pViewportState->scissorCount);
2710bf215546Sopenharmony_ci      }
2711bf215546Sopenharmony_ci   }
2712bf215546Sopenharmony_ci
2713bf215546Sopenharmony_ci   if (pDepthStencilState) {
2714bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2715bf215546Sopenharmony_ci         dynamic->stencil_compare_mask.front =
2716bf215546Sopenharmony_ci            pDepthStencilState->front.compareMask;
2717bf215546Sopenharmony_ci         dynamic->stencil_compare_mask.back =
2718bf215546Sopenharmony_ci            pDepthStencilState->back.compareMask;
2719bf215546Sopenharmony_ci      }
2720bf215546Sopenharmony_ci
2721bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2722bf215546Sopenharmony_ci         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2723bf215546Sopenharmony_ci         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2724bf215546Sopenharmony_ci      }
2725bf215546Sopenharmony_ci
2726bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2727bf215546Sopenharmony_ci         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2728bf215546Sopenharmony_ci         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2729bf215546Sopenharmony_ci      }
2730bf215546Sopenharmony_ci   }
2731bf215546Sopenharmony_ci
2732bf215546Sopenharmony_ci   if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2733bf215546Sopenharmony_ci      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2734bf215546Sopenharmony_ci             sizeof(dynamic->blend_constants));
2735bf215546Sopenharmony_ci   }
2736bf215546Sopenharmony_ci
2737bf215546Sopenharmony_ci   if (pRasterizationState) {
2738bf215546Sopenharmony_ci      if (pRasterizationState->depthBiasEnable &&
2739bf215546Sopenharmony_ci          !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2740bf215546Sopenharmony_ci         dynamic->depth_bias.constant_factor =
2741bf215546Sopenharmony_ci            pRasterizationState->depthBiasConstantFactor;
2742bf215546Sopenharmony_ci         dynamic->depth_bias.depth_bias_clamp =
2743bf215546Sopenharmony_ci            pRasterizationState->depthBiasClamp;
2744bf215546Sopenharmony_ci         dynamic->depth_bias.slope_factor =
2745bf215546Sopenharmony_ci            pRasterizationState->depthBiasSlopeFactor;
2746bf215546Sopenharmony_ci      }
2747bf215546Sopenharmony_ci      if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2748bf215546Sopenharmony_ci         dynamic->line_width = pRasterizationState->lineWidth;
2749bf215546Sopenharmony_ci   }
2750bf215546Sopenharmony_ci
2751bf215546Sopenharmony_ci   if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2752bf215546Sopenharmony_ci      dynamic->color_write_enable = 0;
2753bf215546Sopenharmony_ci      for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2754bf215546Sopenharmony_ci         dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2755bf215546Sopenharmony_ci   }
2756bf215546Sopenharmony_ci
2757bf215546Sopenharmony_ci   pipeline->dynamic_state.mask = dynamic_states;
2758bf215546Sopenharmony_ci}
2759bf215546Sopenharmony_ci
2760bf215546Sopenharmony_cistatic bool
2761bf215546Sopenharmony_cistencil_op_is_no_op(const VkStencilOpState *stencil)
2762bf215546Sopenharmony_ci{
2763bf215546Sopenharmony_ci   return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2764bf215546Sopenharmony_ci          stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2765bf215546Sopenharmony_ci}
2766bf215546Sopenharmony_ci
2767bf215546Sopenharmony_cistatic void
2768bf215546Sopenharmony_cienable_depth_bias(struct v3dv_pipeline *pipeline,
2769bf215546Sopenharmony_ci                  const VkPipelineRasterizationStateCreateInfo *rs_info)
2770bf215546Sopenharmony_ci{
2771bf215546Sopenharmony_ci   pipeline->depth_bias.enabled = false;
2772bf215546Sopenharmony_ci   pipeline->depth_bias.is_z16 = false;
2773bf215546Sopenharmony_ci
2774bf215546Sopenharmony_ci   if (!rs_info || !rs_info->depthBiasEnable)
2775bf215546Sopenharmony_ci      return;
2776bf215546Sopenharmony_ci
2777bf215546Sopenharmony_ci   /* Check the depth/stencil attachment description for the subpass used with
2778bf215546Sopenharmony_ci    * this pipeline.
2779bf215546Sopenharmony_ci    */
2780bf215546Sopenharmony_ci   assert(pipeline->pass && pipeline->subpass);
2781bf215546Sopenharmony_ci   struct v3dv_render_pass *pass = pipeline->pass;
2782bf215546Sopenharmony_ci   struct v3dv_subpass *subpass = pipeline->subpass;
2783bf215546Sopenharmony_ci
2784bf215546Sopenharmony_ci   if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2785bf215546Sopenharmony_ci      return;
2786bf215546Sopenharmony_ci
2787bf215546Sopenharmony_ci   assert(subpass->ds_attachment.attachment < pass->attachment_count);
2788bf215546Sopenharmony_ci   struct v3dv_render_pass_attachment *att =
2789bf215546Sopenharmony_ci      &pass->attachments[subpass->ds_attachment.attachment];
2790bf215546Sopenharmony_ci
2791bf215546Sopenharmony_ci   if (att->desc.format == VK_FORMAT_D16_UNORM)
2792bf215546Sopenharmony_ci      pipeline->depth_bias.is_z16 = true;
2793bf215546Sopenharmony_ci
2794bf215546Sopenharmony_ci   pipeline->depth_bias.enabled = true;
2795bf215546Sopenharmony_ci}
2796bf215546Sopenharmony_ci
2797bf215546Sopenharmony_cistatic void
2798bf215546Sopenharmony_cipipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2799bf215546Sopenharmony_ci                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
2800bf215546Sopenharmony_ci{
2801bf215546Sopenharmony_ci   if (!ds_info || !ds_info->depthTestEnable) {
2802bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_DISABLED;
2803bf215546Sopenharmony_ci      return;
2804bf215546Sopenharmony_ci   }
2805bf215546Sopenharmony_ci
2806bf215546Sopenharmony_ci   switch (ds_info->depthCompareOp) {
2807bf215546Sopenharmony_ci   case VK_COMPARE_OP_LESS:
2808bf215546Sopenharmony_ci   case VK_COMPARE_OP_LESS_OR_EQUAL:
2809bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_LT_LE;
2810bf215546Sopenharmony_ci      break;
2811bf215546Sopenharmony_ci   case VK_COMPARE_OP_GREATER:
2812bf215546Sopenharmony_ci   case VK_COMPARE_OP_GREATER_OR_EQUAL:
2813bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_GT_GE;
2814bf215546Sopenharmony_ci      break;
2815bf215546Sopenharmony_ci   case VK_COMPARE_OP_NEVER:
2816bf215546Sopenharmony_ci   case VK_COMPARE_OP_EQUAL:
2817bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_UNDECIDED;
2818bf215546Sopenharmony_ci      break;
2819bf215546Sopenharmony_ci   default:
2820bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_DISABLED;
2821bf215546Sopenharmony_ci      pipeline->incompatible_ez_test = true;
2822bf215546Sopenharmony_ci      break;
2823bf215546Sopenharmony_ci   }
2824bf215546Sopenharmony_ci
2825bf215546Sopenharmony_ci   /* If stencil is enabled and is not a no-op, we need to disable EZ */
2826bf215546Sopenharmony_ci   if (ds_info->stencilTestEnable &&
2827bf215546Sopenharmony_ci       (!stencil_op_is_no_op(&ds_info->front) ||
2828bf215546Sopenharmony_ci        !stencil_op_is_no_op(&ds_info->back))) {
2829bf215546Sopenharmony_ci         pipeline->ez_state = V3D_EZ_DISABLED;
2830bf215546Sopenharmony_ci   }
2831bf215546Sopenharmony_ci
2832bf215546Sopenharmony_ci   /* If the FS writes Z, then it may update against the chosen EZ direction */
2833bf215546Sopenharmony_ci   struct v3dv_shader_variant *fs_variant =
2834bf215546Sopenharmony_ci      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2835bf215546Sopenharmony_ci   if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2836bf215546Sopenharmony_ci       !fs_variant->prog_data.fs->writes_z_from_fep) {
2837bf215546Sopenharmony_ci      pipeline->ez_state = V3D_EZ_DISABLED;
2838bf215546Sopenharmony_ci   }
2839bf215546Sopenharmony_ci}
2840bf215546Sopenharmony_ci
2841bf215546Sopenharmony_cistatic bool
2842bf215546Sopenharmony_cipipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2843bf215546Sopenharmony_ci{
2844bf215546Sopenharmony_ci   for (uint8_t i = 0; i < pipeline->va_count; i++) {
2845bf215546Sopenharmony_ci      if (vk_format_is_int(pipeline->va[i].vk_format))
2846bf215546Sopenharmony_ci         return true;
2847bf215546Sopenharmony_ci   }
2848bf215546Sopenharmony_ci   return false;
2849bf215546Sopenharmony_ci}
2850bf215546Sopenharmony_ci
2851bf215546Sopenharmony_ci/* @pipeline can be NULL. We assume in that case that all the attributes have
2852bf215546Sopenharmony_ci * a float format (we only create an all-float BO once and we reuse it with
2853bf215546Sopenharmony_ci * all float pipelines), otherwise we look at the actual type of each
2854bf215546Sopenharmony_ci * attribute used with the specific pipeline passed in.
2855bf215546Sopenharmony_ci */
2856bf215546Sopenharmony_cistruct v3dv_bo *
2857bf215546Sopenharmony_civ3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2858bf215546Sopenharmony_ci                                              struct v3dv_pipeline *pipeline)
2859bf215546Sopenharmony_ci{
2860bf215546Sopenharmony_ci   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2861bf215546Sopenharmony_ci   struct v3dv_bo *bo;
2862bf215546Sopenharmony_ci
2863bf215546Sopenharmony_ci   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2864bf215546Sopenharmony_ci
2865bf215546Sopenharmony_ci   if (!bo) {
2866bf215546Sopenharmony_ci      fprintf(stderr, "failed to allocate memory for the default "
2867bf215546Sopenharmony_ci              "attribute values\n");
2868bf215546Sopenharmony_ci      return NULL;
2869bf215546Sopenharmony_ci   }
2870bf215546Sopenharmony_ci
2871bf215546Sopenharmony_ci   bool ok = v3dv_bo_map(device, bo, size);
2872bf215546Sopenharmony_ci   if (!ok) {
2873bf215546Sopenharmony_ci      fprintf(stderr, "failed to map default attribute values buffer\n");
2874bf215546Sopenharmony_ci      return false;
2875bf215546Sopenharmony_ci   }
2876bf215546Sopenharmony_ci
2877bf215546Sopenharmony_ci   uint32_t *attrs = bo->map;
2878bf215546Sopenharmony_ci   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2879bf215546Sopenharmony_ci   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2880bf215546Sopenharmony_ci      attrs[i * 4 + 0] = 0;
2881bf215546Sopenharmony_ci      attrs[i * 4 + 1] = 0;
2882bf215546Sopenharmony_ci      attrs[i * 4 + 2] = 0;
2883bf215546Sopenharmony_ci      VkFormat attr_format =
2884bf215546Sopenharmony_ci         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2885bf215546Sopenharmony_ci      if (i < va_count && vk_format_is_int(attr_format)) {
2886bf215546Sopenharmony_ci         attrs[i * 4 + 3] = 1;
2887bf215546Sopenharmony_ci      } else {
2888bf215546Sopenharmony_ci         attrs[i * 4 + 3] = fui(1.0);
2889bf215546Sopenharmony_ci      }
2890bf215546Sopenharmony_ci   }
2891bf215546Sopenharmony_ci
2892bf215546Sopenharmony_ci   v3dv_bo_unmap(device, bo);
2893bf215546Sopenharmony_ci
2894bf215546Sopenharmony_ci   return bo;
2895bf215546Sopenharmony_ci}
2896bf215546Sopenharmony_ci
2897bf215546Sopenharmony_cistatic void
2898bf215546Sopenharmony_cipipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2899bf215546Sopenharmony_ci                         const VkPipelineMultisampleStateCreateInfo *ms_info)
2900bf215546Sopenharmony_ci{
2901bf215546Sopenharmony_ci   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2902bf215546Sopenharmony_ci
2903bf215546Sopenharmony_ci   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2904bf215546Sopenharmony_ci    * requires this to be 0xf or 0x0 if using a single sample.
2905bf215546Sopenharmony_ci    */
2906bf215546Sopenharmony_ci   if (ms_info && ms_info->pSampleMask &&
2907bf215546Sopenharmony_ci       ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2908bf215546Sopenharmony_ci      pipeline->sample_mask &= ms_info->pSampleMask[0];
2909bf215546Sopenharmony_ci   }
2910bf215546Sopenharmony_ci}
2911bf215546Sopenharmony_ci
2912bf215546Sopenharmony_cistatic void
2913bf215546Sopenharmony_cipipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2914bf215546Sopenharmony_ci                                 const VkPipelineMultisampleStateCreateInfo *ms_info)
2915bf215546Sopenharmony_ci{
2916bf215546Sopenharmony_ci   pipeline->sample_rate_shading =
2917bf215546Sopenharmony_ci      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2918bf215546Sopenharmony_ci      ms_info->sampleShadingEnable;
2919bf215546Sopenharmony_ci}
2920bf215546Sopenharmony_ci
2921bf215546Sopenharmony_cistatic VkResult
2922bf215546Sopenharmony_cipipeline_init(struct v3dv_pipeline *pipeline,
2923bf215546Sopenharmony_ci              struct v3dv_device *device,
2924bf215546Sopenharmony_ci              struct v3dv_pipeline_cache *cache,
2925bf215546Sopenharmony_ci              const VkGraphicsPipelineCreateInfo *pCreateInfo,
2926bf215546Sopenharmony_ci              const VkAllocationCallbacks *pAllocator)
2927bf215546Sopenharmony_ci{
2928bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
2929bf215546Sopenharmony_ci
2930bf215546Sopenharmony_ci   pipeline->device = device;
2931bf215546Sopenharmony_ci
2932bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2933bf215546Sopenharmony_ci   pipeline->layout = layout;
2934bf215546Sopenharmony_ci
2935bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2936bf215546Sopenharmony_ci   assert(pCreateInfo->subpass < render_pass->subpass_count);
2937bf215546Sopenharmony_ci   pipeline->pass = render_pass;
2938bf215546Sopenharmony_ci   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2939bf215546Sopenharmony_ci
2940bf215546Sopenharmony_ci   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2941bf215546Sopenharmony_ci      pCreateInfo->pInputAssemblyState;
2942bf215546Sopenharmony_ci   pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2943bf215546Sopenharmony_ci
2944bf215546Sopenharmony_ci   /* If rasterization is not enabled, various CreateInfo structs must be
2945bf215546Sopenharmony_ci    * ignored.
2946bf215546Sopenharmony_ci    */
2947bf215546Sopenharmony_ci   const bool raster_enabled =
2948bf215546Sopenharmony_ci      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2949bf215546Sopenharmony_ci
2950bf215546Sopenharmony_ci   const VkPipelineViewportStateCreateInfo *vp_info =
2951bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pViewportState : NULL;
2952bf215546Sopenharmony_ci
2953bf215546Sopenharmony_ci   const VkPipelineDepthStencilStateCreateInfo *ds_info =
2954bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2955bf215546Sopenharmony_ci
2956bf215546Sopenharmony_ci   const VkPipelineRasterizationStateCreateInfo *rs_info =
2957bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2958bf215546Sopenharmony_ci
2959bf215546Sopenharmony_ci   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2960bf215546Sopenharmony_ci      rs_info ? vk_find_struct_const(
2961bf215546Sopenharmony_ci         rs_info->pNext,
2962bf215546Sopenharmony_ci         PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2963bf215546Sopenharmony_ci            NULL;
2964bf215546Sopenharmony_ci
2965bf215546Sopenharmony_ci   const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2966bf215546Sopenharmony_ci      rs_info ? vk_find_struct_const(
2967bf215546Sopenharmony_ci         rs_info->pNext,
2968bf215546Sopenharmony_ci         PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2969bf215546Sopenharmony_ci            NULL;
2970bf215546Sopenharmony_ci
2971bf215546Sopenharmony_ci   const VkPipelineColorBlendStateCreateInfo *cb_info =
2972bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2973bf215546Sopenharmony_ci
2974bf215546Sopenharmony_ci   const VkPipelineMultisampleStateCreateInfo *ms_info =
2975bf215546Sopenharmony_ci      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2976bf215546Sopenharmony_ci
2977bf215546Sopenharmony_ci   const VkPipelineColorWriteCreateInfoEXT *cw_info =
2978bf215546Sopenharmony_ci      cb_info ? vk_find_struct_const(cb_info->pNext,
2979bf215546Sopenharmony_ci                                     PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2980bf215546Sopenharmony_ci                NULL;
2981bf215546Sopenharmony_ci
2982bf215546Sopenharmony_ci   pipeline_init_dynamic_state(pipeline,
2983bf215546Sopenharmony_ci                               pCreateInfo->pDynamicState,
2984bf215546Sopenharmony_ci                               vp_info, ds_info, cb_info, rs_info, cw_info);
2985bf215546Sopenharmony_ci
2986bf215546Sopenharmony_ci   /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2987bf215546Sopenharmony_ci    * feature and it shouldn't be used by any pipeline.
2988bf215546Sopenharmony_ci    */
2989bf215546Sopenharmony_ci   assert(!ds_info || !ds_info->depthBoundsTestEnable);
2990bf215546Sopenharmony_ci
2991bf215546Sopenharmony_ci   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2992bf215546Sopenharmony_ci                                       rs_info, pv_info, ls_info,
2993bf215546Sopenharmony_ci                                       ms_info);
2994bf215546Sopenharmony_ci
2995bf215546Sopenharmony_ci   enable_depth_bias(pipeline, rs_info);
2996bf215546Sopenharmony_ci   pipeline_set_sample_mask(pipeline, ms_info);
2997bf215546Sopenharmony_ci   pipeline_set_sample_rate_shading(pipeline, ms_info);
2998bf215546Sopenharmony_ci
2999bf215546Sopenharmony_ci   pipeline->primitive_restart =
3000bf215546Sopenharmony_ci      pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3001bf215546Sopenharmony_ci
3002bf215546Sopenharmony_ci   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3003bf215546Sopenharmony_ci
3004bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
3005bf215546Sopenharmony_ci      /* Caller would already destroy the pipeline, and we didn't allocate any
3006bf215546Sopenharmony_ci       * extra info. We don't need to do anything else.
3007bf215546Sopenharmony_ci       */
3008bf215546Sopenharmony_ci      return result;
3009bf215546Sopenharmony_ci   }
3010bf215546Sopenharmony_ci
3011bf215546Sopenharmony_ci   const VkPipelineVertexInputStateCreateInfo *vi_info =
3012bf215546Sopenharmony_ci      pCreateInfo->pVertexInputState;
3013bf215546Sopenharmony_ci
3014bf215546Sopenharmony_ci   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3015bf215546Sopenharmony_ci      vk_find_struct_const(vi_info->pNext,
3016bf215546Sopenharmony_ci                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3017bf215546Sopenharmony_ci
3018bf215546Sopenharmony_ci   v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3019bf215546Sopenharmony_ci
3020bf215546Sopenharmony_ci   if (pipeline_has_integer_vertex_attrib(pipeline)) {
3021bf215546Sopenharmony_ci      pipeline->default_attribute_values =
3022bf215546Sopenharmony_ci         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3023bf215546Sopenharmony_ci      if (!pipeline->default_attribute_values)
3024bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3025bf215546Sopenharmony_ci   } else {
3026bf215546Sopenharmony_ci      pipeline->default_attribute_values = NULL;
3027bf215546Sopenharmony_ci   }
3028bf215546Sopenharmony_ci
3029bf215546Sopenharmony_ci   /* This must be done after the pipeline has been compiled */
3030bf215546Sopenharmony_ci   pipeline_set_ez_state(pipeline, ds_info);
3031bf215546Sopenharmony_ci
3032bf215546Sopenharmony_ci   return result;
3033bf215546Sopenharmony_ci}
3034bf215546Sopenharmony_ci
3035bf215546Sopenharmony_cistatic VkResult
3036bf215546Sopenharmony_cigraphics_pipeline_create(VkDevice _device,
3037bf215546Sopenharmony_ci                         VkPipelineCache _cache,
3038bf215546Sopenharmony_ci                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
3039bf215546Sopenharmony_ci                         const VkAllocationCallbacks *pAllocator,
3040bf215546Sopenharmony_ci                         VkPipeline *pPipeline)
3041bf215546Sopenharmony_ci{
3042bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3043bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3044bf215546Sopenharmony_ci
3045bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline;
3046bf215546Sopenharmony_ci   VkResult result;
3047bf215546Sopenharmony_ci
3048bf215546Sopenharmony_ci   /* Use the default pipeline cache if none is specified */
3049bf215546Sopenharmony_ci   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3050bf215546Sopenharmony_ci      cache = &device->default_pipeline_cache;
3051bf215546Sopenharmony_ci
3052bf215546Sopenharmony_ci   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3053bf215546Sopenharmony_ci                               VK_OBJECT_TYPE_PIPELINE);
3054bf215546Sopenharmony_ci
3055bf215546Sopenharmony_ci   if (pipeline == NULL)
3056bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3057bf215546Sopenharmony_ci
3058bf215546Sopenharmony_ci   result = pipeline_init(pipeline, device, cache,
3059bf215546Sopenharmony_ci                          pCreateInfo,
3060bf215546Sopenharmony_ci                          pAllocator);
3061bf215546Sopenharmony_ci
3062bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
3063bf215546Sopenharmony_ci      v3dv_destroy_pipeline(pipeline, device, pAllocator);
3064bf215546Sopenharmony_ci      if (result == VK_PIPELINE_COMPILE_REQUIRED)
3065bf215546Sopenharmony_ci         *pPipeline = VK_NULL_HANDLE;
3066bf215546Sopenharmony_ci      return result;
3067bf215546Sopenharmony_ci   }
3068bf215546Sopenharmony_ci
3069bf215546Sopenharmony_ci   *pPipeline = v3dv_pipeline_to_handle(pipeline);
3070bf215546Sopenharmony_ci
3071bf215546Sopenharmony_ci   return VK_SUCCESS;
3072bf215546Sopenharmony_ci}
3073bf215546Sopenharmony_ci
3074bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
3075bf215546Sopenharmony_civ3dv_CreateGraphicsPipelines(VkDevice _device,
3076bf215546Sopenharmony_ci                             VkPipelineCache pipelineCache,
3077bf215546Sopenharmony_ci                             uint32_t count,
3078bf215546Sopenharmony_ci                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
3079bf215546Sopenharmony_ci                             const VkAllocationCallbacks *pAllocator,
3080bf215546Sopenharmony_ci                             VkPipeline *pPipelines)
3081bf215546Sopenharmony_ci{
3082bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3083bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
3084bf215546Sopenharmony_ci
3085bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3086bf215546Sopenharmony_ci      mtx_lock(&device->pdevice->mutex);
3087bf215546Sopenharmony_ci
3088bf215546Sopenharmony_ci   uint32_t i = 0;
3089bf215546Sopenharmony_ci   for (; i < count; i++) {
3090bf215546Sopenharmony_ci      VkResult local_result;
3091bf215546Sopenharmony_ci
3092bf215546Sopenharmony_ci      local_result = graphics_pipeline_create(_device,
3093bf215546Sopenharmony_ci                                              pipelineCache,
3094bf215546Sopenharmony_ci                                              &pCreateInfos[i],
3095bf215546Sopenharmony_ci                                              pAllocator,
3096bf215546Sopenharmony_ci                                              &pPipelines[i]);
3097bf215546Sopenharmony_ci
3098bf215546Sopenharmony_ci      if (local_result != VK_SUCCESS) {
3099bf215546Sopenharmony_ci         result = local_result;
3100bf215546Sopenharmony_ci         pPipelines[i] = VK_NULL_HANDLE;
3101bf215546Sopenharmony_ci
3102bf215546Sopenharmony_ci         if (pCreateInfos[i].flags &
3103bf215546Sopenharmony_ci             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3104bf215546Sopenharmony_ci            break;
3105bf215546Sopenharmony_ci      }
3106bf215546Sopenharmony_ci   }
3107bf215546Sopenharmony_ci
3108bf215546Sopenharmony_ci   for (; i < count; i++)
3109bf215546Sopenharmony_ci      pPipelines[i] = VK_NULL_HANDLE;
3110bf215546Sopenharmony_ci
3111bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3112bf215546Sopenharmony_ci      mtx_unlock(&device->pdevice->mutex);
3113bf215546Sopenharmony_ci
3114bf215546Sopenharmony_ci   return result;
3115bf215546Sopenharmony_ci}
3116bf215546Sopenharmony_ci
/* Size/alignment callback for shared-memory variables.
 *
 * Only scalar and vector types are accepted. Each component takes 4 bytes
 * for booleans and bit_size / 8 bytes otherwise. The size is the component
 * size times the number of components; the alignment is computed the same
 * way except that 3-component vectors are aligned like 4-component ones.
 *
 * type:  type to measure.
 * size:  receives the size in bytes.
 * align: receives the alignment in bytes.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);

   /* Two separate statements: the original joined them with a stray comma
    * operator.
    */
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
3128bf215546Sopenharmony_ci
3129bf215546Sopenharmony_cistatic void
3130bf215546Sopenharmony_cilower_cs_shared(struct nir_shader *nir)
3131bf215546Sopenharmony_ci{
3132bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3133bf215546Sopenharmony_ci            nir_var_mem_shared, shared_type_info);
3134bf215546Sopenharmony_ci   NIR_PASS(_, nir, nir_lower_explicit_io,
3135bf215546Sopenharmony_ci            nir_var_mem_shared, nir_address_format_32bit_offset);
3136bf215546Sopenharmony_ci}
3137bf215546Sopenharmony_ci
3138bf215546Sopenharmony_cistatic VkResult
3139bf215546Sopenharmony_cipipeline_compile_compute(struct v3dv_pipeline *pipeline,
3140bf215546Sopenharmony_ci                         struct v3dv_pipeline_cache *cache,
3141bf215546Sopenharmony_ci                         const VkComputePipelineCreateInfo *info,
3142bf215546Sopenharmony_ci                         const VkAllocationCallbacks *alloc)
3143bf215546Sopenharmony_ci{
3144bf215546Sopenharmony_ci   VkPipelineCreationFeedback pipeline_feedback = {
3145bf215546Sopenharmony_ci      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3146bf215546Sopenharmony_ci   };
3147bf215546Sopenharmony_ci   int64_t pipeline_start = os_time_get_nano();
3148bf215546Sopenharmony_ci
3149bf215546Sopenharmony_ci   struct v3dv_device *device = pipeline->device;
3150bf215546Sopenharmony_ci   struct v3dv_physical_device *physical_device =
3151bf215546Sopenharmony_ci      &device->instance->physicalDevice;
3152bf215546Sopenharmony_ci
3153bf215546Sopenharmony_ci   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3154bf215546Sopenharmony_ci   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3155bf215546Sopenharmony_ci
3156bf215546Sopenharmony_ci   struct v3dv_pipeline_stage *p_stage =
3157bf215546Sopenharmony_ci      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3158bf215546Sopenharmony_ci                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3159bf215546Sopenharmony_ci   if (!p_stage)
3160bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
3161bf215546Sopenharmony_ci
3162bf215546Sopenharmony_ci   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3163bf215546Sopenharmony_ci   p_stage->pipeline = pipeline;
3164bf215546Sopenharmony_ci   p_stage->stage = gl_shader_stage_to_broadcom(stage);
3165bf215546Sopenharmony_ci   p_stage->entrypoint = sinfo->pName;
3166bf215546Sopenharmony_ci   p_stage->module = vk_shader_module_from_handle(sinfo->module);
3167bf215546Sopenharmony_ci   p_stage->spec_info = sinfo->pSpecializationInfo;
3168bf215546Sopenharmony_ci   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3169bf215546Sopenharmony_ci
3170bf215546Sopenharmony_ci   vk_pipeline_hash_shader_stage(&info->stage, p_stage->shader_sha1);
3171bf215546Sopenharmony_ci
3172bf215546Sopenharmony_ci   p_stage->nir = NULL;
3173bf215546Sopenharmony_ci
3174bf215546Sopenharmony_ci   pipeline->cs = p_stage;
3175bf215546Sopenharmony_ci   pipeline->active_stages |= sinfo->stage;
3176bf215546Sopenharmony_ci
3177bf215546Sopenharmony_ci   /* First we try to get the variants from the pipeline cache (unless we are
3178bf215546Sopenharmony_ci    * required to capture internal representations, since in that case we need
3179bf215546Sopenharmony_ci    * compile).
3180bf215546Sopenharmony_ci    */
3181bf215546Sopenharmony_ci   bool needs_executable_info =
3182bf215546Sopenharmony_ci      info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3183bf215546Sopenharmony_ci   if (!needs_executable_info) {
3184bf215546Sopenharmony_ci      struct v3dv_pipeline_key pipeline_key;
3185bf215546Sopenharmony_ci      pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3186bf215546Sopenharmony_ci      pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3187bf215546Sopenharmony_ci
3188bf215546Sopenharmony_ci      bool cache_hit = false;
3189bf215546Sopenharmony_ci      pipeline->shared_data =
3190bf215546Sopenharmony_ci         v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3191bf215546Sopenharmony_ci
3192bf215546Sopenharmony_ci      if (pipeline->shared_data != NULL) {
3193bf215546Sopenharmony_ci         assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3194bf215546Sopenharmony_ci         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3195bf215546Sopenharmony_ci            pipeline_feedback.flags |=
3196bf215546Sopenharmony_ci               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3197bf215546Sopenharmony_ci
3198bf215546Sopenharmony_ci         goto success;
3199bf215546Sopenharmony_ci      }
3200bf215546Sopenharmony_ci   }
3201bf215546Sopenharmony_ci
3202bf215546Sopenharmony_ci   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3203bf215546Sopenharmony_ci      return VK_PIPELINE_COMPILE_REQUIRED;
3204bf215546Sopenharmony_ci
3205bf215546Sopenharmony_ci   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3206bf215546Sopenharmony_ci                                                               pipeline,
3207bf215546Sopenharmony_ci                                                               false);
3208bf215546Sopenharmony_ci   if (!pipeline->shared_data)
3209bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
3210bf215546Sopenharmony_ci
3211bf215546Sopenharmony_ci   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3212bf215546Sopenharmony_ci
3213bf215546Sopenharmony_ci   /* If not found on cache, compile it */
3214bf215546Sopenharmony_ci   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3215bf215546Sopenharmony_ci   assert(p_stage->nir);
3216bf215546Sopenharmony_ci
3217bf215546Sopenharmony_ci   nir_optimize(p_stage->nir, false);
3218bf215546Sopenharmony_ci   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3219bf215546Sopenharmony_ci   lower_cs_shared(p_stage->nir);
3220bf215546Sopenharmony_ci
3221bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
3222bf215546Sopenharmony_ci
3223bf215546Sopenharmony_ci   struct v3d_key key;
3224bf215546Sopenharmony_ci   memset(&key, 0, sizeof(key));
3225bf215546Sopenharmony_ci   pipeline_populate_v3d_key(&key, p_stage, 0,
3226bf215546Sopenharmony_ci                             pipeline->device->features.robustBufferAccess);
3227bf215546Sopenharmony_ci   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3228bf215546Sopenharmony_ci      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3229bf215546Sopenharmony_ci                                      alloc, &result);
3230bf215546Sopenharmony_ci
3231bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
3232bf215546Sopenharmony_ci      return result;
3233bf215546Sopenharmony_ci
3234bf215546Sopenharmony_ci   if (!upload_assembly(pipeline))
3235bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3236bf215546Sopenharmony_ci
3237bf215546Sopenharmony_ci   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3238bf215546Sopenharmony_ci
3239bf215546Sopenharmony_cisuccess:
3240bf215546Sopenharmony_ci
3241bf215546Sopenharmony_ci   pipeline_check_buffer_device_address(pipeline);
3242bf215546Sopenharmony_ci
3243bf215546Sopenharmony_ci   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3244bf215546Sopenharmony_ci   write_creation_feedback(pipeline,
3245bf215546Sopenharmony_ci                           info->pNext,
3246bf215546Sopenharmony_ci                           &pipeline_feedback,
3247bf215546Sopenharmony_ci                           1,
3248bf215546Sopenharmony_ci                           &info->stage);
3249bf215546Sopenharmony_ci
3250bf215546Sopenharmony_ci   /* As we got the variants in pipeline->shared_data, after compiling we
3251bf215546Sopenharmony_ci    * don't need the pipeline_stages.
3252bf215546Sopenharmony_ci    */
3253bf215546Sopenharmony_ci   if (!needs_executable_info)
3254bf215546Sopenharmony_ci      pipeline_free_stages(device, pipeline, alloc);
3255bf215546Sopenharmony_ci
3256bf215546Sopenharmony_ci   pipeline_check_spill_size(pipeline);
3257bf215546Sopenharmony_ci
3258bf215546Sopenharmony_ci   return VK_SUCCESS;
3259bf215546Sopenharmony_ci}
3260bf215546Sopenharmony_ci
3261bf215546Sopenharmony_cistatic VkResult
3262bf215546Sopenharmony_cicompute_pipeline_init(struct v3dv_pipeline *pipeline,
3263bf215546Sopenharmony_ci                      struct v3dv_device *device,
3264bf215546Sopenharmony_ci                      struct v3dv_pipeline_cache *cache,
3265bf215546Sopenharmony_ci                      const VkComputePipelineCreateInfo *info,
3266bf215546Sopenharmony_ci                      const VkAllocationCallbacks *alloc)
3267bf215546Sopenharmony_ci{
3268bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3269bf215546Sopenharmony_ci
3270bf215546Sopenharmony_ci   pipeline->device = device;
3271bf215546Sopenharmony_ci   pipeline->layout = layout;
3272bf215546Sopenharmony_ci
3273bf215546Sopenharmony_ci   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3274bf215546Sopenharmony_ci
3275bf215546Sopenharmony_ci   return result;
3276bf215546Sopenharmony_ci}
3277bf215546Sopenharmony_ci
3278bf215546Sopenharmony_cistatic VkResult
3279bf215546Sopenharmony_cicompute_pipeline_create(VkDevice _device,
3280bf215546Sopenharmony_ci                         VkPipelineCache _cache,
3281bf215546Sopenharmony_ci                         const VkComputePipelineCreateInfo *pCreateInfo,
3282bf215546Sopenharmony_ci                         const VkAllocationCallbacks *pAllocator,
3283bf215546Sopenharmony_ci                         VkPipeline *pPipeline)
3284bf215546Sopenharmony_ci{
3285bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3286bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3287bf215546Sopenharmony_ci
3288bf215546Sopenharmony_ci   struct v3dv_pipeline *pipeline;
3289bf215546Sopenharmony_ci   VkResult result;
3290bf215546Sopenharmony_ci
3291bf215546Sopenharmony_ci   /* Use the default pipeline cache if none is specified */
3292bf215546Sopenharmony_ci   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3293bf215546Sopenharmony_ci      cache = &device->default_pipeline_cache;
3294bf215546Sopenharmony_ci
3295bf215546Sopenharmony_ci   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3296bf215546Sopenharmony_ci                               VK_OBJECT_TYPE_PIPELINE);
3297bf215546Sopenharmony_ci   if (pipeline == NULL)
3298bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3299bf215546Sopenharmony_ci
3300bf215546Sopenharmony_ci   result = compute_pipeline_init(pipeline, device, cache,
3301bf215546Sopenharmony_ci                                  pCreateInfo, pAllocator);
3302bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
3303bf215546Sopenharmony_ci      v3dv_destroy_pipeline(pipeline, device, pAllocator);
3304bf215546Sopenharmony_ci      if (result == VK_PIPELINE_COMPILE_REQUIRED)
3305bf215546Sopenharmony_ci         *pPipeline = VK_NULL_HANDLE;
3306bf215546Sopenharmony_ci      return result;
3307bf215546Sopenharmony_ci   }
3308bf215546Sopenharmony_ci
3309bf215546Sopenharmony_ci   *pPipeline = v3dv_pipeline_to_handle(pipeline);
3310bf215546Sopenharmony_ci
3311bf215546Sopenharmony_ci   return VK_SUCCESS;
3312bf215546Sopenharmony_ci}
3313bf215546Sopenharmony_ci
3314bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
3315bf215546Sopenharmony_civ3dv_CreateComputePipelines(VkDevice _device,
3316bf215546Sopenharmony_ci                            VkPipelineCache pipelineCache,
3317bf215546Sopenharmony_ci                            uint32_t createInfoCount,
3318bf215546Sopenharmony_ci                            const VkComputePipelineCreateInfo *pCreateInfos,
3319bf215546Sopenharmony_ci                            const VkAllocationCallbacks *pAllocator,
3320bf215546Sopenharmony_ci                            VkPipeline *pPipelines)
3321bf215546Sopenharmony_ci{
3322bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3323bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
3324bf215546Sopenharmony_ci
3325bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3326bf215546Sopenharmony_ci      mtx_lock(&device->pdevice->mutex);
3327bf215546Sopenharmony_ci
3328bf215546Sopenharmony_ci   uint32_t i = 0;
3329bf215546Sopenharmony_ci   for (; i < createInfoCount; i++) {
3330bf215546Sopenharmony_ci      VkResult local_result;
3331bf215546Sopenharmony_ci      local_result = compute_pipeline_create(_device,
3332bf215546Sopenharmony_ci                                              pipelineCache,
3333bf215546Sopenharmony_ci                                              &pCreateInfos[i],
3334bf215546Sopenharmony_ci                                              pAllocator,
3335bf215546Sopenharmony_ci                                              &pPipelines[i]);
3336bf215546Sopenharmony_ci
3337bf215546Sopenharmony_ci      if (local_result != VK_SUCCESS) {
3338bf215546Sopenharmony_ci         result = local_result;
3339bf215546Sopenharmony_ci         pPipelines[i] = VK_NULL_HANDLE;
3340bf215546Sopenharmony_ci
3341bf215546Sopenharmony_ci         if (pCreateInfos[i].flags &
3342bf215546Sopenharmony_ci             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3343bf215546Sopenharmony_ci            break;
3344bf215546Sopenharmony_ci      }
3345bf215546Sopenharmony_ci   }
3346bf215546Sopenharmony_ci
3347bf215546Sopenharmony_ci   for (; i < createInfoCount; i++)
3348bf215546Sopenharmony_ci      pPipelines[i] = VK_NULL_HANDLE;
3349bf215546Sopenharmony_ci
3350bf215546Sopenharmony_ci   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3351bf215546Sopenharmony_ci      mtx_unlock(&device->pdevice->mutex);
3352bf215546Sopenharmony_ci
3353bf215546Sopenharmony_ci   return result;
3354bf215546Sopenharmony_ci}
3355bf215546Sopenharmony_ci
3356bf215546Sopenharmony_cistatic nir_shader *
3357bf215546Sopenharmony_cipipeline_get_nir(struct v3dv_pipeline *pipeline,
3358bf215546Sopenharmony_ci                 enum broadcom_shader_stage stage)
3359bf215546Sopenharmony_ci{
3360bf215546Sopenharmony_ci   switch (stage) {
3361bf215546Sopenharmony_ci   case BROADCOM_SHADER_VERTEX:
3362bf215546Sopenharmony_ci      if (pipeline->vs)
3363bf215546Sopenharmony_ci         return pipeline->vs->nir;
3364bf215546Sopenharmony_ci      break;
3365bf215546Sopenharmony_ci   case BROADCOM_SHADER_VERTEX_BIN:
3366bf215546Sopenharmony_ci      if(pipeline->vs_bin)
3367bf215546Sopenharmony_ci         return pipeline->vs_bin->nir;
3368bf215546Sopenharmony_ci      break;
3369bf215546Sopenharmony_ci   case BROADCOM_SHADER_GEOMETRY:
3370bf215546Sopenharmony_ci      if(pipeline->gs)
3371bf215546Sopenharmony_ci         return pipeline->gs->nir;
3372bf215546Sopenharmony_ci      break;
3373bf215546Sopenharmony_ci   case BROADCOM_SHADER_GEOMETRY_BIN:
3374bf215546Sopenharmony_ci      if (pipeline->gs_bin)
3375bf215546Sopenharmony_ci         return pipeline->gs_bin->nir;
3376bf215546Sopenharmony_ci      break;
3377bf215546Sopenharmony_ci   case BROADCOM_SHADER_FRAGMENT:
3378bf215546Sopenharmony_ci      if (pipeline->fs)
3379bf215546Sopenharmony_ci         return pipeline->fs->nir;
3380bf215546Sopenharmony_ci      break;
3381bf215546Sopenharmony_ci   case BROADCOM_SHADER_COMPUTE:
3382bf215546Sopenharmony_ci      if(pipeline->cs)
3383bf215546Sopenharmony_ci         return pipeline->cs->nir;
3384bf215546Sopenharmony_ci      break;
3385bf215546Sopenharmony_ci   default:
3386bf215546Sopenharmony_ci      unreachable("Unsupported shader stage");
3387bf215546Sopenharmony_ci   }
3388bf215546Sopenharmony_ci
3389bf215546Sopenharmony_ci   return NULL;
3390bf215546Sopenharmony_ci}
3391bf215546Sopenharmony_ci
3392bf215546Sopenharmony_cistatic struct v3d_prog_data *
3393bf215546Sopenharmony_cipipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3394bf215546Sopenharmony_ci                       enum broadcom_shader_stage stage)
3395bf215546Sopenharmony_ci{
3396bf215546Sopenharmony_ci   if (pipeline->shared_data->variants[stage])
3397bf215546Sopenharmony_ci      return pipeline->shared_data->variants[stage]->prog_data.base;
3398bf215546Sopenharmony_ci   return NULL;
3399bf215546Sopenharmony_ci}
3400bf215546Sopenharmony_ci
3401bf215546Sopenharmony_cistatic uint64_t *
3402bf215546Sopenharmony_cipipeline_get_qpu(struct v3dv_pipeline *pipeline,
3403bf215546Sopenharmony_ci                 enum broadcom_shader_stage stage,
3404bf215546Sopenharmony_ci                 uint32_t *qpu_size)
3405bf215546Sopenharmony_ci{
3406bf215546Sopenharmony_ci   struct v3dv_shader_variant *variant =
3407bf215546Sopenharmony_ci      pipeline->shared_data->variants[stage];
3408bf215546Sopenharmony_ci   if (!variant) {
3409bf215546Sopenharmony_ci      *qpu_size = 0;
3410bf215546Sopenharmony_ci      return NULL;
3411bf215546Sopenharmony_ci   }
3412bf215546Sopenharmony_ci
3413bf215546Sopenharmony_ci   /* We expect the QPU BO to have been mapped before calling here */
3414bf215546Sopenharmony_ci   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3415bf215546Sopenharmony_ci   assert(qpu_bo && qpu_bo->map_size >= variant->assembly_offset +
3416bf215546Sopenharmony_ci                                        variant->qpu_insts_size);
3417bf215546Sopenharmony_ci
3418bf215546Sopenharmony_ci   *qpu_size = variant->qpu_insts_size;
3419bf215546Sopenharmony_ci   uint64_t *qpu = (uint64_t *)
3420bf215546Sopenharmony_ci      (((uint8_t *) qpu_bo->map) + variant->assembly_offset);
3421bf215546Sopenharmony_ci   return qpu;
3422bf215546Sopenharmony_ci}
3423bf215546Sopenharmony_ci
/* Formats a printf-style string into the fixed-size char array `field`.
 *
 * The array is zeroed first and then filled with snprintf(). `field` must be
 * an actual array (not a pointer), since sizeof(field) is used as its
 * capacity. The assert requires the result to be non-empty and to fit
 * without truncation.
 *
 * FIXME: we use the same macro in various drivers, maybe move it to
 * the common vk_util.h?
 */
#define WRITE_STR(field, ...) ({                                \
   memset(field, 0, sizeof(field));                             \
   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(_i > 0 && (size_t)_i < sizeof(field));                \
})
3432bf215546Sopenharmony_ci
3433bf215546Sopenharmony_cistatic bool
3434bf215546Sopenharmony_ciwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3435bf215546Sopenharmony_ci              const char *data)
3436bf215546Sopenharmony_ci{
3437bf215546Sopenharmony_ci   ir->isText = VK_TRUE;
3438bf215546Sopenharmony_ci
3439bf215546Sopenharmony_ci   size_t data_len = strlen(data) + 1;
3440bf215546Sopenharmony_ci
3441bf215546Sopenharmony_ci   if (ir->pData == NULL) {
3442bf215546Sopenharmony_ci      ir->dataSize = data_len;
3443bf215546Sopenharmony_ci      return true;
3444bf215546Sopenharmony_ci   }
3445bf215546Sopenharmony_ci
3446bf215546Sopenharmony_ci   strncpy(ir->pData, data, ir->dataSize);
3447bf215546Sopenharmony_ci   if (ir->dataSize < data_len)
3448bf215546Sopenharmony_ci      return false;
3449bf215546Sopenharmony_ci
3450bf215546Sopenharmony_ci   ir->dataSize = data_len;
3451bf215546Sopenharmony_ci   return true;
3452bf215546Sopenharmony_ci}
3453bf215546Sopenharmony_ci
/* printf-style helper that forwards its formatted arguments, together with
 * `str` and `offset`, to ralloc_vasprintf_rewrite_tail().
 */
static void
append(char **str, size_t *offset, const char *fmt, ...)
{
   va_list ap;

   va_start(ap, fmt);
   ralloc_vasprintf_rewrite_tail(str, offset, fmt, ap);
   va_end(ap);
}
3462bf215546Sopenharmony_ci
3463bf215546Sopenharmony_cistatic void
3464bf215546Sopenharmony_cipipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3465bf215546Sopenharmony_ci{
3466bf215546Sopenharmony_ci   if (pipeline->executables.mem_ctx)
3467bf215546Sopenharmony_ci      return;
3468bf215546Sopenharmony_ci
3469bf215546Sopenharmony_ci   pipeline->executables.mem_ctx = ralloc_context(NULL);
3470bf215546Sopenharmony_ci   util_dynarray_init(&pipeline->executables.data,
3471bf215546Sopenharmony_ci                      pipeline->executables.mem_ctx);
3472bf215546Sopenharmony_ci
3473bf215546Sopenharmony_ci   /* Don't crash for failed/bogus pipelines */
3474bf215546Sopenharmony_ci   if (!pipeline->shared_data || !pipeline->shared_data->assembly_bo)
3475bf215546Sopenharmony_ci      return;
3476bf215546Sopenharmony_ci
3477bf215546Sopenharmony_ci   /* Map the assembly BO so we can read the pipeline's QPU code */
3478bf215546Sopenharmony_ci   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3479bf215546Sopenharmony_ci
3480bf215546Sopenharmony_ci   if (!v3dv_bo_map(pipeline->device, qpu_bo, qpu_bo->size)) {
3481bf215546Sopenharmony_ci      fprintf(stderr, "failed to map QPU buffer\n");
3482bf215546Sopenharmony_ci      return;
3483bf215546Sopenharmony_ci   }
3484bf215546Sopenharmony_ci
3485bf215546Sopenharmony_ci   for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3486bf215546Sopenharmony_ci      VkShaderStageFlags vk_stage =
3487bf215546Sopenharmony_ci         mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3488bf215546Sopenharmony_ci      if (!(vk_stage & pipeline->active_stages))
3489bf215546Sopenharmony_ci         continue;
3490bf215546Sopenharmony_ci
3491bf215546Sopenharmony_ci      nir_shader *nir = pipeline_get_nir(pipeline, s);
3492bf215546Sopenharmony_ci      char *nir_str = nir ?
3493bf215546Sopenharmony_ci         nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3494bf215546Sopenharmony_ci
3495bf215546Sopenharmony_ci      char *qpu_str = NULL;
3496bf215546Sopenharmony_ci      uint32_t qpu_size;
3497bf215546Sopenharmony_ci      uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3498bf215546Sopenharmony_ci      if (qpu) {
3499bf215546Sopenharmony_ci         uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
3500bf215546Sopenharmony_ci         qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3501bf215546Sopenharmony_ci                                qpu_inst_count * 96);
3502bf215546Sopenharmony_ci         size_t offset = 0;
3503bf215546Sopenharmony_ci         for (int i = 0; i < qpu_inst_count; i++) {
3504bf215546Sopenharmony_ci            const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3505bf215546Sopenharmony_ci            append(&qpu_str, &offset, "%s\n", str);
3506bf215546Sopenharmony_ci            ralloc_free((void *)str);
3507bf215546Sopenharmony_ci         }
3508bf215546Sopenharmony_ci      }
3509bf215546Sopenharmony_ci
3510bf215546Sopenharmony_ci      struct v3dv_pipeline_executable_data data = {
3511bf215546Sopenharmony_ci         .stage = s,
3512bf215546Sopenharmony_ci         .nir_str = nir_str,
3513bf215546Sopenharmony_ci         .qpu_str = qpu_str,
3514bf215546Sopenharmony_ci      };
3515bf215546Sopenharmony_ci      util_dynarray_append(&pipeline->executables.data,
3516bf215546Sopenharmony_ci                           struct v3dv_pipeline_executable_data, data);
3517bf215546Sopenharmony_ci   }
3518bf215546Sopenharmony_ci
3519bf215546Sopenharmony_ci   v3dv_bo_unmap(pipeline->device, qpu_bo);
3520bf215546Sopenharmony_ci}
3521bf215546Sopenharmony_ci
3522bf215546Sopenharmony_cistatic const struct v3dv_pipeline_executable_data *
3523bf215546Sopenharmony_cipipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3524bf215546Sopenharmony_ci{
3525bf215546Sopenharmony_ci   assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3526bf215546Sopenharmony_ci                                             struct v3dv_pipeline_executable_data));
3527bf215546Sopenharmony_ci   return util_dynarray_element(&pipeline->executables.data,
3528bf215546Sopenharmony_ci                                struct v3dv_pipeline_executable_data,
3529bf215546Sopenharmony_ci                                index);
3530bf215546Sopenharmony_ci}
3531bf215546Sopenharmony_ci
3532bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
3533bf215546Sopenharmony_civ3dv_GetPipelineExecutableInternalRepresentationsKHR(
3534bf215546Sopenharmony_ci   VkDevice device,
3535bf215546Sopenharmony_ci   const VkPipelineExecutableInfoKHR *pExecutableInfo,
3536bf215546Sopenharmony_ci   uint32_t *pInternalRepresentationCount,
3537bf215546Sopenharmony_ci   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3538bf215546Sopenharmony_ci{
3539bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3540bf215546Sopenharmony_ci
3541bf215546Sopenharmony_ci   pipeline_collect_executable_data(pipeline);
3542bf215546Sopenharmony_ci
3543bf215546Sopenharmony_ci   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3544bf215546Sopenharmony_ci                          pInternalRepresentations, pInternalRepresentationCount);
3545bf215546Sopenharmony_ci
3546bf215546Sopenharmony_ci   bool incomplete = false;
3547bf215546Sopenharmony_ci   const struct v3dv_pipeline_executable_data *exe =
3548bf215546Sopenharmony_ci      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3549bf215546Sopenharmony_ci
3550bf215546Sopenharmony_ci   if (exe->nir_str) {
3551bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3552bf215546Sopenharmony_ci                               &out, ir) {
3553bf215546Sopenharmony_ci         WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3554bf215546Sopenharmony_ci         WRITE_STR(ir->description, "Final NIR form");
3555bf215546Sopenharmony_ci         if (!write_ir_text(ir, exe->nir_str))
3556bf215546Sopenharmony_ci            incomplete = true;
3557bf215546Sopenharmony_ci      }
3558bf215546Sopenharmony_ci   }
3559bf215546Sopenharmony_ci
3560bf215546Sopenharmony_ci   if (exe->qpu_str) {
3561bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3562bf215546Sopenharmony_ci                               &out, ir) {
3563bf215546Sopenharmony_ci         WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3564bf215546Sopenharmony_ci         WRITE_STR(ir->description, "Final QPU assembly");
3565bf215546Sopenharmony_ci         if (!write_ir_text(ir, exe->qpu_str))
3566bf215546Sopenharmony_ci            incomplete = true;
3567bf215546Sopenharmony_ci      }
3568bf215546Sopenharmony_ci   }
3569bf215546Sopenharmony_ci
3570bf215546Sopenharmony_ci   return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3571bf215546Sopenharmony_ci}
3572bf215546Sopenharmony_ci
3573bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
3574bf215546Sopenharmony_civ3dv_GetPipelineExecutablePropertiesKHR(
3575bf215546Sopenharmony_ci   VkDevice device,
3576bf215546Sopenharmony_ci   const VkPipelineInfoKHR *pPipelineInfo,
3577bf215546Sopenharmony_ci   uint32_t *pExecutableCount,
3578bf215546Sopenharmony_ci   VkPipelineExecutablePropertiesKHR *pProperties)
3579bf215546Sopenharmony_ci{
3580bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3581bf215546Sopenharmony_ci
3582bf215546Sopenharmony_ci   pipeline_collect_executable_data(pipeline);
3583bf215546Sopenharmony_ci
3584bf215546Sopenharmony_ci   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3585bf215546Sopenharmony_ci                          pProperties, pExecutableCount);
3586bf215546Sopenharmony_ci
3587bf215546Sopenharmony_ci   util_dynarray_foreach(&pipeline->executables.data,
3588bf215546Sopenharmony_ci                         struct v3dv_pipeline_executable_data, exe) {
3589bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3590bf215546Sopenharmony_ci         gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3591bf215546Sopenharmony_ci         props->stages = mesa_to_vk_shader_stage(mesa_stage);
3592bf215546Sopenharmony_ci
3593bf215546Sopenharmony_ci         WRITE_STR(props->name, "%s (%s)",
3594bf215546Sopenharmony_ci                   _mesa_shader_stage_to_abbrev(mesa_stage),
3595bf215546Sopenharmony_ci                   broadcom_shader_stage_is_binning(exe->stage) ?
3596bf215546Sopenharmony_ci                     "Binning" : "Render");
3597bf215546Sopenharmony_ci
3598bf215546Sopenharmony_ci         WRITE_STR(props->description, "%s",
3599bf215546Sopenharmony_ci                   _mesa_shader_stage_to_string(mesa_stage));
3600bf215546Sopenharmony_ci
3601bf215546Sopenharmony_ci         props->subgroupSize = V3D_CHANNELS;
3602bf215546Sopenharmony_ci      }
3603bf215546Sopenharmony_ci   }
3604bf215546Sopenharmony_ci
3605bf215546Sopenharmony_ci   return vk_outarray_status(&out);
3606bf215546Sopenharmony_ci}
3607bf215546Sopenharmony_ci
3608bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
3609bf215546Sopenharmony_civ3dv_GetPipelineExecutableStatisticsKHR(
3610bf215546Sopenharmony_ci   VkDevice device,
3611bf215546Sopenharmony_ci   const VkPipelineExecutableInfoKHR *pExecutableInfo,
3612bf215546Sopenharmony_ci   uint32_t *pStatisticCount,
3613bf215546Sopenharmony_ci   VkPipelineExecutableStatisticKHR *pStatistics)
3614bf215546Sopenharmony_ci{
3615bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3616bf215546Sopenharmony_ci
3617bf215546Sopenharmony_ci   pipeline_collect_executable_data(pipeline);
3618bf215546Sopenharmony_ci
3619bf215546Sopenharmony_ci   const struct v3dv_pipeline_executable_data *exe =
3620bf215546Sopenharmony_ci      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3621bf215546Sopenharmony_ci
3622bf215546Sopenharmony_ci   struct v3d_prog_data *prog_data =
3623bf215546Sopenharmony_ci      pipeline_get_prog_data(pipeline, exe->stage);
3624bf215546Sopenharmony_ci
3625bf215546Sopenharmony_ci   struct v3dv_shader_variant *variant =
3626bf215546Sopenharmony_ci      pipeline->shared_data->variants[exe->stage];
3627bf215546Sopenharmony_ci   uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3628bf215546Sopenharmony_ci
3629bf215546Sopenharmony_ci   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3630bf215546Sopenharmony_ci                          pStatistics, pStatisticCount);
3631bf215546Sopenharmony_ci
3632bf215546Sopenharmony_ci   if (qpu_inst_count > 0) {
3633bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3634bf215546Sopenharmony_ci         WRITE_STR(stat->name, "Compile Strategy");
3635bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Chosen compile strategy index");
3636bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3637bf215546Sopenharmony_ci         stat->value.u64 = prog_data->compile_strategy_idx;
3638bf215546Sopenharmony_ci      }
3639bf215546Sopenharmony_ci
3640bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3641bf215546Sopenharmony_ci         WRITE_STR(stat->name, "Instruction Count");
3642bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Number of QPU instructions");
3643bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3644bf215546Sopenharmony_ci         stat->value.u64 = qpu_inst_count;
3645bf215546Sopenharmony_ci      }
3646bf215546Sopenharmony_ci
3647bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3648bf215546Sopenharmony_ci         WRITE_STR(stat->name, "Thread Count");
3649bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Number of QPU threads dispatched");
3650bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3651bf215546Sopenharmony_ci         stat->value.u64 = prog_data->threads;
3652bf215546Sopenharmony_ci      }
3653bf215546Sopenharmony_ci
3654bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3655bf215546Sopenharmony_ci         WRITE_STR(stat->name, "Spill Size");
3656bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3657bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3658bf215546Sopenharmony_ci         stat->value.u64 = prog_data->spill_size;
3659bf215546Sopenharmony_ci      }
3660bf215546Sopenharmony_ci
3661bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3662bf215546Sopenharmony_ci         WRITE_STR(stat->name, "TMU Spills");
3663bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Number of times a register was spilled "
3664bf215546Sopenharmony_ci                                      "to memory");
3665bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3666bf215546Sopenharmony_ci         stat->value.u64 = prog_data->spill_size;
3667bf215546Sopenharmony_ci      }
3668bf215546Sopenharmony_ci
3669bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3670bf215546Sopenharmony_ci         WRITE_STR(stat->name, "TMU Fills");
3671bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Number of times a register was filled "
3672bf215546Sopenharmony_ci                                      "from memory");
3673bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3674bf215546Sopenharmony_ci         stat->value.u64 = prog_data->spill_size;
3675bf215546Sopenharmony_ci      }
3676bf215546Sopenharmony_ci
3677bf215546Sopenharmony_ci      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3678bf215546Sopenharmony_ci         WRITE_STR(stat->name, "QPU Read Stalls");
3679bf215546Sopenharmony_ci         WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3680bf215546Sopenharmony_ci                                      "register read dependency");
3681bf215546Sopenharmony_ci         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3682bf215546Sopenharmony_ci         stat->value.u64 = prog_data->qpu_read_stalls;
3683bf215546Sopenharmony_ci      }
3684bf215546Sopenharmony_ci   }
3685bf215546Sopenharmony_ci
3686bf215546Sopenharmony_ci   return vk_outarray_status(&out);
3687bf215546Sopenharmony_ci}
3688