1/*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "vk_util.h"
25
26#include "v3dv_debug.h"
27#include "v3dv_private.h"
28
29#include "common/v3d_debug.h"
30#include "qpu/qpu_disasm.h"
31
32#include "compiler/nir/nir_builder.h"
33#include "nir/nir_serialize.h"
34
35#include "util/u_atomic.h"
36#include "util/u_prim.h"
37#include "util/os_time.h"
38
39#include "vk_pipeline.h"
40#include "vulkan/util/vk_format.h"
41
42static VkResult
43compute_vpm_config(struct v3dv_pipeline *pipeline);
44
45void
46v3dv_print_v3d_key(struct v3d_key *key,
47                   uint32_t v3d_key_size)
48{
49   struct mesa_sha1 ctx;
50   unsigned char sha1[20];
51   char sha1buf[41];
52
53   _mesa_sha1_init(&ctx);
54
55   _mesa_sha1_update(&ctx, key, v3d_key_size);
56
57   _mesa_sha1_final(&ctx, sha1);
58   _mesa_sha1_format(sha1buf, sha1);
59
60   fprintf(stderr, "key %p: %s\n", key, sha1buf);
61}
62
63static void
64pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
65{
66   VkPipelineShaderStageCreateInfo info = {
67      .module = vk_shader_module_handle_from_nir(p_stage->nir),
68      .pName = p_stage->entrypoint,
69      .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70   };
71
72   vk_pipeline_hash_shader_stage(&info, p_stage->shader_sha1);
73}
74
75void
76v3dv_shader_variant_destroy(struct v3dv_device *device,
77                            struct v3dv_shader_variant *variant)
78{
79   /* The assembly BO is shared by all variants in the pipeline, so it can't
80    * be freed here and should be freed with the pipeline
81    */
82   if (variant->qpu_insts)
83      free(variant->qpu_insts);
84   ralloc_free(variant->prog_data.base);
85   vk_free(&device->vk.alloc, variant);
86}
87
88static void
89destroy_pipeline_stage(struct v3dv_device *device,
90                       struct v3dv_pipeline_stage *p_stage,
91                       const VkAllocationCallbacks *pAllocator)
92{
93   if (!p_stage)
94      return;
95
96   ralloc_free(p_stage->nir);
97   vk_free2(&device->vk.alloc, pAllocator, p_stage);
98}
99
100static void
101pipeline_free_stages(struct v3dv_device *device,
102                     struct v3dv_pipeline *pipeline,
103                     const VkAllocationCallbacks *pAllocator)
104{
105   assert(pipeline);
106
107   /* FIXME: we can't just use a loop over mesa stage due the bin, would be
108    * good to find an alternative.
109    */
110   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
111   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
112   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
113   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
114   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
115   destroy_pipeline_stage(device, pipeline->cs, pAllocator);
116
117   pipeline->vs = NULL;
118   pipeline->vs_bin = NULL;
119   pipeline->gs = NULL;
120   pipeline->gs_bin = NULL;
121   pipeline->fs = NULL;
122   pipeline->cs = NULL;
123}
124
125static void
126v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
127                      struct v3dv_device *device,
128                      const VkAllocationCallbacks *pAllocator)
129{
130   if (!pipeline)
131      return;
132
133   pipeline_free_stages(device, pipeline, pAllocator);
134
135   if (pipeline->shared_data) {
136      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
137      pipeline->shared_data = NULL;
138   }
139
140   if (pipeline->spill.bo) {
141      assert(pipeline->spill.size_per_thread > 0);
142      v3dv_bo_free(device, pipeline->spill.bo);
143   }
144
145   if (pipeline->default_attribute_values) {
146      v3dv_bo_free(device, pipeline->default_attribute_values);
147      pipeline->default_attribute_values = NULL;
148   }
149
150   if (pipeline->executables.mem_ctx)
151      ralloc_free(pipeline->executables.mem_ctx);
152
153   vk_object_free(&device->vk, pAllocator, pipeline);
154}
155
156VKAPI_ATTR void VKAPI_CALL
157v3dv_DestroyPipeline(VkDevice _device,
158                     VkPipeline _pipeline,
159                     const VkAllocationCallbacks *pAllocator)
160{
161   V3DV_FROM_HANDLE(v3dv_device, device, _device);
162   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
163
164   if (!pipeline)
165      return;
166
167   v3dv_destroy_pipeline(pipeline, device, pAllocator);
168}
169
170static const struct spirv_to_nir_options default_spirv_options =  {
171   .caps = {
172      .device_group = true,
173      .float_controls = true,
174      .multiview = true,
175      .storage_8bit = true,
176      .storage_16bit = true,
177      .subgroup_basic = true,
178      .variable_pointers = true,
179      .vk_memory_model = true,
180      .vk_memory_model_device_scope = true,
181      .physical_storage_buffer_address = true,
182    },
183   .ubo_addr_format = nir_address_format_32bit_index_offset,
184   .ssbo_addr_format = nir_address_format_32bit_index_offset,
185   .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
186   .push_const_addr_format = nir_address_format_logical,
187   .shared_addr_format = nir_address_format_32bit_offset,
188};
189
190const nir_shader_compiler_options v3dv_nir_options = {
191   .lower_uadd_sat = true,
192   .lower_usub_sat = true,
193   .lower_iadd_sat = true,
194   .lower_all_io_to_temps = true,
195   .lower_extract_byte = true,
196   .lower_extract_word = true,
197   .lower_insert_byte = true,
198   .lower_insert_word = true,
199   .lower_bitfield_insert_to_shifts = true,
200   .lower_bitfield_extract_to_shifts = true,
201   .lower_bitfield_reverse = true,
202   .lower_bit_count = true,
203   .lower_cs_local_id_to_index = true,
204   .lower_ffract = true,
205   .lower_fmod = true,
206   .lower_pack_unorm_2x16 = true,
207   .lower_pack_snorm_2x16 = true,
208   .lower_unpack_unorm_2x16 = true,
209   .lower_unpack_snorm_2x16 = true,
210   .lower_pack_unorm_4x8 = true,
211   .lower_pack_snorm_4x8 = true,
212   .lower_unpack_unorm_4x8 = true,
213   .lower_unpack_snorm_4x8 = true,
214   .lower_pack_half_2x16 = true,
215   .lower_unpack_half_2x16 = true,
216   .lower_pack_32_2x16 = true,
217   .lower_pack_32_2x16_split = true,
218   .lower_unpack_32_2x16_split = true,
219   .lower_mul_2x32_64 = true,
220   .lower_fdiv = true,
221   .lower_find_lsb = true,
222   .lower_ffma16 = true,
223   .lower_ffma32 = true,
224   .lower_ffma64 = true,
225   .lower_flrp32 = true,
226   .lower_fpow = true,
227   .lower_fsat = true,
228   .lower_fsqrt = true,
229   .lower_ifind_msb = true,
230   .lower_isign = true,
231   .lower_ldexp = true,
232   .lower_mul_high = true,
233   .lower_wpos_pntc = true,
234   .lower_rotate = true,
235   .lower_to_scalar = true,
236   .lower_device_index_to_zero = true,
237   .has_fsub = true,
238   .has_isub = true,
239   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
240                                   * needs to be supported */
241   .lower_interpolate_at = true,
242   .max_unroll_iterations = 16,
243   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
244   .divergence_analysis_options =
245      nir_divergence_multiple_workgroup_per_compute_subgroup
246};
247
248const nir_shader_compiler_options *
249v3dv_pipeline_get_nir_options(void)
250{
251   return &v3dv_nir_options;
252}
253
254#define OPT(pass, ...) ({                                  \
255   bool this_progress = false;                             \
256   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
257   if (this_progress)                                      \
258      progress = true;                                     \
259   this_progress;                                          \
260})
261
262static void
263nir_optimize(nir_shader *nir, bool allow_copies)
264{
265   bool progress;
266
267   do {
268      progress = false;
269      OPT(nir_split_array_vars, nir_var_function_temp);
270      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
271      OPT(nir_opt_deref);
272      OPT(nir_lower_vars_to_ssa);
273      if (allow_copies) {
274         /* Only run this pass in the first call to nir_optimize.  Later calls
275          * assume that we've lowered away any copy_deref instructions and we
276          * don't want to introduce any more.
277          */
278         OPT(nir_opt_find_array_copies);
279      }
280
281      OPT(nir_remove_dead_variables,
282          (nir_variable_mode)(nir_var_function_temp |
283                              nir_var_shader_temp |
284                              nir_var_mem_shared),
285          NULL);
286
287      OPT(nir_opt_copy_prop_vars);
288      OPT(nir_opt_dead_write_vars);
289      OPT(nir_opt_combine_stores, nir_var_all);
290
291      OPT(nir_lower_alu_to_scalar, NULL, NULL);
292
293      OPT(nir_copy_prop);
294      OPT(nir_lower_phis_to_scalar, false);
295
296      OPT(nir_copy_prop);
297      OPT(nir_opt_dce);
298      OPT(nir_opt_cse);
299      OPT(nir_opt_combine_stores, nir_var_all);
300
301      /* Passing 0 to the peephole select pass causes it to convert
302       * if-statements that contain only move instructions in the branches
303       * regardless of the count.
304       *
305       * Passing 1 to the peephole select pass causes it to convert
306       * if-statements that contain at most a single ALU instruction (total)
307       * in both branches.
308       */
309      OPT(nir_opt_peephole_select, 0, false, false);
310      OPT(nir_opt_peephole_select, 8, false, true);
311
312      OPT(nir_opt_intrinsics);
313      OPT(nir_opt_idiv_const, 32);
314      OPT(nir_opt_algebraic);
315      OPT(nir_lower_alu);
316      OPT(nir_opt_constant_folding);
317
318      OPT(nir_opt_dead_cf);
319      if (nir_opt_trivial_continues(nir)) {
320         progress = true;
321         OPT(nir_copy_prop);
322         OPT(nir_opt_dce);
323      }
324      OPT(nir_opt_conditional_discard);
325
326      OPT(nir_opt_remove_phis);
327      OPT(nir_opt_gcm, false);
328      OPT(nir_opt_if, nir_opt_if_optimize_phi_true_false);
329      OPT(nir_opt_undef);
330      OPT(nir_lower_pack);
331
332      /* There are two optimizations that we don't do here, and we rely on the
333       * backend:
334       *
335       * nir_lower_flrp only needs to be called once, as nothing should
336       * rematerialize any flrps. As we are already calling it on the backend
337       * compiler, we don't call it again.
338       *
339       * nir_opt_loop_unroll: as the backend includes custom strategies in
340       * order to get the lowest spill/fills possible, and some of them
341       * include disable loop unrolling.
342       *
343       * FIXME: ideally we would like to just remove this method and
344       * v3d_optimize_nir. But:
345       *
346       *   * Using it leads to some regressions on Vulkan CTS tests, due to
347       *     some lowering use there
348       *   * We would need to move to the backend some additional
349       *     lowerings/optimizations that are used on the Vulkan
350       *     frontend. That would require to check that we are not getting any
351       *     regression or performance drop on OpenGL
352       *
353       * For now we would keep this Vulkan fronted nir_optimize
354       */
355
356   } while (progress);
357}
358
359static void
360preprocess_nir(nir_shader *nir)
361{
362   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
363      .frag_coord = true,
364      .point_coord = true,
365   };
366   NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
367
368   /* Vulkan uses the separate-shader linking model */
369   nir->info.separate_shader = true;
370
371   /* Make sure we lower variable initializers on output variables so that
372    * nir_remove_dead_variables below sees the corresponding stores
373    */
374   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
375
376   if (nir->info.stage == MESA_SHADER_FRAGMENT)
377      NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
378   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
379      NIR_PASS(_, nir, nir_lower_input_attachments,
380                 &(nir_input_attachment_options) {
381                    .use_fragcoord_sysval = false,
382                       });
383   }
384
385   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
386              nir_shader_get_entrypoint(nir), true, false);
387
388   NIR_PASS(_, nir, nir_lower_system_values);
389
390   NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
391
392   NIR_PASS(_, nir, nir_normalize_cubemap_coords);
393
394   NIR_PASS(_, nir, nir_lower_global_vars_to_local);
395
396   NIR_PASS(_, nir, nir_split_var_copies);
397   NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
398
399   nir_optimize(nir, true);
400
401   NIR_PASS(_, nir, nir_lower_explicit_io,
402            nir_var_mem_push_const,
403            nir_address_format_32bit_offset);
404
405   NIR_PASS(_, nir, nir_lower_explicit_io,
406            nir_var_mem_ubo | nir_var_mem_ssbo,
407            nir_address_format_32bit_index_offset);
408
409   NIR_PASS(_, nir, nir_lower_explicit_io,
410            nir_var_mem_global,
411            nir_address_format_2x32bit_global);
412
413   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
414
415   /* Lower a bunch of stuff */
416   NIR_PASS(_, nir, nir_lower_var_copies);
417
418   NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
419
420   NIR_PASS(_, nir, nir_lower_indirect_derefs,
421            nir_var_function_temp, 2);
422
423   NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
424            nir_var_mem_ubo | nir_var_mem_ssbo,
425            nir_lower_direct_array_deref_of_vec_load);
426
427   NIR_PASS(_, nir, nir_lower_frexp);
428
429   /* Get rid of split copies */
430   nir_optimize(nir, false);
431}
432
433static nir_shader *
434shader_module_compile_to_nir(struct v3dv_device *device,
435                             struct v3dv_pipeline_stage *stage)
436{
437   nir_shader *nir;
438   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
439
440
441   if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL)
442      v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
443
444   /* vk_shader_module_to_nir also handles internal shaders, when module->nir
445    * != NULL. It also calls nir_validate_shader on both cases, so we don't
446    * call it again here.
447    */
448   VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
449                                             broadcom_shader_stage_to_gl(stage->stage),
450                                             stage->entrypoint,
451                                             stage->spec_info,
452                                             &default_spirv_options,
453                                             nir_options,
454                                             NULL, &nir);
455   if (result != VK_SUCCESS)
456      return NULL;
457   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
458
459   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) {
460      char sha1buf[41];
461      _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
462      nir->info.name = ralloc_strdup(nir, sha1buf);
463   }
464
465   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
466                             v3d_debug_flag_for_shader_stage(
467                                broadcom_shader_stage_to_gl(stage->stage))))) {
468      fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
469              broadcom_shader_stage_name(stage->stage),
470              stage->program_id);
471      nir_print_shader(nir, stderr);
472      fprintf(stderr, "\n");
473   }
474
475   preprocess_nir(nir);
476
477   return nir;
478}
479
480static int
481type_size_vec4(const struct glsl_type *type, bool bindless)
482{
483   return glsl_count_attribute_slots(type, false);
484}
485
486/* FIXME: the number of parameters for this method is somewhat big. Perhaps
487 * rethink.
488 */
489static unsigned
490descriptor_map_add(struct v3dv_descriptor_map *map,
491                   int set,
492                   int binding,
493                   int array_index,
494                   int array_size,
495                   int start_index,
496                   uint8_t return_size)
497{
498   assert(array_index < array_size);
499   assert(return_size == 16 || return_size == 32);
500
501   unsigned index = start_index;
502   for (; index < map->num_desc; index++) {
503      if (map->used[index] &&
504          set == map->set[index] &&
505          binding == map->binding[index] &&
506          array_index == map->array_index[index]) {
507         assert(array_size == map->array_size[index]);
508         if (return_size != map->return_size[index]) {
509            /* It the return_size is different it means that the same sampler
510             * was used for operations with different precision
511             * requirement. In this case we need to ensure that we use the
512             * larger one.
513             */
514            map->return_size[index] = 32;
515         }
516         return index;
517      } else if (!map->used[index]) {
518         break;
519      }
520   }
521
522   assert(index < DESCRIPTOR_MAP_SIZE);
523   assert(!map->used[index]);
524
525   map->used[index] = true;
526   map->set[index] = set;
527   map->binding[index] = binding;
528   map->array_index[index] = array_index;
529   map->array_size[index] = array_size;
530   map->return_size[index] = return_size;
531   map->num_desc = MAX2(map->num_desc, index + 1);
532
533   return index;
534}
535
536struct lower_pipeline_layout_state {
537   struct v3dv_pipeline *pipeline;
538   const struct v3dv_pipeline_layout *layout;
539   bool needs_default_sampler_state;
540};
541
542
543static void
544lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
545                         struct lower_pipeline_layout_state *state)
546{
547   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
548   instr->intrinsic = nir_intrinsic_load_uniform;
549}
550
551static struct v3dv_descriptor_map*
552pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
553                            VkDescriptorType desc_type,
554                            gl_shader_stage gl_stage,
555                            bool is_sampler)
556{
557   enum broadcom_shader_stage broadcom_stage =
558      gl_shader_stage_to_broadcom(gl_stage);
559
560   assert(pipeline->shared_data &&
561          pipeline->shared_data->maps[broadcom_stage]);
562
563   switch(desc_type) {
564   case VK_DESCRIPTOR_TYPE_SAMPLER:
565      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
566   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
567   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
568   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
569   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
570   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
571      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
572   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
573      return is_sampler ?
574         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
575         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
576   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
577   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
578   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
579      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
580   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
581   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
582      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
583   default:
584      unreachable("Descriptor type unknown or not having a descriptor map");
585   }
586}
587
588/* Gathers info from the intrinsic (set and binding) and then lowers it so it
589 * could be used by the v3d_compiler */
590static void
591lower_vulkan_resource_index(nir_builder *b,
592                            nir_intrinsic_instr *instr,
593                            struct lower_pipeline_layout_state *state)
594{
595   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
596
597   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
598
599   unsigned set = nir_intrinsic_desc_set(instr);
600   unsigned binding = nir_intrinsic_binding(instr);
601   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
602   struct v3dv_descriptor_set_binding_layout *binding_layout =
603      &set_layout->binding[binding];
604   unsigned index = 0;
605
606   switch (binding_layout->type) {
607   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
608   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
609   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
610   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
611   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
612      struct v3dv_descriptor_map *descriptor_map =
613         pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
614                                     b->shader->info.stage, false);
615
616      if (!const_val)
617         unreachable("non-constant vulkan_resource_index array index");
618
619      /* At compile-time we will need to know if we are processing a UBO load
620       * for an inline or a regular UBO so we can handle inline loads like
621       * push constants. At the level of NIR level however, the inline
622       * information is gone, so we rely on the index to make this distinction.
623       * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
624       * inline buffers. This means that at the descriptor map level
625       * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
626       * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
627       */
628      uint32_t start_index = 0;
629      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
630          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
631         start_index = MAX_INLINE_UNIFORM_BUFFERS;
632      }
633
634      index = descriptor_map_add(descriptor_map, set, binding,
635                                 const_val->u32,
636                                 binding_layout->array_size,
637                                 start_index,
638                                 32 /* return_size: doesn't really apply for this case */);
639
640      /* We always reserve index 0 for push constants */
641      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
642          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
643          binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
644         index++;
645      }
646
647      break;
648   }
649
650   default:
651      unreachable("unsupported descriptor type for vulkan_resource_index");
652      break;
653   }
654
655   /* Since we use the deref pass, both vulkan_resource_index and
656    * vulkan_load_descriptor return a vec2 providing an index and
657    * offset. Our backend compiler only cares about the index part.
658    */
659   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
660                            nir_imm_ivec2(b, index, 0));
661   nir_instr_remove(&instr->instr);
662}
663
664/* Returns return_size, so it could be used for the case of not having a
665 * sampler object
666 */
667static uint8_t
668lower_tex_src_to_offset(nir_builder *b,
669                        nir_tex_instr *instr,
670                        unsigned src_idx,
671                        struct lower_pipeline_layout_state *state)
672{
673   nir_ssa_def *index = NULL;
674   unsigned base_index = 0;
675   unsigned array_elements = 1;
676   nir_tex_src *src = &instr->src[src_idx];
677   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
678
679   /* We compute first the offsets */
680   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
681   while (deref->deref_type != nir_deref_type_var) {
682      assert(deref->parent.is_ssa);
683      nir_deref_instr *parent =
684         nir_instr_as_deref(deref->parent.ssa->parent_instr);
685
686      assert(deref->deref_type == nir_deref_type_array);
687
688      if (nir_src_is_const(deref->arr.index) && index == NULL) {
689         /* We're still building a direct index */
690         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
691      } else {
692         if (index == NULL) {
693            /* We used to be direct but not anymore */
694            index = nir_imm_int(b, base_index);
695            base_index = 0;
696         }
697
698         index = nir_iadd(b, index,
699                          nir_imul(b, nir_imm_int(b, array_elements),
700                                   nir_ssa_for_src(b, deref->arr.index, 1)));
701      }
702
703      array_elements *= glsl_get_length(parent->type);
704
705      deref = parent;
706   }
707
708   if (index)
709      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
710
711   /* We have the offsets, we apply them, rewriting the source or removing
712    * instr if needed
713    */
714   if (index) {
715      nir_instr_rewrite_src(&instr->instr, &src->src,
716                            nir_src_for_ssa(index));
717
718      src->src_type = is_sampler ?
719         nir_tex_src_sampler_offset :
720         nir_tex_src_texture_offset;
721   } else {
722      nir_tex_instr_remove_src(instr, src_idx);
723   }
724
725   uint32_t set = deref->var->data.descriptor_set;
726   uint32_t binding = deref->var->data.binding;
727   /* FIXME: this is a really simplified check for the precision to be used
728    * for the sampling. Right now we are ony checking for the variables used
729    * on the operation itself, but there are other cases that we could use to
730    * infer the precision requirement.
731    */
732   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
733                            deref->var->data.precision == GLSL_PRECISION_LOW;
734   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
735   struct v3dv_descriptor_set_binding_layout *binding_layout =
736      &set_layout->binding[binding];
737
738   /* For input attachments, the shader includes the attachment_idx. As we are
739    * treating them as a texture, we only want the base_index
740    */
741   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
742      deref->var->data.index + base_index :
743      base_index;
744
745   uint8_t return_size;
746   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
747      return_size = 16;
748   else  if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
749      return_size = 32;
750   else
751      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
752
753   struct v3dv_descriptor_map *map =
754      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
755                                  b->shader->info.stage, is_sampler);
756   int desc_index =
757      descriptor_map_add(map,
758                         deref->var->data.descriptor_set,
759                         deref->var->data.binding,
760                         array_index,
761                         binding_layout->array_size,
762                         0,
763                         return_size);
764
765   if (is_sampler)
766      instr->sampler_index = desc_index;
767   else
768      instr->texture_index = desc_index;
769
770   return return_size;
771}
772
773static bool
774lower_sampler(nir_builder *b,
775              nir_tex_instr *instr,
776              struct lower_pipeline_layout_state *state)
777{
778   uint8_t return_size = 0;
779
780   int texture_idx =
781      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
782
783   if (texture_idx >= 0)
784      return_size = lower_tex_src_to_offset(b, instr, texture_idx, state);
785
786   int sampler_idx =
787      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
788
789   if (sampler_idx >= 0)
790      lower_tex_src_to_offset(b, instr, sampler_idx, state);
791
792   if (texture_idx < 0 && sampler_idx < 0)
793      return false;
794
795   /* If we don't have a sampler, we assign it the idx we reserve for this
796    * case, and we ensure that it is using the correct return size.
797    */
798   if (sampler_idx < 0) {
799      state->needs_default_sampler_state = true;
800      instr->sampler_index = return_size == 16 ?
801         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
802   }
803
804   return true;
805}
806
807/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
808static void
809lower_image_deref(nir_builder *b,
810                  nir_intrinsic_instr *instr,
811                  struct lower_pipeline_layout_state *state)
812{
813   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
814   nir_ssa_def *index = NULL;
815   unsigned array_elements = 1;
816   unsigned base_index = 0;
817
818   while (deref->deref_type != nir_deref_type_var) {
819      assert(deref->parent.is_ssa);
820      nir_deref_instr *parent =
821         nir_instr_as_deref(deref->parent.ssa->parent_instr);
822
823      assert(deref->deref_type == nir_deref_type_array);
824
825      if (nir_src_is_const(deref->arr.index) && index == NULL) {
826         /* We're still building a direct index */
827         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
828      } else {
829         if (index == NULL) {
830            /* We used to be direct but not anymore */
831            index = nir_imm_int(b, base_index);
832            base_index = 0;
833         }
834
835         index = nir_iadd(b, index,
836                          nir_imul(b, nir_imm_int(b, array_elements),
837                                   nir_ssa_for_src(b, deref->arr.index, 1)));
838      }
839
840      array_elements *= glsl_get_length(parent->type);
841
842      deref = parent;
843   }
844
845   if (index)
846      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
847
848   uint32_t set = deref->var->data.descriptor_set;
849   uint32_t binding = deref->var->data.binding;
850   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
851   struct v3dv_descriptor_set_binding_layout *binding_layout =
852      &set_layout->binding[binding];
853
854   uint32_t array_index = deref->var->data.index + base_index;
855
856   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
857          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
858
859   struct v3dv_descriptor_map *map =
860      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
861                                  b->shader->info.stage, false);
862
863   int desc_index =
864      descriptor_map_add(map,
865                         deref->var->data.descriptor_set,
866                         deref->var->data.binding,
867                         array_index,
868                         binding_layout->array_size,
869                         0,
870                         32 /* return_size: doesn't apply for textures */);
871
872   /* Note: we don't need to do anything here in relation to the precision and
873    * the output size because for images we can infer that info from the image
874    * intrinsic, that includes the image format (see
875    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
876    */
877
878   index = nir_imm_int(b, desc_index);
879
880   nir_rewrite_image_intrinsic(instr, index, false);
881}
882
883static bool
884lower_intrinsic(nir_builder *b,
885                nir_intrinsic_instr *instr,
886                struct lower_pipeline_layout_state *state)
887{
888   switch (instr->intrinsic) {
889   case nir_intrinsic_load_push_constant:
890      lower_load_push_constant(b, instr, state);
891      return true;
892
893   case nir_intrinsic_vulkan_resource_index:
894      lower_vulkan_resource_index(b, instr, state);
895      return true;
896
897   case nir_intrinsic_load_vulkan_descriptor: {
898      /* Loading the descriptor happens as part of load/store instructions,
899       * so for us this is a no-op.
900       */
901      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
902      nir_instr_remove(&instr->instr);
903      return true;
904   }
905
906   case nir_intrinsic_image_deref_load:
907   case nir_intrinsic_image_deref_store:
908   case nir_intrinsic_image_deref_atomic_add:
909   case nir_intrinsic_image_deref_atomic_imin:
910   case nir_intrinsic_image_deref_atomic_umin:
911   case nir_intrinsic_image_deref_atomic_imax:
912   case nir_intrinsic_image_deref_atomic_umax:
913   case nir_intrinsic_image_deref_atomic_and:
914   case nir_intrinsic_image_deref_atomic_or:
915   case nir_intrinsic_image_deref_atomic_xor:
916   case nir_intrinsic_image_deref_atomic_exchange:
917   case nir_intrinsic_image_deref_atomic_comp_swap:
918   case nir_intrinsic_image_deref_size:
919   case nir_intrinsic_image_deref_samples:
920      lower_image_deref(b, instr, state);
921      return true;
922
923   default:
924      return false;
925   }
926}
927
928static bool
929lower_pipeline_layout_cb(nir_builder *b,
930                         nir_instr *instr,
931                         void *_state)
932{
933   bool progress = false;
934   struct lower_pipeline_layout_state *state = _state;
935
936   b->cursor = nir_before_instr(instr);
937   switch (instr->type) {
938   case nir_instr_type_tex:
939      progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
940      break;
941   case nir_instr_type_intrinsic:
942      progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
943      break;
944   default:
945      break;
946   }
947
948   return progress;
949}
950
951static bool
952lower_pipeline_layout_info(nir_shader *shader,
953                           struct v3dv_pipeline *pipeline,
954                           const struct v3dv_pipeline_layout *layout,
955                           bool *needs_default_sampler_state)
956{
957   bool progress = false;
958
959   struct lower_pipeline_layout_state state = {
960      .pipeline = pipeline,
961      .layout = layout,
962      .needs_default_sampler_state = false,
963   };
964
965   progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
966                                           nir_metadata_block_index |
967                                           nir_metadata_dominance,
968                                           &state);
969
970   *needs_default_sampler_state = state.needs_default_sampler_state;
971
972   return progress;
973}
974
975
976static void
977lower_fs_io(nir_shader *nir)
978{
979   /* Our backend doesn't handle array fragment shader outputs */
980   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
981   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
982
983   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
984                               MESA_SHADER_FRAGMENT);
985
986   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
987                               MESA_SHADER_FRAGMENT);
988
989   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
990            type_size_vec4, 0);
991}
992
993static void
994lower_gs_io(struct nir_shader *nir)
995{
996   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
997
998   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
999                               MESA_SHADER_GEOMETRY);
1000
1001   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1002                               MESA_SHADER_GEOMETRY);
1003}
1004
1005static void
1006lower_vs_io(struct nir_shader *nir)
1007{
1008   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1009
1010   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1011                               MESA_SHADER_VERTEX);
1012
1013   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1014                               MESA_SHADER_VERTEX);
1015
1016   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1017    * overlaps with v3d_nir_lower_io. Need further research though.
1018    */
1019}
1020
1021static void
1022shader_debug_output(const char *message, void *data)
1023{
1024   /* FIXME: We probably don't want to debug anything extra here, and in fact
1025    * the compiler is not using this callback too much, only as an alternative
1026    * way to debug out the shaderdb stats, that you can already get using
1027    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1028    * compiler to remove that callback.
1029    */
1030}
1031
1032static void
1033pipeline_populate_v3d_key(struct v3d_key *key,
1034                          const struct v3dv_pipeline_stage *p_stage,
1035                          uint32_t ucp_enables,
1036                          bool robust_buffer_access)
1037{
1038   assert(p_stage->pipeline->shared_data &&
1039          p_stage->pipeline->shared_data->maps[p_stage->stage]);
1040
1041   /* The following values are default values used at pipeline create. We use
1042    * there 32 bit as default return size.
1043    */
1044   struct v3dv_descriptor_map *sampler_map =
1045      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1046   struct v3dv_descriptor_map *texture_map =
1047      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1048
1049   key->num_tex_used = texture_map->num_desc;
1050   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1051   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1052      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1053      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1054      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1055      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1056   }
1057
1058   key->num_samplers_used = sampler_map->num_desc;
1059   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1060   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1061        sampler_idx++) {
1062      key->sampler[sampler_idx].return_size =
1063         sampler_map->return_size[sampler_idx];
1064
1065      key->sampler[sampler_idx].return_channels =
1066         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1067   }
1068
1069   switch (p_stage->stage) {
1070   case BROADCOM_SHADER_VERTEX:
1071   case BROADCOM_SHADER_VERTEX_BIN:
1072      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1073      break;
1074   case BROADCOM_SHADER_GEOMETRY:
1075   case BROADCOM_SHADER_GEOMETRY_BIN:
1076      /* FIXME: while we don't implement tessellation shaders */
1077      key->is_last_geometry_stage = true;
1078      break;
1079   case BROADCOM_SHADER_FRAGMENT:
1080   case BROADCOM_SHADER_COMPUTE:
1081      key->is_last_geometry_stage = false;
1082      break;
1083   default:
1084      unreachable("unsupported shader stage");
1085   }
1086
1087   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1088    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1089    * takes care of adding a single compact array variable at
1090    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1091    *
1092    * The only lowering we are interested is specific to the fragment shader,
1093    * where we want to emit discards to honor writes to gl_ClipDistance[] in
1094    * previous stages. This is done via nir_lower_clip_fs() so we only set up
1095    * the ucp enable mask for that stage.
1096    */
1097   key->ucp_enables = ucp_enables;
1098
1099   key->robust_buffer_access = robust_buffer_access;
1100
1101   key->environment = V3D_ENVIRONMENT_VULKAN;
1102}
1103
1104/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
1105 * same. For not using prim_mode that is the one already used on v3d
1106 */
1107static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1108   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1109   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1110   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1111   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1112   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1113   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1114   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1115   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1116   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1117   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1118};
1119
1120static const enum pipe_logicop vk_to_pipe_logicop[] = {
1121   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1122   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1123   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1124   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1125   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1126   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1127   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1128   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1129   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1130   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1131   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1132   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1133   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1134   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1135   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1136   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1137};
1138
1139static void
1140pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1141                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1142                             const struct v3dv_pipeline_stage *p_stage,
1143                             bool has_geometry_shader,
1144                             uint32_t ucp_enables)
1145{
1146   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1147
1148   memset(key, 0, sizeof(*key));
1149
1150   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1151   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1152
1153   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1154      pCreateInfo->pInputAssemblyState;
1155   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1156
1157   key->is_points = (topology == PIPE_PRIM_POINTS);
1158   key->is_lines = (topology >= PIPE_PRIM_LINES &&
1159                    topology <= PIPE_PRIM_LINE_STRIP);
1160   key->has_gs = has_geometry_shader;
1161
1162   const VkPipelineColorBlendStateCreateInfo *cb_info =
1163      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1164      pCreateInfo->pColorBlendState : NULL;
1165
1166   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1167                       vk_to_pipe_logicop[cb_info->logicOp] :
1168                       PIPE_LOGICOP_COPY;
1169
1170   const bool raster_enabled =
1171      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1172
1173   /* Multisample rasterization state must be ignored if rasterization
1174    * is disabled.
1175    */
1176   const VkPipelineMultisampleStateCreateInfo *ms_info =
1177      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1178   if (ms_info) {
1179      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1180             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1181      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1182
1183      if (key->msaa) {
1184         key->sample_coverage =
1185            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1186         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1187         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1188      }
1189   }
1190
1191   /* This is intended for V3D versions before 4.1, otherwise we just use the
1192    * tile buffer load/store swap R/B bit.
1193    */
1194   key->swap_color_rb = 0;
1195
1196   const struct v3dv_render_pass *pass =
1197      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1198   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1199   for (uint32_t i = 0; i < subpass->color_count; i++) {
1200      const uint32_t att_idx = subpass->color_attachments[i].attachment;
1201      if (att_idx == VK_ATTACHMENT_UNUSED)
1202         continue;
1203
1204      key->cbufs |= 1 << i;
1205
1206      VkFormat fb_format = pass->attachments[att_idx].desc.format;
1207      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1208
1209      /* If logic operations are enabled then we might emit color reads and we
1210       * need to know the color buffer format and swizzle for that
1211       */
1212      if (key->logicop_func != PIPE_LOGICOP_COPY) {
1213         key->color_fmt[i].format = fb_pipe_format;
1214         memcpy(key->color_fmt[i].swizzle,
1215                v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format),
1216                sizeof(key->color_fmt[i].swizzle));
1217      }
1218
1219      const struct util_format_description *desc =
1220         vk_format_description(fb_format);
1221
1222      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1223          desc->channel[0].size == 32) {
1224         key->f32_color_rb |= 1 << i;
1225      }
1226
1227      if (p_stage->nir->info.fs.untyped_color_outputs) {
1228         if (util_format_is_pure_uint(fb_pipe_format))
1229            key->uint_color_rb |= 1 << i;
1230         else if (util_format_is_pure_sint(fb_pipe_format))
1231            key->int_color_rb |= 1 << i;
1232      }
1233
1234      if (key->is_points) {
1235         /* This mask represents state for GL_ARB_point_sprite which is not
1236          * relevant to Vulkan.
1237          */
1238         key->point_sprite_mask = 0;
1239
1240         /* Vulkan mandates upper left. */
1241         key->point_coord_upper_left = true;
1242      }
1243   }
1244}
1245
1246static void
1247setup_stage_outputs_from_next_stage_inputs(
1248   uint8_t next_stage_num_inputs,
1249   struct v3d_varying_slot *next_stage_input_slots,
1250   uint8_t *num_used_outputs,
1251   struct v3d_varying_slot *used_output_slots,
1252   uint32_t size_of_used_output_slots)
1253{
1254   *num_used_outputs = next_stage_num_inputs;
1255   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1256}
1257
1258static void
1259pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1260                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1261                             const struct v3dv_pipeline_stage *p_stage)
1262{
1263   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1264          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1265
1266   memset(key, 0, sizeof(*key));
1267
1268   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1269   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1270
1271   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1272
1273   key->per_vertex_point_size =
1274      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1275
1276   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1277
1278   assert(key->base.is_last_geometry_stage);
1279   if (key->is_coord) {
1280      /* Output varyings in the last binning shader are only used for transform
1281       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1282       */
1283      key->num_used_outputs = 0;
1284   } else {
1285      struct v3dv_shader_variant *fs_variant =
1286         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1287
1288      STATIC_ASSERT(sizeof(key->used_outputs) ==
1289                    sizeof(fs_variant->prog_data.fs->input_slots));
1290
1291      setup_stage_outputs_from_next_stage_inputs(
1292         fs_variant->prog_data.fs->num_inputs,
1293         fs_variant->prog_data.fs->input_slots,
1294         &key->num_used_outputs,
1295         key->used_outputs,
1296         sizeof(key->used_outputs));
1297   }
1298}
1299
1300static void
1301pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1302                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1303                             const struct v3dv_pipeline_stage *p_stage)
1304{
1305   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1306          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1307
1308   memset(key, 0, sizeof(*key));
1309
1310   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1311   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1312
1313   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1314
1315   /* Vulkan specifies a point size per vertex, so true for if the prim are
1316    * points, like on ES2)
1317    */
1318   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1319      pCreateInfo->pInputAssemblyState;
1320   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1321
1322   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1323    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1324   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1325
1326   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1327
1328   if (key->is_coord) { /* Binning VS*/
1329      if (key->base.is_last_geometry_stage) {
1330         /* Output varyings in the last binning shader are only used for
1331          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1332          * supported.
1333          */
1334         key->num_used_outputs = 0;
1335      } else {
1336         /* Linking against GS binning program */
1337         assert(pipeline->gs);
1338         struct v3dv_shader_variant *gs_bin_variant =
1339            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1340
1341         STATIC_ASSERT(sizeof(key->used_outputs) ==
1342                       sizeof(gs_bin_variant->prog_data.gs->input_slots));
1343
1344         setup_stage_outputs_from_next_stage_inputs(
1345            gs_bin_variant->prog_data.gs->num_inputs,
1346            gs_bin_variant->prog_data.gs->input_slots,
1347            &key->num_used_outputs,
1348            key->used_outputs,
1349            sizeof(key->used_outputs));
1350      }
1351   } else { /* Render VS */
1352      if (pipeline->gs) {
1353         /* Linking against GS render program */
1354         struct v3dv_shader_variant *gs_variant =
1355            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1356
1357         STATIC_ASSERT(sizeof(key->used_outputs) ==
1358                       sizeof(gs_variant->prog_data.gs->input_slots));
1359
1360         setup_stage_outputs_from_next_stage_inputs(
1361            gs_variant->prog_data.gs->num_inputs,
1362            gs_variant->prog_data.gs->input_slots,
1363            &key->num_used_outputs,
1364            key->used_outputs,
1365            sizeof(key->used_outputs));
1366      } else {
1367         /* Linking against FS program */
1368         struct v3dv_shader_variant *fs_variant =
1369            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1370
1371         STATIC_ASSERT(sizeof(key->used_outputs) ==
1372                       sizeof(fs_variant->prog_data.fs->input_slots));
1373
1374         setup_stage_outputs_from_next_stage_inputs(
1375            fs_variant->prog_data.fs->num_inputs,
1376            fs_variant->prog_data.fs->input_slots,
1377            &key->num_used_outputs,
1378            key->used_outputs,
1379            sizeof(key->used_outputs));
1380      }
1381   }
1382
1383   const VkPipelineVertexInputStateCreateInfo *vi_info =
1384      pCreateInfo->pVertexInputState;
1385   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1386      const VkVertexInputAttributeDescription *desc =
1387         &vi_info->pVertexAttributeDescriptions[i];
1388      assert(desc->location < MAX_VERTEX_ATTRIBS);
1389      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1390         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1391   }
1392}
1393
1394/**
1395 * Creates the initial form of the pipeline stage for a binning shader by
1396 * cloning the render shader and flagging it as a coordinate shader.
1397 *
1398 * Returns NULL if it was not able to allocate the object, so it should be
1399 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1400 */
1401static struct v3dv_pipeline_stage *
1402pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1403                              const VkAllocationCallbacks *pAllocator)
1404{
1405   struct v3dv_device *device = src->pipeline->device;
1406
1407   struct v3dv_pipeline_stage *p_stage =
1408      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1409                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1410
1411   if (p_stage == NULL)
1412      return NULL;
1413
1414   assert(src->stage == BROADCOM_SHADER_VERTEX ||
1415          src->stage == BROADCOM_SHADER_GEOMETRY);
1416
1417   enum broadcom_shader_stage bin_stage =
1418      src->stage == BROADCOM_SHADER_VERTEX ?
1419         BROADCOM_SHADER_VERTEX_BIN :
1420         BROADCOM_SHADER_GEOMETRY_BIN;
1421
1422   p_stage->pipeline = src->pipeline;
1423   p_stage->stage = bin_stage;
1424   p_stage->entrypoint = src->entrypoint;
1425   p_stage->module = src->module;
1426   /* For binning shaders we will clone the NIR code from the corresponding
1427    * render shader later, when we call pipeline_compile_xxx_shader. This way
1428    * we only have to run the relevant NIR lowerings once for render shaders
1429    */
1430   p_stage->nir = NULL;
1431   p_stage->spec_info = src->spec_info;
1432   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1433   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1434
1435   return p_stage;
1436}
1437
1438/**
1439 * Returns false if it was not able to allocate or map the assembly bo memory.
1440 */
1441static bool
1442upload_assembly(struct v3dv_pipeline *pipeline)
1443{
1444   uint32_t total_size = 0;
1445   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1446      struct v3dv_shader_variant *variant =
1447         pipeline->shared_data->variants[stage];
1448
1449      if (variant != NULL)
1450         total_size += variant->qpu_insts_size;
1451   }
1452
1453   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1454                                      "pipeline shader assembly", true);
1455   if (!bo) {
1456      fprintf(stderr, "failed to allocate memory for shader\n");
1457      return false;
1458   }
1459
1460   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1461   if (!ok) {
1462      fprintf(stderr, "failed to map source shader buffer\n");
1463      return false;
1464   }
1465
1466   uint32_t offset = 0;
1467   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1468      struct v3dv_shader_variant *variant =
1469         pipeline->shared_data->variants[stage];
1470
1471      if (variant != NULL) {
1472         variant->assembly_offset = offset;
1473
1474         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1475         offset += variant->qpu_insts_size;
1476
1477         /* We dont need qpu_insts anymore. */
1478         free(variant->qpu_insts);
1479         variant->qpu_insts = NULL;
1480      }
1481   }
1482   assert(total_size == offset);
1483
1484   pipeline->shared_data->assembly_bo = bo;
1485
1486   return true;
1487}
1488
1489static void
1490pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1491                       struct v3dv_pipeline_key *key,
1492                       unsigned char *sha1_out)
1493{
1494   struct mesa_sha1 ctx;
1495   _mesa_sha1_init(&ctx);
1496
1497   if (pipeline->layout) {
1498      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1499                        sizeof(pipeline->layout->sha1));
1500   }
1501
1502   /* We need to include all shader stages in the sha1 key as linking may modify
1503    * the shader code in any stage. An alternative would be to use the
1504    * serialized NIR, but that seems like an overkill.
1505    */
1506   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1507                     sizeof(pipeline->vs->shader_sha1));
1508
1509   if (pipeline->gs) {
1510      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1511                        sizeof(pipeline->gs->shader_sha1));
1512   }
1513
1514   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1515                     sizeof(pipeline->fs->shader_sha1));
1516
1517   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1518
1519   _mesa_sha1_final(&ctx, sha1_out);
1520}
1521
1522static void
1523pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1524                      struct v3dv_pipeline_key *key,
1525                      unsigned char *sha1_out)
1526{
1527   struct mesa_sha1 ctx;
1528   _mesa_sha1_init(&ctx);
1529
1530   if (pipeline->layout) {
1531      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1532                        sizeof(pipeline->layout->sha1));
1533   }
1534
1535   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1536                     sizeof(pipeline->cs->shader_sha1));
1537
1538   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1539
1540   _mesa_sha1_final(&ctx, sha1_out);
1541}
1542
1543/* Checks that the pipeline has enough spill size to use for any of their
1544 * variants
1545 */
1546static void
1547pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1548{
1549   uint32_t max_spill_size = 0;
1550
1551   for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1552      struct v3dv_shader_variant *variant =
1553         pipeline->shared_data->variants[stage];
1554
1555      if (variant != NULL) {
1556         max_spill_size = MAX2(variant->prog_data.base->spill_size,
1557                               max_spill_size);
1558      }
1559   }
1560
1561   if (max_spill_size > 0) {
1562      struct v3dv_device *device = pipeline->device;
1563
1564      /* The TIDX register we use for choosing the area to access
1565       * for scratch space is: (core << 6) | (qpu << 2) | thread.
1566       * Even at minimum threadcount in a particular shader, that
1567       * means we still multiply by qpus by 4.
1568       */
1569      const uint32_t total_spill_size =
1570         4 * device->devinfo.qpu_count * max_spill_size;
1571      if (pipeline->spill.bo) {
1572         assert(pipeline->spill.size_per_thread > 0);
1573         v3dv_bo_free(device, pipeline->spill.bo);
1574      }
1575      pipeline->spill.bo =
1576         v3dv_bo_alloc(device, total_spill_size, "spill", true);
1577      pipeline->spill.size_per_thread = max_spill_size;
1578   }
1579}
1580
1581/**
1582 * Creates a new shader_variant_create. Note that for prog_data is not const,
1583 * so it is assumed that the caller will prove a pointer that the
1584 * shader_variant will own.
1585 *
1586 * Creation doesn't include allocate a BO to store the content of qpu_insts,
1587 * as we will try to share the same bo for several shader variants. Also note
1588 * that qpu_ints being NULL is valid, for example if we are creating the
1589 * shader_variants from the cache, so we can just upload the assembly of all
1590 * the shader stages at once.
1591 */
1592struct v3dv_shader_variant *
1593v3dv_shader_variant_create(struct v3dv_device *device,
1594                           enum broadcom_shader_stage stage,
1595                           struct v3d_prog_data *prog_data,
1596                           uint32_t prog_data_size,
1597                           uint32_t assembly_offset,
1598                           uint64_t *qpu_insts,
1599                           uint32_t qpu_insts_size,
1600                           VkResult *out_vk_result)
1601{
1602   struct v3dv_shader_variant *variant =
1603      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1604                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1605
1606   if (variant == NULL) {
1607      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1608      return NULL;
1609   }
1610
1611   variant->stage = stage;
1612   variant->prog_data_size = prog_data_size;
1613   variant->prog_data.base = prog_data;
1614
1615   variant->assembly_offset = assembly_offset;
1616   variant->qpu_insts_size = qpu_insts_size;
1617   variant->qpu_insts = qpu_insts;
1618
1619   *out_vk_result = VK_SUCCESS;
1620
1621   return variant;
1622}
1623
1624/* For a given key, it returns the compiled version of the shader.  Returns a
1625 * new reference to the shader_variant to the caller, or NULL.
1626 *
1627 * If the method returns NULL it means that something wrong happened:
1628 *   * Not enough memory: this is one of the possible outcomes defined by
1629 *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1630 *   * Compilation error: hypothetically this shouldn't happen, as the spec
1631 *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1632 *     any compilation failure is a driver bug. In the practice, something as
1633 *     common as failing to register allocate can lead to a compilation
1634 *     failure. In that case the only option (for any driver) is
1635 *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1636 *     error.
1637 */
1638static struct v3dv_shader_variant *
1639pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1640                                struct v3d_key *key,
1641                                size_t key_size,
1642                                const VkAllocationCallbacks *pAllocator,
1643                                VkResult *out_vk_result)
1644{
1645   int64_t stage_start = os_time_get_nano();
1646
1647   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1648   struct v3dv_physical_device *physical_device =
1649      &pipeline->device->instance->physicalDevice;
1650   const struct v3d_compiler *compiler = physical_device->compiler;
1651
1652   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1653                             v3d_debug_flag_for_shader_stage
1654                             (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1655      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1656              broadcom_shader_stage_name(p_stage->stage),
1657              p_stage->program_id);
1658      nir_print_shader(p_stage->nir, stderr);
1659      fprintf(stderr, "\n");
1660   }
1661
1662   uint64_t *qpu_insts;
1663   uint32_t qpu_insts_size;
1664   struct v3d_prog_data *prog_data;
1665   uint32_t prog_data_size =
1666      v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1667
1668   qpu_insts = v3d_compile(compiler,
1669                           key, &prog_data,
1670                           p_stage->nir,
1671                           shader_debug_output, NULL,
1672                           p_stage->program_id, 0,
1673                           &qpu_insts_size);
1674
1675   struct v3dv_shader_variant *variant = NULL;
1676
1677   if (!qpu_insts) {
1678      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1679              gl_shader_stage_name(p_stage->stage),
1680              p_stage->program_id);
1681      *out_vk_result = VK_ERROR_UNKNOWN;
1682   } else {
1683      variant =
1684         v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1685                                    prog_data, prog_data_size,
1686                                    0, /* assembly_offset, no final value yet */
1687                                    qpu_insts, qpu_insts_size,
1688                                    out_vk_result);
1689   }
1690   /* At this point we don't need anymore the nir shader, but we are freeing
1691    * all the temporary p_stage structs used during the pipeline creation when
1692    * we finish it, so let's not worry about freeing the nir here.
1693    */
1694
1695   p_stage->feedback.duration += os_time_get_nano() - stage_start;
1696
1697   return variant;
1698}
1699
1700static void
1701link_shaders(nir_shader *producer, nir_shader *consumer)
1702{
1703   assert(producer);
1704   assert(consumer);
1705
1706   if (producer->options->lower_to_scalar) {
1707      NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1708      NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1709   }
1710
1711   nir_lower_io_arrays_to_elements(producer, consumer);
1712
1713   nir_optimize(producer, false);
1714   nir_optimize(consumer, false);
1715
1716   if (nir_link_opt_varyings(producer, consumer))
1717      nir_optimize(consumer, false);
1718
1719   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1720   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1721
1722   if (nir_remove_unused_varyings(producer, consumer)) {
1723      NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1724      NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1725
1726      nir_optimize(producer, false);
1727      nir_optimize(consumer, false);
1728
1729      /* Optimizations can cause varyings to become unused.
1730       * nir_compact_varyings() depends on all dead varyings being removed so
1731       * we need to call nir_remove_dead_variables() again here.
1732       */
1733      NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1734      NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1735   }
1736}
1737
1738static void
1739pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1740                   struct v3dv_pipeline_stage *p_stage,
1741                   struct v3dv_pipeline_layout *layout)
1742{
1743   int64_t stage_start = os_time_get_nano();
1744
1745   assert(pipeline->shared_data &&
1746          pipeline->shared_data->maps[p_stage->stage]);
1747
1748   nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1749
1750   /* We add this because we need a valid sampler for nir_lower_tex to do
1751    * unpacking of the texture operation result, even for the case where there
1752    * is no sampler state.
1753    *
1754    * We add two of those, one for the case we need a 16bit return_size, and
1755    * another for the case we need a 32bit return size.
1756    */
1757   struct v3dv_descriptor_maps *maps =
1758      pipeline->shared_data->maps[p_stage->stage];
1759
1760   UNUSED unsigned index;
1761   index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16);
1762   assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1763
1764   index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32);
1765   assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1766
1767   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1768   bool needs_default_sampler_state = false;
1769   NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1770            &needs_default_sampler_state);
1771
1772   /* If in the end we didn't need to use the default sampler states and the
1773    * shader doesn't need any other samplers, get rid of them so we can
1774    * recognize that this program doesn't use any samplers at all.
1775    */
1776   if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1777      maps->sampler_map.num_desc = 0;
1778
1779   p_stage->feedback.duration += os_time_get_nano() - stage_start;
1780}
1781
1782/**
1783 * The SPIR-V compiler will insert a sized compact array for
1784 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1785 * where the size of the array determines the number of active clip planes.
1786 */
1787static uint32_t
1788get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1789{
1790   assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1791   const nir_shader *shader = p_stage->nir;
1792   assert(shader);
1793
1794   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1795      if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1796         assert(var->data.compact);
1797         return (1 << glsl_get_length(var->type)) - 1;
1798      }
1799   }
1800   return 0;
1801}
1802
1803static nir_shader *
1804pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1805                       struct v3dv_pipeline *pipeline,
1806                       struct v3dv_pipeline_cache *cache)
1807{
1808   int64_t stage_start = os_time_get_nano();
1809
1810   nir_shader *nir = NULL;
1811
1812   nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1813                                            &v3dv_nir_options,
1814                                            p_stage->shader_sha1);
1815
1816   if (nir) {
1817      assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1818
1819      /* A NIR cach hit doesn't avoid the large majority of pipeline stage
1820       * creation so the cache hit is not recorded in the pipeline feedback
1821       * flags
1822       */
1823
1824      p_stage->feedback.duration += os_time_get_nano() - stage_start;
1825
1826      return nir;
1827   }
1828
1829   nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1830
1831   if (nir) {
1832      struct v3dv_pipeline_cache *default_cache =
1833         &pipeline->device->default_pipeline_cache;
1834
1835      v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1836                                     p_stage->shader_sha1);
1837
1838      /* Ensure that the variant is on the default cache, as cmd_buffer could
1839       * need to change the current variant
1840       */
1841      if (default_cache != cache) {
1842         v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1843                                        p_stage->shader_sha1);
1844      }
1845
1846      p_stage->feedback.duration += os_time_get_nano() - stage_start;
1847
1848      return nir;
1849   }
1850
1851   /* FIXME: this shouldn't happen, raise error? */
1852   return NULL;
1853}
1854
1855static VkResult
1856pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1857                               const VkAllocationCallbacks *pAllocator,
1858                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1859{
1860   assert(pipeline->vs_bin != NULL);
1861   if (pipeline->vs_bin->nir == NULL) {
1862      assert(pipeline->vs->nir);
1863      pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1864   }
1865
1866   VkResult vk_result;
1867   struct v3d_vs_key key;
1868   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1869   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1870      pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1871                                      pAllocator, &vk_result);
1872   if (vk_result != VK_SUCCESS)
1873      return vk_result;
1874
1875   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1876   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1877      pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1878                                      pAllocator, &vk_result);
1879
1880   return vk_result;
1881}
1882
1883static VkResult
1884pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1885                                 const VkAllocationCallbacks *pAllocator,
1886                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1887{
1888   assert(pipeline->gs);
1889
1890   assert(pipeline->gs_bin != NULL);
1891   if (pipeline->gs_bin->nir == NULL) {
1892      assert(pipeline->gs->nir);
1893      pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1894   }
1895
1896   VkResult vk_result;
1897   struct v3d_gs_key key;
1898   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1899   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1900      pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1901                                      pAllocator, &vk_result);
1902   if (vk_result != VK_SUCCESS)
1903      return vk_result;
1904
1905   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1906   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1907      pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1908                                      pAllocator, &vk_result);
1909
1910   return vk_result;
1911}
1912
1913static VkResult
1914pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1915                                 const VkAllocationCallbacks *pAllocator,
1916                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1917{
1918   struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1919
1920   p_stage = pipeline->fs;
1921
1922   struct v3d_fs_key key;
1923
1924   pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1925                                pipeline->gs != NULL,
1926                                get_ucp_enable_mask(pipeline->vs));
1927
1928   VkResult vk_result;
1929   pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1930      pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1931                                      pAllocator, &vk_result);
1932
1933   return vk_result;
1934}
1935
1936static void
1937pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1938                               struct v3dv_pipeline_key *key,
1939                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1940{
1941   memset(key, 0, sizeof(*key));
1942   key->robust_buffer_access =
1943      pipeline->device->features.robustBufferAccess;
1944
1945   const bool raster_enabled =
1946      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1947
1948   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1949      pCreateInfo->pInputAssemblyState;
1950   key->topology = vk_to_pipe_prim_type[ia_info->topology];
1951
1952   const VkPipelineColorBlendStateCreateInfo *cb_info =
1953      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1954
1955   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1956      vk_to_pipe_logicop[cb_info->logicOp] :
1957      PIPE_LOGICOP_COPY;
1958
1959   /* Multisample rasterization state must be ignored if rasterization
1960    * is disabled.
1961    */
1962   const VkPipelineMultisampleStateCreateInfo *ms_info =
1963      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1964   if (ms_info) {
1965      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1966             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1967      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1968
1969      if (key->msaa) {
1970         key->sample_coverage =
1971            pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1972         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1973         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1974      }
1975   }
1976
1977   const struct v3dv_render_pass *pass =
1978      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1979   const struct v3dv_subpass *subpass = pipeline->subpass;
1980   for (uint32_t i = 0; i < subpass->color_count; i++) {
1981      const uint32_t att_idx = subpass->color_attachments[i].attachment;
1982      if (att_idx == VK_ATTACHMENT_UNUSED)
1983         continue;
1984
1985      key->cbufs |= 1 << i;
1986
1987      VkFormat fb_format = pass->attachments[att_idx].desc.format;
1988      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1989
1990      /* If logic operations are enabled then we might emit color reads and we
1991       * need to know the color buffer format and swizzle for that
1992       */
1993      if (key->logicop_func != PIPE_LOGICOP_COPY) {
1994         key->color_fmt[i].format = fb_pipe_format;
1995         memcpy(key->color_fmt[i].swizzle,
1996                v3dv_get_format_swizzle(pipeline->device, fb_format),
1997                sizeof(key->color_fmt[i].swizzle));
1998      }
1999
2000      const struct util_format_description *desc =
2001         vk_format_description(fb_format);
2002
2003      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2004          desc->channel[0].size == 32) {
2005         key->f32_color_rb |= 1 << i;
2006      }
2007   }
2008
2009   const VkPipelineVertexInputStateCreateInfo *vi_info =
2010      pCreateInfo->pVertexInputState;
2011   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2012      const VkVertexInputAttributeDescription *desc =
2013         &vi_info->pVertexAttributeDescriptions[i];
2014      assert(desc->location < MAX_VERTEX_ATTRIBS);
2015      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2016         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2017   }
2018
2019   assert(pipeline->subpass);
2020   key->has_multiview = pipeline->subpass->view_mask != 0;
2021}
2022
2023static void
2024pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2025                              struct v3dv_pipeline_key *key,
2026                              const VkComputePipelineCreateInfo *pCreateInfo)
2027{
2028   /* We use the same pipeline key for graphics and compute, but we don't need
2029    * to add a field to flag compute keys because this key is not used alone
2030    * to search in the cache, we also use the SPIR-V or the serialized NIR for
2031    * example, which already flags compute shaders.
2032    */
2033   memset(key, 0, sizeof(*key));
2034   key->robust_buffer_access =
2035      pipeline->device->features.robustBufferAccess;
2036}
2037
2038static struct v3dv_pipeline_shared_data *
2039v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2040                                    struct v3dv_pipeline *pipeline,
2041                                    bool is_graphics_pipeline)
2042{
2043   /* We create new_entry using the device alloc. Right now shared_data is ref
2044    * and unref by both the pipeline and the pipeline cache, so we can't
2045    * ensure that the cache or pipeline alloc will be available on the last
2046    * unref.
2047    */
2048   struct v3dv_pipeline_shared_data *new_entry =
2049      vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2050                 sizeof(struct v3dv_pipeline_shared_data), 8,
2051                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2052
2053   if (new_entry == NULL)
2054      return NULL;
2055
2056   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2057      /* We don't need specific descriptor maps for binning stages we use the
2058       * map for the render stage.
2059       */
2060      if (broadcom_shader_stage_is_binning(stage))
2061         continue;
2062
2063      if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2064          (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2065         continue;
2066      }
2067
2068      if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2069         /* We always inject a custom GS if we have multiview */
2070         if (!pipeline->subpass->view_mask)
2071            continue;
2072      }
2073
2074      struct v3dv_descriptor_maps *new_maps =
2075         vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2076                    sizeof(struct v3dv_descriptor_maps), 8,
2077                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2078
2079      if (new_maps == NULL)
2080         goto fail;
2081
2082      new_entry->maps[stage] = new_maps;
2083   }
2084
2085   new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2086      new_entry->maps[BROADCOM_SHADER_VERTEX];
2087
2088   new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2089      new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2090
2091   new_entry->ref_cnt = 1;
2092   memcpy(new_entry->sha1_key, sha1_key, 20);
2093
2094   return new_entry;
2095
2096fail:
2097   if (new_entry != NULL) {
2098      for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2099         if (new_entry->maps[stage] != NULL)
2100            vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2101      }
2102   }
2103
2104   vk_free(&pipeline->device->vk.alloc, new_entry);
2105
2106   return NULL;
2107}
2108
2109static void
2110write_creation_feedback(struct v3dv_pipeline *pipeline,
2111                        const void *next,
2112                        const VkPipelineCreationFeedback *pipeline_feedback,
2113                        uint32_t stage_count,
2114                        const VkPipelineShaderStageCreateInfo *stages)
2115{
2116   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2117      vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2118
2119   if (create_feedback) {
2120      typed_memcpy(create_feedback->pPipelineCreationFeedback,
2121             pipeline_feedback,
2122             1);
2123
2124      assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2125
2126      for (uint32_t i = 0; i < stage_count; i++) {
2127         gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2128         switch (s) {
2129         case MESA_SHADER_VERTEX:
2130            create_feedback->pPipelineStageCreationFeedbacks[i] =
2131               pipeline->vs->feedback;
2132
2133            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2134               pipeline->vs_bin->feedback.duration;
2135            break;
2136
2137         case MESA_SHADER_GEOMETRY:
2138            create_feedback->pPipelineStageCreationFeedbacks[i] =
2139               pipeline->gs->feedback;
2140
2141            create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2142               pipeline->gs_bin->feedback.duration;
2143            break;
2144
2145         case MESA_SHADER_FRAGMENT:
2146            create_feedback->pPipelineStageCreationFeedbacks[i] =
2147               pipeline->fs->feedback;
2148            break;
2149
2150         case MESA_SHADER_COMPUTE:
2151            create_feedback->pPipelineStageCreationFeedbacks[i] =
2152               pipeline->cs->feedback;
2153            break;
2154
2155         default:
2156            unreachable("not supported shader stage");
2157         }
2158      }
2159   }
2160}
2161
2162static enum shader_prim
2163multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2164{
2165   switch (pipeline->topology) {
2166   case PIPE_PRIM_POINTS:
2167      return SHADER_PRIM_POINTS;
2168   case PIPE_PRIM_LINES:
2169   case PIPE_PRIM_LINE_STRIP:
2170      return SHADER_PRIM_LINES;
2171   case PIPE_PRIM_TRIANGLES:
2172   case PIPE_PRIM_TRIANGLE_STRIP:
2173   case PIPE_PRIM_TRIANGLE_FAN:
2174      return SHADER_PRIM_TRIANGLES;
2175   default:
2176      /* Since we don't allow GS with multiview, we can only see non-adjacency
2177       * primitives.
2178       */
2179      unreachable("Unexpected pipeline primitive type");
2180   }
2181}
2182
2183static enum shader_prim
2184multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2185{
2186   switch (pipeline->topology) {
2187   case PIPE_PRIM_POINTS:
2188      return SHADER_PRIM_POINTS;
2189   case PIPE_PRIM_LINES:
2190   case PIPE_PRIM_LINE_STRIP:
2191      return SHADER_PRIM_LINE_STRIP;
2192   case PIPE_PRIM_TRIANGLES:
2193   case PIPE_PRIM_TRIANGLE_STRIP:
2194   case PIPE_PRIM_TRIANGLE_FAN:
2195      return SHADER_PRIM_TRIANGLE_STRIP;
2196   default:
2197      /* Since we don't allow GS with multiview, we can only see non-adjacency
2198       * primitives.
2199       */
2200      unreachable("Unexpected pipeline primitive type");
2201   }
2202}
2203
2204static bool
2205pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2206                          struct v3dv_pipeline_cache *cache,
2207                          const VkAllocationCallbacks *pAllocator)
2208{
2209   /* Create the passthrough GS from the VS output interface */
2210   pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2211   nir_shader *vs_nir = pipeline->vs->nir;
2212
2213   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2214   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2215                                                  "multiview broadcast gs");
2216   nir_shader *nir = b.shader;
2217   nir->info.inputs_read = vs_nir->info.outputs_written;
2218   nir->info.outputs_written = vs_nir->info.outputs_written |
2219                               (1ull << VARYING_SLOT_LAYER);
2220
2221   uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2222   nir->info.gs.input_primitive =
2223      multiview_gs_input_primitive_from_pipeline(pipeline);
2224   nir->info.gs.output_primitive =
2225      multiview_gs_output_primitive_from_pipeline(pipeline);
2226   nir->info.gs.vertices_in = vertex_count;
2227   nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2228   nir->info.gs.invocations = 1;
2229   nir->info.gs.active_stream_mask = 0x1;
2230
2231   /* Make a list of GS input/output variables from the VS outputs */
2232   nir_variable *in_vars[100];
2233   nir_variable *out_vars[100];
2234   uint32_t var_count = 0;
2235   nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2236      char name[8];
2237      snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2238
2239      in_vars[var_count] =
2240         nir_variable_create(nir, nir_var_shader_in,
2241                             glsl_array_type(out_vs_var->type, vertex_count, 0),
2242                             name);
2243      in_vars[var_count]->data.location = out_vs_var->data.location;
2244      in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2245      in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2246
2247      snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2248      out_vars[var_count] =
2249         nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2250      out_vars[var_count]->data.location = out_vs_var->data.location;
2251      out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2252
2253      var_count++;
2254   }
2255
2256   /* Add the gl_Layer output variable */
2257   nir_variable *out_layer =
2258      nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2259                          "out_Layer");
2260   out_layer->data.location = VARYING_SLOT_LAYER;
2261
2262   /* Get the view index value that we will write to gl_Layer */
2263   nir_ssa_def *layer =
2264      nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2265
2266   /* Emit all output vertices */
2267   for (uint32_t vi = 0; vi < vertex_count; vi++) {
2268      /* Emit all output varyings */
2269      for (uint32_t i = 0; i < var_count; i++) {
2270         nir_deref_instr *in_value =
2271            nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2272         nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2273      }
2274
2275      /* Emit gl_Layer write */
2276      nir_store_var(&b, out_layer, layer, 0x1);
2277
2278      nir_emit_vertex(&b, 0);
2279   }
2280   nir_end_primitive(&b, 0);
2281
2282   /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2283    * with what we expect from SPIR-V modules.
2284    */
2285   preprocess_nir(nir);
2286
2287   /* Attach the geometry shader to the  pipeline */
2288   struct v3dv_device *device = pipeline->device;
2289   struct v3dv_physical_device *physical_device =
2290      &device->instance->physicalDevice;
2291
2292   struct v3dv_pipeline_stage *p_stage =
2293      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2294                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2295
2296   if (p_stage == NULL) {
2297      ralloc_free(nir);
2298      return false;
2299   }
2300
2301   p_stage->pipeline = pipeline;
2302   p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2303   p_stage->entrypoint = "main";
2304   p_stage->module = 0;
2305   p_stage->nir = nir;
2306   pipeline_compute_sha1_from_nir(p_stage);
2307   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2308
2309   pipeline->has_gs = true;
2310   pipeline->gs = p_stage;
2311   pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2312
2313   pipeline->gs_bin =
2314      pipeline_stage_create_binning(pipeline->gs, pAllocator);
2315      if (pipeline->gs_bin == NULL)
2316         return false;
2317
2318   return true;
2319}
2320
2321static void
2322pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2323{
2324   for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2325      struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2326      if (variant && variant->prog_data.base->has_global_address) {
2327         pipeline->uses_buffer_device_address = true;
2328         return;
2329      }
2330   }
2331
2332   pipeline->uses_buffer_device_address = false;
2333}
2334
2335/*
2336 * It compiles a pipeline. Note that it also allocate internal object, but if
2337 * some allocations success, but other fails, the method is not freeing the
2338 * successful ones.
2339 *
2340 * This is done to simplify the code, as what we do in this case is just call
2341 * the pipeline destroy method, and this would handle freeing the internal
2342 * objects allocated. We just need to be careful setting to NULL the objects
2343 * not allocated.
2344 */
2345static VkResult
2346pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2347                          struct v3dv_pipeline_cache *cache,
2348                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
2349                          const VkAllocationCallbacks *pAllocator)
2350{
2351   VkPipelineCreationFeedback pipeline_feedback = {
2352      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2353   };
2354   int64_t pipeline_start = os_time_get_nano();
2355
2356   struct v3dv_device *device = pipeline->device;
2357   struct v3dv_physical_device *physical_device =
2358      &device->instance->physicalDevice;
2359
2360   /* First pass to get some common info from the shader, and create the
2361    * individual pipeline_stage objects
2362    */
2363   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2364      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2365      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2366
2367      struct v3dv_pipeline_stage *p_stage =
2368         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2369                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2370
2371      if (p_stage == NULL)
2372         return VK_ERROR_OUT_OF_HOST_MEMORY;
2373
2374      /* Note that we are assigning program_id slightly differently that
2375       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2376       * would have a different program_id, while v3d would have the same for
2377       * both. For the case of v3dv, it is more natural to have an id this way,
2378       * as right now we are using it for debugging, not for shader-db.
2379       */
2380      p_stage->program_id =
2381         p_atomic_inc_return(&physical_device->next_program_id);
2382
2383      p_stage->pipeline = pipeline;
2384      p_stage->stage = gl_shader_stage_to_broadcom(stage);
2385      p_stage->entrypoint = sinfo->pName;
2386      p_stage->module = vk_shader_module_from_handle(sinfo->module);
2387      p_stage->spec_info = sinfo->pSpecializationInfo;
2388
2389      vk_pipeline_hash_shader_stage(&pCreateInfo->pStages[i], p_stage->shader_sha1);
2390
2391      pipeline->active_stages |= sinfo->stage;
2392
2393      /* We will try to get directly the compiled shader variant, so let's not
2394       * worry about getting the nir shader for now.
2395       */
2396      p_stage->nir = NULL;
2397
2398      switch(stage) {
2399      case MESA_SHADER_VERTEX:
2400         pipeline->vs = p_stage;
2401         pipeline->vs_bin =
2402            pipeline_stage_create_binning(pipeline->vs, pAllocator);
2403         if (pipeline->vs_bin == NULL)
2404            return VK_ERROR_OUT_OF_HOST_MEMORY;
2405         break;
2406
2407      case MESA_SHADER_GEOMETRY:
2408         pipeline->has_gs = true;
2409         pipeline->gs = p_stage;
2410         pipeline->gs_bin =
2411            pipeline_stage_create_binning(pipeline->gs, pAllocator);
2412         if (pipeline->gs_bin == NULL)
2413            return VK_ERROR_OUT_OF_HOST_MEMORY;
2414         break;
2415
2416      case MESA_SHADER_FRAGMENT:
2417         pipeline->fs = p_stage;
2418         break;
2419
2420      default:
2421         unreachable("not supported shader stage");
2422      }
2423   }
2424
2425   /* Add a no-op fragment shader if needed */
2426   if (!pipeline->fs) {
2427      nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2428                                                     &v3dv_nir_options,
2429                                                     "noop_fs");
2430
2431      struct v3dv_pipeline_stage *p_stage =
2432         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2433                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2434
2435      if (p_stage == NULL)
2436         return VK_ERROR_OUT_OF_HOST_MEMORY;
2437
2438      p_stage->pipeline = pipeline;
2439      p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2440      p_stage->entrypoint = "main";
2441      p_stage->module = 0;
2442      p_stage->nir = b.shader;
2443      pipeline_compute_sha1_from_nir(p_stage);
2444      p_stage->program_id =
2445         p_atomic_inc_return(&physical_device->next_program_id);
2446
2447      pipeline->fs = p_stage;
2448      pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2449   }
2450
2451   /* If multiview is enabled, we inject a custom passthrough geometry shader
2452    * to broadcast draw calls to the appropriate views.
2453    */
2454   assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2455   if (pipeline->subpass->view_mask) {
2456      if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2457         return VK_ERROR_OUT_OF_HOST_MEMORY;
2458   }
2459
2460   /* First we try to get the variants from the pipeline cache (unless we are
2461    * required to capture internal representations, since in that case we need
2462    * compile).
2463    */
2464   bool needs_executable_info =
2465      pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2466   if (!needs_executable_info) {
2467      struct v3dv_pipeline_key pipeline_key;
2468      pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2469      pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2470
2471      bool cache_hit = false;
2472
2473      pipeline->shared_data =
2474         v3dv_pipeline_cache_search_for_pipeline(cache,
2475                                                 pipeline->sha1,
2476                                                 &cache_hit);
2477
2478      if (pipeline->shared_data != NULL) {
2479         /* A correct pipeline must have at least a VS and FS */
2480         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2481         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2482         assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2483         assert(!pipeline->gs ||
2484                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2485         assert(!pipeline->gs ||
2486                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2487
2488         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2489            pipeline_feedback.flags |=
2490               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2491
2492         goto success;
2493      }
2494   }
2495
2496   if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2497      return VK_PIPELINE_COMPILE_REQUIRED;
2498
2499   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2500    * shader or the pipeline cache) and compile.
2501    */
2502   pipeline->shared_data =
2503      v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2504   if (!pipeline->shared_data)
2505      return VK_ERROR_OUT_OF_HOST_MEMORY;
2506
2507   pipeline->vs->feedback.flags |=
2508      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2509   if (pipeline->gs)
2510      pipeline->gs->feedback.flags |=
2511         VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2512   pipeline->fs->feedback.flags |=
2513      VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2514
2515   if (!pipeline->vs->nir)
2516      pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2517   if (pipeline->gs && !pipeline->gs->nir)
2518      pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2519   if (!pipeline->fs->nir)
2520      pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2521
2522   /* Linking + pipeline lowerings */
2523   if (pipeline->gs) {
2524      link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2525      link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2526   } else {
2527      link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2528   }
2529
2530   pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2531   lower_fs_io(pipeline->fs->nir);
2532
2533   if (pipeline->gs) {
2534      pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2535      lower_gs_io(pipeline->gs->nir);
2536   }
2537
2538   pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2539   lower_vs_io(pipeline->vs->nir);
2540
2541   /* Compiling to vir */
2542   VkResult vk_result;
2543
2544   /* We should have got all the variants or no variants from the cache */
2545   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2546   vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2547   if (vk_result != VK_SUCCESS)
2548      return vk_result;
2549
2550   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2551          !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2552
2553   if (pipeline->gs) {
2554      vk_result =
2555         pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2556      if (vk_result != VK_SUCCESS)
2557         return vk_result;
2558   }
2559
2560   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2561          !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2562
2563   vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2564   if (vk_result != VK_SUCCESS)
2565      return vk_result;
2566
2567   if (!upload_assembly(pipeline))
2568      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2569
2570   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2571
2572 success:
2573
2574   pipeline_check_buffer_device_address(pipeline);
2575
2576   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2577   write_creation_feedback(pipeline,
2578                           pCreateInfo->pNext,
2579                           &pipeline_feedback,
2580                           pCreateInfo->stageCount,
2581                           pCreateInfo->pStages);
2582
2583   /* Since we have the variants in the pipeline shared data we can now free
2584    * the pipeline stages.
2585    */
2586   if (!needs_executable_info)
2587      pipeline_free_stages(device, pipeline, pAllocator);
2588
2589   pipeline_check_spill_size(pipeline);
2590
2591   return compute_vpm_config(pipeline);
2592}
2593
2594static VkResult
2595compute_vpm_config(struct v3dv_pipeline *pipeline)
2596{
2597   struct v3dv_shader_variant *vs_variant =
2598      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2599   struct v3dv_shader_variant *vs_bin_variant =
2600      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2601   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2602   struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
2603
2604   struct v3d_gs_prog_data *gs = NULL;
2605   struct v3d_gs_prog_data *gs_bin = NULL;
2606   if (pipeline->has_gs) {
2607      struct v3dv_shader_variant *gs_variant =
2608         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2609      struct v3dv_shader_variant *gs_bin_variant =
2610         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2611      gs = gs_variant->prog_data.gs;
2612      gs_bin = gs_bin_variant->prog_data.gs;
2613   }
2614
2615   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2616                               vs_bin, vs, gs_bin, gs,
2617                               &pipeline->vpm_cfg_bin,
2618                               &pipeline->vpm_cfg)) {
2619      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2620   }
2621
2622   return VK_SUCCESS;
2623}
2624
2625static unsigned
2626v3dv_dynamic_state_mask(VkDynamicState state)
2627{
2628   switch(state) {
2629   case VK_DYNAMIC_STATE_VIEWPORT:
2630      return V3DV_DYNAMIC_VIEWPORT;
2631   case VK_DYNAMIC_STATE_SCISSOR:
2632      return V3DV_DYNAMIC_SCISSOR;
2633   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2634      return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2635   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2636      return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2637   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2638      return V3DV_DYNAMIC_STENCIL_REFERENCE;
2639   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2640      return V3DV_DYNAMIC_BLEND_CONSTANTS;
2641   case VK_DYNAMIC_STATE_DEPTH_BIAS:
2642      return V3DV_DYNAMIC_DEPTH_BIAS;
2643   case VK_DYNAMIC_STATE_LINE_WIDTH:
2644      return V3DV_DYNAMIC_LINE_WIDTH;
2645   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2646      return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2647
2648   /* Depth bounds testing is not available in in V3D 4.2 so here we are just
2649    * ignoring this dynamic state. We are already asserting at pipeline creation
2650    * time that depth bounds testing is not enabled.
2651    */
2652   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2653      return 0;
2654
2655   default:
2656      unreachable("Unhandled dynamic state");
2657   }
2658}
2659
2660static void
2661pipeline_init_dynamic_state(
2662   struct v3dv_pipeline *pipeline,
2663   const VkPipelineDynamicStateCreateInfo *pDynamicState,
2664   const VkPipelineViewportStateCreateInfo *pViewportState,
2665   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2666   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2667   const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2668   const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2669{
2670   /* Initialize to default values */
2671   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2672   memset(dynamic, 0, sizeof(*dynamic));
2673   dynamic->stencil_compare_mask.front = ~0;
2674   dynamic->stencil_compare_mask.back = ~0;
2675   dynamic->stencil_write_mask.front = ~0;
2676   dynamic->stencil_write_mask.back = ~0;
2677   dynamic->line_width = 1.0f;
2678   dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
2679
2680   /* Create a mask of enabled dynamic states */
2681   uint32_t dynamic_states = 0;
2682   if (pDynamicState) {
2683      uint32_t count = pDynamicState->dynamicStateCount;
2684      for (uint32_t s = 0; s < count; s++) {
2685         dynamic_states |=
2686            v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2687      }
2688   }
2689
2690   /* For any pipeline states that are not dynamic, set the dynamic state
2691    * from the static pipeline state.
2692    */
2693   if (pViewportState) {
2694      if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2695         dynamic->viewport.count = pViewportState->viewportCount;
2696         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2697                      pViewportState->viewportCount);
2698
2699         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2700            v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2701                                        dynamic->viewport.scale[i],
2702                                        dynamic->viewport.translate[i]);
2703         }
2704      }
2705
2706      if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2707         dynamic->scissor.count = pViewportState->scissorCount;
2708         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2709                      pViewportState->scissorCount);
2710      }
2711   }
2712
2713   if (pDepthStencilState) {
2714      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2715         dynamic->stencil_compare_mask.front =
2716            pDepthStencilState->front.compareMask;
2717         dynamic->stencil_compare_mask.back =
2718            pDepthStencilState->back.compareMask;
2719      }
2720
2721      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2722         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2723         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2724      }
2725
2726      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2727         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2728         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2729      }
2730   }
2731
2732   if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2733      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2734             sizeof(dynamic->blend_constants));
2735   }
2736
2737   if (pRasterizationState) {
2738      if (pRasterizationState->depthBiasEnable &&
2739          !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2740         dynamic->depth_bias.constant_factor =
2741            pRasterizationState->depthBiasConstantFactor;
2742         dynamic->depth_bias.depth_bias_clamp =
2743            pRasterizationState->depthBiasClamp;
2744         dynamic->depth_bias.slope_factor =
2745            pRasterizationState->depthBiasSlopeFactor;
2746      }
2747      if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2748         dynamic->line_width = pRasterizationState->lineWidth;
2749   }
2750
2751   if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2752      dynamic->color_write_enable = 0;
2753      for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2754         dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2755   }
2756
2757   pipeline->dynamic_state.mask = dynamic_states;
2758}
2759
2760static bool
2761stencil_op_is_no_op(const VkStencilOpState *stencil)
2762{
2763   return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2764          stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2765}
2766
2767static void
2768enable_depth_bias(struct v3dv_pipeline *pipeline,
2769                  const VkPipelineRasterizationStateCreateInfo *rs_info)
2770{
2771   pipeline->depth_bias.enabled = false;
2772   pipeline->depth_bias.is_z16 = false;
2773
2774   if (!rs_info || !rs_info->depthBiasEnable)
2775      return;
2776
2777   /* Check the depth/stencil attachment description for the subpass used with
2778    * this pipeline.
2779    */
2780   assert(pipeline->pass && pipeline->subpass);
2781   struct v3dv_render_pass *pass = pipeline->pass;
2782   struct v3dv_subpass *subpass = pipeline->subpass;
2783
2784   if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2785      return;
2786
2787   assert(subpass->ds_attachment.attachment < pass->attachment_count);
2788   struct v3dv_render_pass_attachment *att =
2789      &pass->attachments[subpass->ds_attachment.attachment];
2790
2791   if (att->desc.format == VK_FORMAT_D16_UNORM)
2792      pipeline->depth_bias.is_z16 = true;
2793
2794   pipeline->depth_bias.enabled = true;
2795}
2796
2797static void
2798pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2799                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
2800{
2801   if (!ds_info || !ds_info->depthTestEnable) {
2802      pipeline->ez_state = V3D_EZ_DISABLED;
2803      return;
2804   }
2805
2806   switch (ds_info->depthCompareOp) {
2807   case VK_COMPARE_OP_LESS:
2808   case VK_COMPARE_OP_LESS_OR_EQUAL:
2809      pipeline->ez_state = V3D_EZ_LT_LE;
2810      break;
2811   case VK_COMPARE_OP_GREATER:
2812   case VK_COMPARE_OP_GREATER_OR_EQUAL:
2813      pipeline->ez_state = V3D_EZ_GT_GE;
2814      break;
2815   case VK_COMPARE_OP_NEVER:
2816   case VK_COMPARE_OP_EQUAL:
2817      pipeline->ez_state = V3D_EZ_UNDECIDED;
2818      break;
2819   default:
2820      pipeline->ez_state = V3D_EZ_DISABLED;
2821      pipeline->incompatible_ez_test = true;
2822      break;
2823   }
2824
2825   /* If stencil is enabled and is not a no-op, we need to disable EZ */
2826   if (ds_info->stencilTestEnable &&
2827       (!stencil_op_is_no_op(&ds_info->front) ||
2828        !stencil_op_is_no_op(&ds_info->back))) {
2829         pipeline->ez_state = V3D_EZ_DISABLED;
2830   }
2831
2832   /* If the FS writes Z, then it may update against the chosen EZ direction */
2833   struct v3dv_shader_variant *fs_variant =
2834      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2835   if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2836       !fs_variant->prog_data.fs->writes_z_from_fep) {
2837      pipeline->ez_state = V3D_EZ_DISABLED;
2838   }
2839}
2840
2841static bool
2842pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2843{
2844   for (uint8_t i = 0; i < pipeline->va_count; i++) {
2845      if (vk_format_is_int(pipeline->va[i].vk_format))
2846         return true;
2847   }
2848   return false;
2849}
2850
2851/* @pipeline can be NULL. We assume in that case that all the attributes have
2852 * a float format (we only create an all-float BO once and we reuse it with
2853 * all float pipelines), otherwise we look at the actual type of each
2854 * attribute used with the specific pipeline passed in.
2855 */
2856struct v3dv_bo *
2857v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2858                                              struct v3dv_pipeline *pipeline)
2859{
2860   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2861   struct v3dv_bo *bo;
2862
2863   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2864
2865   if (!bo) {
2866      fprintf(stderr, "failed to allocate memory for the default "
2867              "attribute values\n");
2868      return NULL;
2869   }
2870
2871   bool ok = v3dv_bo_map(device, bo, size);
2872   if (!ok) {
2873      fprintf(stderr, "failed to map default attribute values buffer\n");
2874      return false;
2875   }
2876
2877   uint32_t *attrs = bo->map;
2878   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2879   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2880      attrs[i * 4 + 0] = 0;
2881      attrs[i * 4 + 1] = 0;
2882      attrs[i * 4 + 2] = 0;
2883      VkFormat attr_format =
2884         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2885      if (i < va_count && vk_format_is_int(attr_format)) {
2886         attrs[i * 4 + 3] = 1;
2887      } else {
2888         attrs[i * 4 + 3] = fui(1.0);
2889      }
2890   }
2891
2892   v3dv_bo_unmap(device, bo);
2893
2894   return bo;
2895}
2896
2897static void
2898pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2899                         const VkPipelineMultisampleStateCreateInfo *ms_info)
2900{
2901   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2902
2903   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2904    * requires this to be 0xf or 0x0 if using a single sample.
2905    */
2906   if (ms_info && ms_info->pSampleMask &&
2907       ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2908      pipeline->sample_mask &= ms_info->pSampleMask[0];
2909   }
2910}
2911
2912static void
2913pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2914                                 const VkPipelineMultisampleStateCreateInfo *ms_info)
2915{
2916   pipeline->sample_rate_shading =
2917      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2918      ms_info->sampleShadingEnable;
2919}
2920
2921static VkResult
2922pipeline_init(struct v3dv_pipeline *pipeline,
2923              struct v3dv_device *device,
2924              struct v3dv_pipeline_cache *cache,
2925              const VkGraphicsPipelineCreateInfo *pCreateInfo,
2926              const VkAllocationCallbacks *pAllocator)
2927{
2928   VkResult result = VK_SUCCESS;
2929
2930   pipeline->device = device;
2931
2932   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2933   pipeline->layout = layout;
2934
2935   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2936   assert(pCreateInfo->subpass < render_pass->subpass_count);
2937   pipeline->pass = render_pass;
2938   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2939
2940   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2941      pCreateInfo->pInputAssemblyState;
2942   pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2943
2944   /* If rasterization is not enabled, various CreateInfo structs must be
2945    * ignored.
2946    */
2947   const bool raster_enabled =
2948      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2949
2950   const VkPipelineViewportStateCreateInfo *vp_info =
2951      raster_enabled ? pCreateInfo->pViewportState : NULL;
2952
2953   const VkPipelineDepthStencilStateCreateInfo *ds_info =
2954      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2955
2956   const VkPipelineRasterizationStateCreateInfo *rs_info =
2957      raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2958
2959   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2960      rs_info ? vk_find_struct_const(
2961         rs_info->pNext,
2962         PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2963            NULL;
2964
2965   const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2966      rs_info ? vk_find_struct_const(
2967         rs_info->pNext,
2968         PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2969            NULL;
2970
2971   const VkPipelineColorBlendStateCreateInfo *cb_info =
2972      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2973
2974   const VkPipelineMultisampleStateCreateInfo *ms_info =
2975      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2976
2977   const VkPipelineColorWriteCreateInfoEXT *cw_info =
2978      cb_info ? vk_find_struct_const(cb_info->pNext,
2979                                     PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2980                NULL;
2981
2982   pipeline_init_dynamic_state(pipeline,
2983                               pCreateInfo->pDynamicState,
2984                               vp_info, ds_info, cb_info, rs_info, cw_info);
2985
2986   /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2987    * feature and it shouldn't be used by any pipeline.
2988    */
2989   assert(!ds_info || !ds_info->depthBoundsTestEnable);
2990
2991   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2992                                       rs_info, pv_info, ls_info,
2993                                       ms_info);
2994
2995   enable_depth_bias(pipeline, rs_info);
2996   pipeline_set_sample_mask(pipeline, ms_info);
2997   pipeline_set_sample_rate_shading(pipeline, ms_info);
2998
2999   pipeline->primitive_restart =
3000      pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3001
3002   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3003
3004   if (result != VK_SUCCESS) {
3005      /* Caller would already destroy the pipeline, and we didn't allocate any
3006       * extra info. We don't need to do anything else.
3007       */
3008      return result;
3009   }
3010
3011   const VkPipelineVertexInputStateCreateInfo *vi_info =
3012      pCreateInfo->pVertexInputState;
3013
3014   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3015      vk_find_struct_const(vi_info->pNext,
3016                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3017
3018   v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3019
3020   if (pipeline_has_integer_vertex_attrib(pipeline)) {
3021      pipeline->default_attribute_values =
3022         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3023      if (!pipeline->default_attribute_values)
3024         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3025   } else {
3026      pipeline->default_attribute_values = NULL;
3027   }
3028
3029   /* This must be done after the pipeline has been compiled */
3030   pipeline_set_ez_state(pipeline, ds_info);
3031
3032   return result;
3033}
3034
3035static VkResult
3036graphics_pipeline_create(VkDevice _device,
3037                         VkPipelineCache _cache,
3038                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
3039                         const VkAllocationCallbacks *pAllocator,
3040                         VkPipeline *pPipeline)
3041{
3042   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3043   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3044
3045   struct v3dv_pipeline *pipeline;
3046   VkResult result;
3047
3048   /* Use the default pipeline cache if none is specified */
3049   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3050      cache = &device->default_pipeline_cache;
3051
3052   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3053                               VK_OBJECT_TYPE_PIPELINE);
3054
3055   if (pipeline == NULL)
3056      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3057
3058   result = pipeline_init(pipeline, device, cache,
3059                          pCreateInfo,
3060                          pAllocator);
3061
3062   if (result != VK_SUCCESS) {
3063      v3dv_destroy_pipeline(pipeline, device, pAllocator);
3064      if (result == VK_PIPELINE_COMPILE_REQUIRED)
3065         *pPipeline = VK_NULL_HANDLE;
3066      return result;
3067   }
3068
3069   *pPipeline = v3dv_pipeline_to_handle(pipeline);
3070
3071   return VK_SUCCESS;
3072}
3073
3074VKAPI_ATTR VkResult VKAPI_CALL
3075v3dv_CreateGraphicsPipelines(VkDevice _device,
3076                             VkPipelineCache pipelineCache,
3077                             uint32_t count,
3078                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
3079                             const VkAllocationCallbacks *pAllocator,
3080                             VkPipeline *pPipelines)
3081{
3082   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3083   VkResult result = VK_SUCCESS;
3084
3085   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3086      mtx_lock(&device->pdevice->mutex);
3087
3088   uint32_t i = 0;
3089   for (; i < count; i++) {
3090      VkResult local_result;
3091
3092      local_result = graphics_pipeline_create(_device,
3093                                              pipelineCache,
3094                                              &pCreateInfos[i],
3095                                              pAllocator,
3096                                              &pPipelines[i]);
3097
3098      if (local_result != VK_SUCCESS) {
3099         result = local_result;
3100         pPipelines[i] = VK_NULL_HANDLE;
3101
3102         if (pCreateInfos[i].flags &
3103             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3104            break;
3105      }
3106   }
3107
3108   for (; i < count; i++)
3109      pPipelines[i] = VK_NULL_HANDLE;
3110
3111   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3112      mtx_unlock(&device->pdevice->mutex);
3113
3114   return result;
3115}
3116
3117static void
3118shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3119{
3120   assert(glsl_type_is_vector_or_scalar(type));
3121
3122   uint32_t comp_size = glsl_type_is_boolean(type)
3123      ? 4 : glsl_get_bit_size(type) / 8;
3124   unsigned length = glsl_get_vector_elements(type);
3125   *size = comp_size * length,
3126   *align = comp_size * (length == 3 ? 4 : length);
3127}
3128
3129static void
3130lower_cs_shared(struct nir_shader *nir)
3131{
3132   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3133            nir_var_mem_shared, shared_type_info);
3134   NIR_PASS(_, nir, nir_lower_explicit_io,
3135            nir_var_mem_shared, nir_address_format_32bit_offset);
3136}
3137
3138static VkResult
3139pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3140                         struct v3dv_pipeline_cache *cache,
3141                         const VkComputePipelineCreateInfo *info,
3142                         const VkAllocationCallbacks *alloc)
3143{
3144   VkPipelineCreationFeedback pipeline_feedback = {
3145      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3146   };
3147   int64_t pipeline_start = os_time_get_nano();
3148
3149   struct v3dv_device *device = pipeline->device;
3150   struct v3dv_physical_device *physical_device =
3151      &device->instance->physicalDevice;
3152
3153   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3154   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3155
3156   struct v3dv_pipeline_stage *p_stage =
3157      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3158                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3159   if (!p_stage)
3160      return VK_ERROR_OUT_OF_HOST_MEMORY;
3161
3162   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3163   p_stage->pipeline = pipeline;
3164   p_stage->stage = gl_shader_stage_to_broadcom(stage);
3165   p_stage->entrypoint = sinfo->pName;
3166   p_stage->module = vk_shader_module_from_handle(sinfo->module);
3167   p_stage->spec_info = sinfo->pSpecializationInfo;
3168   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3169
3170   vk_pipeline_hash_shader_stage(&info->stage, p_stage->shader_sha1);
3171
3172   p_stage->nir = NULL;
3173
3174   pipeline->cs = p_stage;
3175   pipeline->active_stages |= sinfo->stage;
3176
3177   /* First we try to get the variants from the pipeline cache (unless we are
3178    * required to capture internal representations, since in that case we need
3179    * compile).
3180    */
3181   bool needs_executable_info =
3182      info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3183   if (!needs_executable_info) {
3184      struct v3dv_pipeline_key pipeline_key;
3185      pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3186      pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3187
3188      bool cache_hit = false;
3189      pipeline->shared_data =
3190         v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3191
3192      if (pipeline->shared_data != NULL) {
3193         assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3194         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3195            pipeline_feedback.flags |=
3196               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3197
3198         goto success;
3199      }
3200   }
3201
3202   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3203      return VK_PIPELINE_COMPILE_REQUIRED;
3204
3205   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3206                                                               pipeline,
3207                                                               false);
3208   if (!pipeline->shared_data)
3209      return VK_ERROR_OUT_OF_HOST_MEMORY;
3210
3211   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3212
3213   /* If not found on cache, compile it */
3214   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3215   assert(p_stage->nir);
3216
3217   nir_optimize(p_stage->nir, false);
3218   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3219   lower_cs_shared(p_stage->nir);
3220
3221   VkResult result = VK_SUCCESS;
3222
3223   struct v3d_key key;
3224   memset(&key, 0, sizeof(key));
3225   pipeline_populate_v3d_key(&key, p_stage, 0,
3226                             pipeline->device->features.robustBufferAccess);
3227   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3228      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3229                                      alloc, &result);
3230
3231   if (result != VK_SUCCESS)
3232      return result;
3233
3234   if (!upload_assembly(pipeline))
3235      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3236
3237   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3238
3239success:
3240
3241   pipeline_check_buffer_device_address(pipeline);
3242
3243   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3244   write_creation_feedback(pipeline,
3245                           info->pNext,
3246                           &pipeline_feedback,
3247                           1,
3248                           &info->stage);
3249
3250   /* As we got the variants in pipeline->shared_data, after compiling we
3251    * don't need the pipeline_stages.
3252    */
3253   if (!needs_executable_info)
3254      pipeline_free_stages(device, pipeline, alloc);
3255
3256   pipeline_check_spill_size(pipeline);
3257
3258   return VK_SUCCESS;
3259}
3260
3261static VkResult
3262compute_pipeline_init(struct v3dv_pipeline *pipeline,
3263                      struct v3dv_device *device,
3264                      struct v3dv_pipeline_cache *cache,
3265                      const VkComputePipelineCreateInfo *info,
3266                      const VkAllocationCallbacks *alloc)
3267{
3268   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3269
3270   pipeline->device = device;
3271   pipeline->layout = layout;
3272
3273   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3274
3275   return result;
3276}
3277
3278static VkResult
3279compute_pipeline_create(VkDevice _device,
3280                         VkPipelineCache _cache,
3281                         const VkComputePipelineCreateInfo *pCreateInfo,
3282                         const VkAllocationCallbacks *pAllocator,
3283                         VkPipeline *pPipeline)
3284{
3285   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3286   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3287
3288   struct v3dv_pipeline *pipeline;
3289   VkResult result;
3290
3291   /* Use the default pipeline cache if none is specified */
3292   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3293      cache = &device->default_pipeline_cache;
3294
3295   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3296                               VK_OBJECT_TYPE_PIPELINE);
3297   if (pipeline == NULL)
3298      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3299
3300   result = compute_pipeline_init(pipeline, device, cache,
3301                                  pCreateInfo, pAllocator);
3302   if (result != VK_SUCCESS) {
3303      v3dv_destroy_pipeline(pipeline, device, pAllocator);
3304      if (result == VK_PIPELINE_COMPILE_REQUIRED)
3305         *pPipeline = VK_NULL_HANDLE;
3306      return result;
3307   }
3308
3309   *pPipeline = v3dv_pipeline_to_handle(pipeline);
3310
3311   return VK_SUCCESS;
3312}
3313
3314VKAPI_ATTR VkResult VKAPI_CALL
3315v3dv_CreateComputePipelines(VkDevice _device,
3316                            VkPipelineCache pipelineCache,
3317                            uint32_t createInfoCount,
3318                            const VkComputePipelineCreateInfo *pCreateInfos,
3319                            const VkAllocationCallbacks *pAllocator,
3320                            VkPipeline *pPipelines)
3321{
3322   V3DV_FROM_HANDLE(v3dv_device, device, _device);
3323   VkResult result = VK_SUCCESS;
3324
3325   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3326      mtx_lock(&device->pdevice->mutex);
3327
3328   uint32_t i = 0;
3329   for (; i < createInfoCount; i++) {
3330      VkResult local_result;
3331      local_result = compute_pipeline_create(_device,
3332                                              pipelineCache,
3333                                              &pCreateInfos[i],
3334                                              pAllocator,
3335                                              &pPipelines[i]);
3336
3337      if (local_result != VK_SUCCESS) {
3338         result = local_result;
3339         pPipelines[i] = VK_NULL_HANDLE;
3340
3341         if (pCreateInfos[i].flags &
3342             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3343            break;
3344      }
3345   }
3346
3347   for (; i < createInfoCount; i++)
3348      pPipelines[i] = VK_NULL_HANDLE;
3349
3350   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3351      mtx_unlock(&device->pdevice->mutex);
3352
3353   return result;
3354}
3355
3356static nir_shader *
3357pipeline_get_nir(struct v3dv_pipeline *pipeline,
3358                 enum broadcom_shader_stage stage)
3359{
3360   switch (stage) {
3361   case BROADCOM_SHADER_VERTEX:
3362      if (pipeline->vs)
3363         return pipeline->vs->nir;
3364      break;
3365   case BROADCOM_SHADER_VERTEX_BIN:
3366      if(pipeline->vs_bin)
3367         return pipeline->vs_bin->nir;
3368      break;
3369   case BROADCOM_SHADER_GEOMETRY:
3370      if(pipeline->gs)
3371         return pipeline->gs->nir;
3372      break;
3373   case BROADCOM_SHADER_GEOMETRY_BIN:
3374      if (pipeline->gs_bin)
3375         return pipeline->gs_bin->nir;
3376      break;
3377   case BROADCOM_SHADER_FRAGMENT:
3378      if (pipeline->fs)
3379         return pipeline->fs->nir;
3380      break;
3381   case BROADCOM_SHADER_COMPUTE:
3382      if(pipeline->cs)
3383         return pipeline->cs->nir;
3384      break;
3385   default:
3386      unreachable("Unsupported shader stage");
3387   }
3388
3389   return NULL;
3390}
3391
3392static struct v3d_prog_data *
3393pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3394                       enum broadcom_shader_stage stage)
3395{
3396   if (pipeline->shared_data->variants[stage])
3397      return pipeline->shared_data->variants[stage]->prog_data.base;
3398   return NULL;
3399}
3400
3401static uint64_t *
3402pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3403                 enum broadcom_shader_stage stage,
3404                 uint32_t *qpu_size)
3405{
3406   struct v3dv_shader_variant *variant =
3407      pipeline->shared_data->variants[stage];
3408   if (!variant) {
3409      *qpu_size = 0;
3410      return NULL;
3411   }
3412
3413   /* We expect the QPU BO to have been mapped before calling here */
3414   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3415   assert(qpu_bo && qpu_bo->map_size >= variant->assembly_offset +
3416                                        variant->qpu_insts_size);
3417
3418   *qpu_size = variant->qpu_insts_size;
3419   uint64_t *qpu = (uint64_t *)
3420      (((uint8_t *) qpu_bo->map) + variant->assembly_offset);
3421   return qpu;
3422}
3423
3424/* FIXME: we use the same macro in various drivers, maybe move it to
3425 * the comon vk_util.h?
3426 */
3427#define WRITE_STR(field, ...) ({                                \
3428   memset(field, 0, sizeof(field));                             \
3429   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3430   assert(_i > 0 && _i < sizeof(field));                        \
3431})
3432
3433static bool
3434write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3435              const char *data)
3436{
3437   ir->isText = VK_TRUE;
3438
3439   size_t data_len = strlen(data) + 1;
3440
3441   if (ir->pData == NULL) {
3442      ir->dataSize = data_len;
3443      return true;
3444   }
3445
3446   strncpy(ir->pData, data, ir->dataSize);
3447   if (ir->dataSize < data_len)
3448      return false;
3449
3450   ir->dataSize = data_len;
3451   return true;
3452}
3453
3454static void
3455append(char **str, size_t *offset, const char *fmt, ...)
3456{
3457   va_list args;
3458   va_start(args, fmt);
3459   ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3460   va_end(args);
3461}
3462
3463static void
3464pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3465{
3466   if (pipeline->executables.mem_ctx)
3467      return;
3468
3469   pipeline->executables.mem_ctx = ralloc_context(NULL);
3470   util_dynarray_init(&pipeline->executables.data,
3471                      pipeline->executables.mem_ctx);
3472
3473   /* Don't crash for failed/bogus pipelines */
3474   if (!pipeline->shared_data || !pipeline->shared_data->assembly_bo)
3475      return;
3476
3477   /* Map the assembly BO so we can read the pipeline's QPU code */
3478   struct v3dv_bo *qpu_bo = pipeline->shared_data->assembly_bo;
3479
3480   if (!v3dv_bo_map(pipeline->device, qpu_bo, qpu_bo->size)) {
3481      fprintf(stderr, "failed to map QPU buffer\n");
3482      return;
3483   }
3484
3485   for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3486      VkShaderStageFlags vk_stage =
3487         mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3488      if (!(vk_stage & pipeline->active_stages))
3489         continue;
3490
3491      nir_shader *nir = pipeline_get_nir(pipeline, s);
3492      char *nir_str = nir ?
3493         nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3494
3495      char *qpu_str = NULL;
3496      uint32_t qpu_size;
3497      uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3498      if (qpu) {
3499         uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
3500         qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3501                                qpu_inst_count * 96);
3502         size_t offset = 0;
3503         for (int i = 0; i < qpu_inst_count; i++) {
3504            const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3505            append(&qpu_str, &offset, "%s\n", str);
3506            ralloc_free((void *)str);
3507         }
3508      }
3509
3510      struct v3dv_pipeline_executable_data data = {
3511         .stage = s,
3512         .nir_str = nir_str,
3513         .qpu_str = qpu_str,
3514      };
3515      util_dynarray_append(&pipeline->executables.data,
3516                           struct v3dv_pipeline_executable_data, data);
3517   }
3518
3519   v3dv_bo_unmap(pipeline->device, qpu_bo);
3520}
3521
3522static const struct v3dv_pipeline_executable_data *
3523pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3524{
3525   assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3526                                             struct v3dv_pipeline_executable_data));
3527   return util_dynarray_element(&pipeline->executables.data,
3528                                struct v3dv_pipeline_executable_data,
3529                                index);
3530}
3531
3532VKAPI_ATTR VkResult VKAPI_CALL
3533v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3534   VkDevice device,
3535   const VkPipelineExecutableInfoKHR *pExecutableInfo,
3536   uint32_t *pInternalRepresentationCount,
3537   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3538{
3539   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3540
3541   pipeline_collect_executable_data(pipeline);
3542
3543   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3544                          pInternalRepresentations, pInternalRepresentationCount);
3545
3546   bool incomplete = false;
3547   const struct v3dv_pipeline_executable_data *exe =
3548      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3549
3550   if (exe->nir_str) {
3551      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3552                               &out, ir) {
3553         WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3554         WRITE_STR(ir->description, "Final NIR form");
3555         if (!write_ir_text(ir, exe->nir_str))
3556            incomplete = true;
3557      }
3558   }
3559
3560   if (exe->qpu_str) {
3561      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3562                               &out, ir) {
3563         WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3564         WRITE_STR(ir->description, "Final QPU assembly");
3565         if (!write_ir_text(ir, exe->qpu_str))
3566            incomplete = true;
3567      }
3568   }
3569
3570   return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3571}
3572
3573VKAPI_ATTR VkResult VKAPI_CALL
3574v3dv_GetPipelineExecutablePropertiesKHR(
3575   VkDevice device,
3576   const VkPipelineInfoKHR *pPipelineInfo,
3577   uint32_t *pExecutableCount,
3578   VkPipelineExecutablePropertiesKHR *pProperties)
3579{
3580   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3581
3582   pipeline_collect_executable_data(pipeline);
3583
3584   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3585                          pProperties, pExecutableCount);
3586
3587   util_dynarray_foreach(&pipeline->executables.data,
3588                         struct v3dv_pipeline_executable_data, exe) {
3589      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3590         gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3591         props->stages = mesa_to_vk_shader_stage(mesa_stage);
3592
3593         WRITE_STR(props->name, "%s (%s)",
3594                   _mesa_shader_stage_to_abbrev(mesa_stage),
3595                   broadcom_shader_stage_is_binning(exe->stage) ?
3596                     "Binning" : "Render");
3597
3598         WRITE_STR(props->description, "%s",
3599                   _mesa_shader_stage_to_string(mesa_stage));
3600
3601         props->subgroupSize = V3D_CHANNELS;
3602      }
3603   }
3604
3605   return vk_outarray_status(&out);
3606}
3607
3608VKAPI_ATTR VkResult VKAPI_CALL
3609v3dv_GetPipelineExecutableStatisticsKHR(
3610   VkDevice device,
3611   const VkPipelineExecutableInfoKHR *pExecutableInfo,
3612   uint32_t *pStatisticCount,
3613   VkPipelineExecutableStatisticKHR *pStatistics)
3614{
3615   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3616
3617   pipeline_collect_executable_data(pipeline);
3618
3619   const struct v3dv_pipeline_executable_data *exe =
3620      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3621
3622   struct v3d_prog_data *prog_data =
3623      pipeline_get_prog_data(pipeline, exe->stage);
3624
3625   struct v3dv_shader_variant *variant =
3626      pipeline->shared_data->variants[exe->stage];
3627   uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3628
3629   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3630                          pStatistics, pStatisticCount);
3631
3632   if (qpu_inst_count > 0) {
3633      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3634         WRITE_STR(stat->name, "Compile Strategy");
3635         WRITE_STR(stat->description, "Chosen compile strategy index");
3636         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3637         stat->value.u64 = prog_data->compile_strategy_idx;
3638      }
3639
3640      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3641         WRITE_STR(stat->name, "Instruction Count");
3642         WRITE_STR(stat->description, "Number of QPU instructions");
3643         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3644         stat->value.u64 = qpu_inst_count;
3645      }
3646
3647      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3648         WRITE_STR(stat->name, "Thread Count");
3649         WRITE_STR(stat->description, "Number of QPU threads dispatched");
3650         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3651         stat->value.u64 = prog_data->threads;
3652      }
3653
3654      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3655         WRITE_STR(stat->name, "Spill Size");
3656         WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3657         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3658         stat->value.u64 = prog_data->spill_size;
3659      }
3660
3661      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3662         WRITE_STR(stat->name, "TMU Spills");
3663         WRITE_STR(stat->description, "Number of times a register was spilled "
3664                                      "to memory");
3665         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3666         stat->value.u64 = prog_data->spill_size;
3667      }
3668
3669      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3670         WRITE_STR(stat->name, "TMU Fills");
3671         WRITE_STR(stat->description, "Number of times a register was filled "
3672                                      "from memory");
3673         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3674         stat->value.u64 = prog_data->spill_size;
3675      }
3676
3677      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3678         WRITE_STR(stat->name, "QPU Read Stalls");
3679         WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3680                                      "register read dependency");
3681         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3682         stat->value.u64 = prog_data->qpu_read_stalls;
3683      }
3684   }
3685
3686   return vk_outarray_status(&out);
3687}
3688