/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_stage_to_nir(struct anv_device *device,
                        const VkPipelineShaderStageCreateInfo *stage_info,
                        void *mem_ctx)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct anv_instance *instance = pdevice->instance;
   const struct brw_compiler *compiler = pdevice->compiler;
   gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[stage];

   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When Vulkan 1.3 is used or KHR_format_feature_flags2 is enabled,
          * support for read/write without format is reported per format, so
          * just report true here.  It's up to the application to check.
          */
         .image_read_without_format =
            instance->vk.app_info.api_version >= VK_API_VERSION_1_3 ||
            device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_query = pdevice->info.has_ray_tracing,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
         anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format where the NULL
       * pointer is 0.  That might be a better format for playing nice with
       * certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
   };

   nir_shader *nir;
   VkResult result =
      vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
                                      &spirv_options, nir_options,
                                      mem_ctx, &nir);
   if (result != VK_SUCCESS)
      return NULL;

   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   const nir_opt_access_options opt_access_options = {
      .is_vulkan = true,
      .infer_non_readable = true,
   };
   NIR_PASS(_, nir, nir_opt_access, &opt_access_options);

   NIR_PASS(_, nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
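   /* A minimal sketch of what a subclass might do (the batch_data field
    * here is hypothetical):
    *
    *    pipeline->batch.start = pipeline->batch.next = subclass->batch_data;
    *    pipeline->batch.end = subclass->batch_data + sizeof(subclass->batch_data);
    */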
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed.  The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k which is a bit of an edge case.  Since the sampler
    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
    * to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA.  With this came a new message for
    * reading from a 16x MSAA surface with compression.  The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
    * so we can just use it unconditionally.  This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < BRW_MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_base_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_base_prog_key *key)
{
   key->robust_buffer_access = robust_buffer_access;
   key->limit_trig_input_range =
      device->physical->instance->limit_trig_input_range;

   populate_sampler_prog_key(&device->info, &key->tex);
}

static void
populate_vs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_tcs_prog_key(const struct anv_device *device,
                      bool robust_buffer_access,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   key->input_vertices = input_vertices;
}

static void
populate_tes_prog_key(const struct anv_device *device,
                      bool robust_buffer_access,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_gs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const BITSET_WORD *dynamic,
                          const struct vk_multisample_state *ms,
                          const struct vk_fragment_shading_rate_state *fsr)
{
   /* The Vulkan 1.2.199 spec says:
    *
    *    "If any of the following conditions are met, Cxy' must be set to
    *    {1,1}:
    *
    *     * If Sample Shading is enabled.
    *     * [...]"
    *
    * And "sample shading" is defined as follows:
    *
    *    "Sample shading is enabled for a graphics pipeline:
    *
    *     * If the interface of the fragment shader entry point of the
    *       graphics pipeline includes an input variable decorated with
    *       SampleId or SamplePosition. In this case minSampleShadingFactor
    *       takes the value 1.0.
    *
    *     * Else if the sampleShadingEnable member of the
    *       VkPipelineMultisampleStateCreateInfo structure specified when
    *       creating the graphics pipeline is set to VK_TRUE. In this case
    *       minSampleShadingFactor takes the value of
    *       VkPipelineMultisampleStateCreateInfo::minSampleShading.
    *
    *    Otherwise, sample shading is considered disabled."
    *
    * The first bullet above is handled by the back-end compiler because those
    * inputs both force per-sample dispatch.  The second bullet is handled
    * here.  Note that this sample shading being enabled has nothing to do
    * with minSampleShading.
    */
   if (ms != NULL && ms->sample_shading_enable)
      return false;

   /* Not dynamic & the pipeline has a 1x1 fragment shading rate with no
    * possibility for any element of the pipeline to change the value.
    */
   if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
       fsr->fragment_size.width <= 1 &&
       fsr->fragment_size.height <= 1 &&
       fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

static void
populate_task_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_task_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_mesh_prog_key(const struct anv_device *device,
                       bool robust_buffer_access,
                       struct brw_mesh_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     bool robust_buffer_access,
                     const BITSET_WORD *dynamic,
                     const struct vk_multisample_state *ms,
                     const struct vk_fragment_shading_rate_state *fsr,
                     const struct vk_render_pass_state *rp,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;

   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);

   /* We set this to 0 here and set it to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   assert(rp->color_attachment_count <= MAX_RTS);
   /* Consider all outputs as valid until we look at the NIR variables. */
   key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
   key->nr_color_regions = rp->color_attachment_count;

   /* To reduce possible shader recompilations we would need to know if
    * there is a SampleMask output variable, so we could decide whether to
    * emit code to work around the hardware issue where alpha-to-coverage
    * is disabled when a SampleMask output is present.
    */
   key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms != NULL) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms->rasterization_samples > 1) {
         key->persample_interp = ms->sample_shading_enable &&
            (ms->min_sample_shading * ms->rasterization_samples) > 1;
         key->multisample_fbo = true;
      }

      if (device->physical->instance->sample_mask_out_opengl_behaviour)
         key->ignore_sample_mask_out = !key->multisample_fbo;
   }

   key->coarse_pixel =
      !key->persample_interp &&
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, dynamic, ms, fsr);
}

static void
populate_cs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

static void
populate_bs_prog_key(const struct anv_device *device,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(device, robust_buffer_access, &key->base);
}

struct anv_pipeline_stage {
   gl_shader_stage stage;

   const VkPipelineShaderStageCreateInfo *info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

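   /* Pipeline-cache key for this shader: the stage plus a SHA-1 of the
    * whole pipeline state (see anv_pipeline_hash_graphics() and friends).
    */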
   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
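   /* One entry per compiled SIMD variant: fragment shaders can produce up
    * to three (SIMD8/16/32); every other stage produces exactly one.
    */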
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedback feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->view_mask,
                     sizeof(pipeline->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (stages[s].info) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const struct anv_device *device = pipeline->base.device;

   const bool rba = device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   const bool afs = device->physical->instance->assume_full_subgroups;
   _mesa_sha1_update(&ctx, &afs, sizeof(afs));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct vk_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
      pipeline->device->physical->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[stage->stage];
   nir_shader *nir;

   nir = anv_device_search_for_nir(pipeline->device, cache,
                                   nir_options,
                                   stage->shader_sha1,
                                   mem_ctx);
   if (nir) {
      assert(nir->info.stage == stage->stage);
      return nir;
   }

   nir = anv_shader_stage_to_nir(pipeline->device, stage->info, mem_ctx);
   if (nir) {
      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
      return nir;
   }

   return NULL;
}

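/* Size/align callback for nir_lower_vars_to_explicit_types on shared
 * memory: booleans take 32 bits and a vec3 is aligned like a vec4.
 */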
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}

static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_wpos_center);
      NIR_PASS(_, nir, nir_lower_input_attachments,
               &(nir_input_attachment_options) {
                   .use_fragcoord_sysval = true,
                   .use_layer_id_sysval = true,
               });
   }

   NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS(_, nir, anv_nir_lower_multiview,
               anv_pipeline_to_graphics(pipeline));
   }

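   /* Re-gather shader info so nir->info reflects the lowering done above. */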
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler->devinfo);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
            nir_address_format_64bit_global);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
            nir_address_format_32bit_offset);

   NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
              pdevice, pipeline->device->robust_buffer_access,
              layout, &stage->bind_map);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
            anv_nir_ubo_addr_format(pdevice,
               pipeline->device->robust_buffer_access));
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
            anv_nir_ssbo_addr_format(pdevice,
               pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_constant_folding);

   NIR_PASS(_, nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
   NIR_PASS(_, nir, nir_lower_non_uniform_access,
            &(nir_lower_non_uniform_access_options) {
                .types = nir_lower_non_uniform_texture_access |
                         nir_lower_non_uniform_image_access,
                .callback = NULL,
            });

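   /* Compute the final push constant layout; this populates the push
    * ranges in stage->bind_map.
    */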
   NIR_PASS_V(nir, anv_nir_compute_push_layout,
              pdevice, pipeline->device->robust_buffer_access,
              prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS(_, nir, nir_lower_explicit_io,
               nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the size
          * used by the shader to chunk_size -- which does simplify the logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
                  shared_size, chunk_size);
      }
   }

   if (gl_shader_stage_is_compute(nir->info.stage) ||
       gl_shader_stage_is_mesh(nir->info.stage))
      NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);

   stage->nir = nir;
}

static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
      MAX2(1, util_bitcount(pipeline->view_mask)) : 1;

   brw_compute_vue_map(compiler->devinfo,
                       &vs_stage->prog_data.vs.base.vue_map,
                       vs_stage->nir->info.outputs_written,
                       vs_stage->nir->info.separate_shader,
                       pos_slots);

   vs_stage->num_stats = 1;

   struct brw_compile_vs_params params = {
      .nir = vs_stage->nir,
      .key = &vs_stage->key.vs,
      .prog_data = &vs_stage->prog_data.vs,
      .stats = vs_stage->stats,
      .log_data = pipeline->base.device,
   };

   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode."
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache.  So it should be safe.
    */
   tcs_stage->key.tcs._tes_primitive_mode =
      tes_stage->nir->info.tess._primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;

   struct brw_compile_tcs_params params = {
      .nir = tcs_stage->nir,
      .key = &tcs_stage->key.tcs,
      .prog_data = &tcs_stage->prog_data.tcs,
      .stats = tcs_stage->stats,
      .log_data = device,
   };

   tcs_stage->code = brw_compile_tcs(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;

   struct brw_compile_tes_params params = {
      .nir = tes_stage->nir,
      .key = &tes_stage->key.tes,
      .prog_data = &tes_stage->prog_data.tes,
      .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
      .stats = tes_stage->stats,
      .log_data = device,
   };

   tes_stage->code = brw_compile_tes(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;

   struct brw_compile_gs_params params = {
      .nir = gs_stage->nir,
      .key = &gs_stage->key.gs,
      .prog_data = &gs_stage->prog_data.gs,
      .stats = gs_stage->stats,
      .log_data = device,
   };

   gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_task(const struct brw_compiler *compiler,
                       struct anv_pipeline_stage *task_stage,
                       struct anv_pipeline_stage *next_stage)
{
   assert(next_stage);
   assert(next_stage->stage == MESA_SHADER_MESH);
   brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_task(const struct brw_compiler *compiler,
                          void *mem_ctx,
                          struct anv_device *device,
                          struct anv_pipeline_stage *task_stage)
{
   task_stage->num_stats = 1;

   struct brw_compile_task_params params = {
      .nir = task_stage->nir,
      .key = &task_stage->key.task,
      .prog_data = &task_stage->prog_data.task,
      .stats = task_stage->stats,
      .log_data = device,
   };

   task_stage->code = brw_compile_task(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_mesh(const struct brw_compiler *compiler,
                       struct anv_pipeline_stage *mesh_stage,
                       struct anv_pipeline_stage *next_stage)
{
   if (next_stage) {
      brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
   }
}

static void
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
                          void *mem_ctx,
                          struct anv_device *device,
                          struct anv_pipeline_stage *mesh_stage,
                          struct anv_pipeline_stage *prev_stage)
{
   mesh_stage->num_stats = 1;

   struct brw_compile_mesh_params params = {
      .nir = mesh_stage->nir,
      .key = &mesh_stage->key.mesh,
      .prog_data = &mesh_stage->prog_data.mesh,
      .stats = mesh_stage->stats,
      .log_data = device,
   };

   if (prev_stage) {
      assert(prev_stage->stage == MESA_SHADER_TASK);
      params.tue_map = &prev_stage->prog_data.task.map;
   }

   mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, &params);
}

static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage,
                     const struct vk_render_pass_state *rp)
{
   /* Initially the valid outputs value is set to all possible render targets
    * valid (see populate_wm_prog_key()), before we look at the shader
    * variables.  Here we look at the output variables of the shader and
    * compute the correct number of render target outputs.
    */
   stage->key.wm.color_outputs_valid = 0;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
   }
   stage->key.wm.color_outputs_valid &=
      (1u << rp->color_attachment_count) - 1;
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);

   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
   typed_memcpy(stage->bind_map.surface_to_descriptor,
                rt_bindings, num_rt_bindings);
   stage->bind_map.surface_count += num_rt_bindings;
}

static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader, but
    * we need this before we call spirv_to_nir.
    */
   assert(prev_stage);

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   if (prev_stage->stage == MESA_SHADER_MESH) {
      params.mue_map = &prev_stage->prog_data.mesh.map;
      /* TODO(mesh): Slots valid, do we even use/rely on it? */
   } else {
      fs_stage->key.wm.input_slots_valid =
         prev_stage->prog_data.vue.vue_map.slots_valid;
   }

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;
}

static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            fprintf(stream, "    RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks.  It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->physical->compiler->isa,
                        stage->code, code_offset, stream);

      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

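      /* One stats entry per compiled SIMD variant, in SIMD8/16/32 order
       * (matching how anv_pipeline_compile_fs() counts num_stats).
       */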
      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }

   pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
}

static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
{
   /* TODO: Cache this pipeline-wide information. */

   if (anv_pipeline_is_primitive(pipeline)) {
      /* Primitive replication depends on information from all the shaders.
       * Recover this bit from the fact that we have more than one position slot
       * in the vertex shader when using it.
       */
      assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
      int pos_slots = 0;
      const struct brw_vue_prog_data *vue_prog_data =
         (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
      const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
      for (int i = 0; i < vue_map->num_slots; i++) {
         if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
            pos_slots++;
      }
      pipeline->use_primitive_replication = pos_slots > 1;
   }
}

static void
anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
                                const struct vk_graphics_pipeline_state *state,
                                struct anv_pipeline_stage *stages)
{
   for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      vk_pipeline_hash_shader_stage(stages[s].info, stages[s].shader_sha1);

      const struct anv_device *device = pipeline->base.device;
      switch (stages[s].stage) {
      case MESA_SHADER_VERTEX:
         populate_vs_prog_key(device,
                              pipeline->base.device->robust_buffer_access,
                              &stages[s].key.vs);
         break;
      case MESA_SHADER_TESS_CTRL:
         populate_tcs_prog_key(device,
                               pipeline->base.device->robust_buffer_access,
                               state->ts->patch_control_points,
                               &stages[s].key.tcs);
         break;
      case MESA_SHADER_TESS_EVAL:
         populate_tes_prog_key(device,
                               pipeline->base.device->robust_buffer_access,
                               &stages[s].key.tes);
         break;
      case MESA_SHADER_GEOMETRY:
         populate_gs_prog_key(device,
                              pipeline->base.device->robust_buffer_access,
                              &stages[s].key.gs);
         break;
      case MESA_SHADER_FRAGMENT: {
         populate_wm_prog_key(pipeline,
                              pipeline->base.device->robust_buffer_access,
                              state->dynamic, state->ms, state->fsr, state->rp,
                              &stages[s].key.wm);
         break;
      }
      case MESA_SHADER_TASK:
         populate_task_prog_key(device,
                                pipeline->base.device->robust_buffer_access,
                                &stages[s].key.task);
         break;
      case MESA_SHADER_MESH:
         populate_mesh_prog_key(device,
                                pipeline->base.device->robust_buffer_access,
                                &stages[s].key.mesh);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
      stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
   }

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
          pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
}

static bool
anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
                                          struct vk_pipeline_cache *cache,
                                          struct anv_pipeline_stage *stages,
                                          VkPipelineCreationFeedbackEXT *pipeline_feedback)
{
   unsigned found = 0;
   unsigned cache_hits = 0;
   for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      struct anv_shader_bin *bin =
         anv_device_search_for_kernel(pipeline->base.device, cache,
                                      &stages[s].cache_key,
                                      sizeof(stages[s].cache_key), &cache_hit);
      if (bin) {
         found++;
         pipeline->shaders[s] = bin;
      }

      if (cache_hit) {
         cache_hits++;
         stages[s].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }
      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   if (found == __builtin_popcount(pipeline->active_stages)) {
      if (cache_hits == found) {
         pipeline_feedback->flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }
      /* We found all our shaders in the cache.  We're done. */
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         if (!stages[s].info)
            continue;

         anv_pipeline_add_executables(&pipeline->base, &stages[s],
                                      pipeline->shaders[s]);
      }
      anv_pipeline_init_from_cached_graphics(pipeline);
      return true;
   } else if (found > 0) {
      /* We found some but not all of our shaders. This shouldn't happen most
       * of the time but it can if we have a partially populated pipeline
       * cache.
       */
      assert(found < __builtin_popcount(pipeline->active_stages));

      vk_perf(VK_LOG_OBJS(cache ? &cache->base :
                                  &pipeline->base.device->vk.base),
              "Found a partial pipeline in the cache.  This is "
              "most likely caused by an incomplete pipeline cache "
              "import or export");

      /* We're going to have to recompile anyway, so just throw away our
       * references to the shaders in the cache.  We'll get them out of the
       * cache again as part of the compilation process.
       */
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         stages[s].feedback.flags = 0;
         if (pipeline->shaders[s]) {
            anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
            pipeline->shaders[s] = NULL;
         }
      }
   }

   return false;
}

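/* Graphics stages in pipeline order.  We walk this array forward to load
 * and compile NIR and backward to link each stage to its successor.
 */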
1455static const gl_shader_stage graphics_shader_order[] = {
1456   MESA_SHADER_VERTEX,
1457   MESA_SHADER_TESS_CTRL,
1458   MESA_SHADER_TESS_EVAL,
1459   MESA_SHADER_GEOMETRY,
1460
1461   MESA_SHADER_TASK,
1462   MESA_SHADER_MESH,
1463
1464   MESA_SHADER_FRAGMENT,
1465};

static VkResult
anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
                               struct vk_pipeline_cache *cache,
                               struct anv_pipeline_stage *stages,
                               void *pipeline_ctx)
{
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      assert(stages[s].stage == s);
      assert(pipeline->shaders[s] == NULL);

      stages[s].bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stages[s].surface_to_descriptor,
         .sampler_to_descriptor = stages[s].sampler_to_descriptor
      };

      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx,
                                                 &stages[s]);
      if (stages[s].nir == NULL) {
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   return VK_SUCCESS;
}

static VkResult
anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
                              struct vk_pipeline_cache *cache,
                              const VkGraphicsPipelineCreateInfo *info,
                              const struct vk_graphics_pipeline_state *state)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
   VkResult result;

   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
   struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
   for (uint32_t i = 0; i < info->stageCount; i++) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
      stages[stage].stage = stage;
      stages[stage].info = &info->pStages[i];
   }

   anv_graphics_pipeline_init_keys(pipeline, state, stages);

   unsigned char sha1[20];
   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);

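   /* All stages share the pipeline-wide hash computed above; the stage enum
    * stored in each cache key is what keeps the per-stage cache entries
    * distinct.
    */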
   for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
      if (!stages[s].info)
         continue;

      stages[s].cache_key.stage = s;
      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
   }

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
   if (!skip_cache_lookup) {
      bool found_all_shaders =
         anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
                                                   &pipeline_feedback);
      if (found_all_shaders)
         goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
      return VK_PIPELINE_COMPILE_REQUIRED;

   void *pipeline_ctx = ralloc_context(NULL);

   result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
                                           pipeline_ctx);
   if (result != VK_SUCCESS)
      goto fail;

   /* Walk backwards to link: each stage is linked against its consumer, so
    * the consumer has to be processed first.
    */
   struct anv_pipeline_stage *next_stage = NULL;
   for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TASK:
         anv_pipeline_link_task(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_MESH:
         anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s], state->rp);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      next_stage = &stages[s];
   }

   if (pipeline->base.device->info.ver >= 12 &&
       pipeline->view_mask != 0) {
      /* For some pipelines HW Primitive Replication can be used instead of
       * instancing to implement Multiview.  This depends on how viewIndex is
       * used in all the active shaders, so the check can't be done on
       * individual shaders.
       */
      nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
      for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
         shaders[s] = stages[s].nir;

      pipeline->use_primitive_replication =
         anv_check_for_primitive_replication(shaders, pipeline);
   } else {
      pipeline->use_primitive_replication = false;
   }

   struct anv_pipeline_stage *prev_stage = NULL;
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);

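      /* If the backend requires matching interfaces between stages, mirror
       * each stage's inputs/outputs into its neighbor.  The tessellation
       * levels are masked out, presumably because they travel through
       * dedicated TCS/TES mechanisms rather than ordinary varying slots.
       */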
      if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
      }

      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   /* If the platform can write the primitive shading rate from a geometry
    * stage, figure out the last geometry stage that should write it and
    * ensure the output is marked as written there.  The backend will write
    * a default value if the shader doesn't actually write it.
    *
    * We iterate backwards through the stages and stop at the first shader
    * that can set the value.
    */
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   if (devinfo->has_coarse_pixel_primitive_and_cb &&
       stages[MESA_SHADER_FRAGMENT].info &&
       stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
       !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
       stages[MESA_SHADER_MESH].info == NULL) {
      struct anv_pipeline_stage *last_psr = NULL;

      for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
         gl_shader_stage s =
            graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];

         if (!stages[s].info ||
             !gl_shader_stage_can_set_fragment_shading_rate(s))
            continue;

         last_psr = &stages[s];
         break;
      }

      assert(last_psr);
      last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
   }

   prev_stage = NULL;
   for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
      gl_shader_stage s = graphics_shader_order[i];
      if (!stages[s].info)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
                                 &stages[s]);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      case MESA_SHADER_TASK:
         anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
                                   &stages[s]);
         break;
      case MESA_SHADER_MESH:
         anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
                                   &stages[s], prev_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }
      if (stages[s].code == NULL) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_nir_validate_push_layout(&stages[s].prog_data.base,
                                   &stages[s].bind_map);

      struct anv_shader_bin *bin =
         anv_device_upload_kernel(pipeline->base.device, cache, s,
                                  &stages[s].cache_key,
                                  sizeof(stages[s].cache_key),
                                  stages[s].code,
                                  stages[s].prog_data.base.program_size,
                                  &stages[s].prog_data.base,
                                  brw_prog_data_size(s),
                                  stages[s].stats, stages[s].num_stats,
                                  stages[s].nir->xfb_info,
                                  &stages[s].bind_map);
      if (!bin) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);

      pipeline->shaders[s] = bin;
      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   ralloc_free(pipeline_ctx);

done:

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
      assert(stage_count == 0 || info->stageCount == stage_count);
      for (uint32_t i = 0; i < stage_count; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;

fail:
   ralloc_free(pipeline_ctx);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
   }

   return result;
}

static VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct vk_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info)
{
   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
   assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);

   VkPipelineCreationFeedback pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   struct anv_device *device = pipeline->base.device;
   const struct brw_compiler *compiler = device->physical->compiler;

   struct anv_pipeline_stage stage = {
      .stage = MESA_SHADER_COMPUTE,
      .info = &info->stage,
      .cache_key = {
         .stage = MESA_SHADER_COMPUTE,
      },
      .feedback = {
         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
      },
   };
   vk_pipeline_hash_shader_stage(&info->stage, stage.shader_sha1);

   struct anv_shader_bin *bin = NULL;

   populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);

   bool cache_hit = false;
   if (!skip_cache_lookup) {
      bin = anv_device_search_for_kernel(device, cache,
                                         &stage.cache_key,
                                         sizeof(stage.cache_key),
                                         &cache_hit);
   }

   if (bin == NULL &&
       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
      return VK_PIPELINE_COMPILE_REQUIRED;

   void *mem_ctx = ralloc_context(NULL);
   if (bin == NULL) {
      int64_t stage_start = os_time_get_nano();

      stage.bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stage.surface_to_descriptor,
         .sampler_to_descriptor = stage.sampler_to_descriptor
      };

      /* Set up a binding for the gl_NumWorkGroups */
      stage.bind_map.surface_count = 1;
      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
      };

      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
      if (stage.nir == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);

      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);

      unsigned local_size = stage.nir->info.workgroup_size[0] *
                            stage.nir->info.workgroup_size[1] *
                            stage.nir->info.workgroup_size[2];

      /* Games don't always request full subgroups when they should, which
       * can cause bugs if they end up expecting a bigger subgroup size than
       * the one we pick for execution.
       */
      if (device->physical->instance->assume_full_subgroups &&
          stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
          stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
          local_size &&
          local_size % BRW_SUBGROUP_SIZE == 0)
         stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
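
      /* For example (illustrative numbers, not from this codebase): an 8x8x1
       * workgroup has local_size == 64, a multiple of BRW_SUBGROUP_SIZE, so
       * the heuristic above would promote it to SUBGROUP_SIZE_FULL_SUBGROUPS.
       */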

      /* If the client requests that we dispatch full subgroups but doesn't
       * allow us to pick a subgroup size, we have to smash it to the API
       * value of 32.  Performance will likely be terrible in this case but
       * there's nothing we can do about that.  The client should have chosen
       * a size.
       */
      if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
         stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;

      stage.num_stats = 1;

      struct brw_compile_cs_params params = {
         .nir = stage.nir,
         .key = &stage.key.cs,
         .prog_data = &stage.prog_data.cs,
         .stats = stage.stats,
         .log_data = device,
      };

      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
      if (stage.code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);

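      /* The compiled shader never reads gl_NumWorkGroups, so release the
       * surface slot we reserved for it by turning it into a null binding.
       */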
      if (!stage.prog_data.cs.uses_num_work_groups) {
         assert(stage.bind_map.surface_to_descriptor[0].set ==
                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
      }

      const unsigned code_size = stage.prog_data.base.program_size;
      bin = anv_device_upload_kernel(device, cache,
                                     MESA_SHADER_COMPUTE,
                                     &stage.cache_key, sizeof(stage.cache_key),
                                     stage.code, code_size,
                                     &stage.prog_data.base,
                                     sizeof(stage.prog_data.cs),
                                     stage.stats, stage.num_stats,
                                     NULL, &stage.bind_map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      stage.feedback.duration = os_time_get_nano() - stage_start;
   }

   anv_pipeline_add_executables(&pipeline->base, &stage, bin);

   ralloc_free(mem_ctx);

   if (cache_hit) {
      stage.feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
   }
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      if (create_feedback->pipelineStageCreationFeedbackCount) {
         assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
         create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
      }
   }

   pipeline->cs = bin;

   return VK_SUCCESS;
}

static VkResult
anv_compute_pipeline_create(struct anv_device *device,
                            struct vk_pipeline_cache *cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipeline)
{
   struct anv_compute_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
                              pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
                         pipeline->batch_data, sizeof(pipeline->batch_data));

   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, pAllocator);
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   anv_genX(&device->info, compute_pipeline_emit)(pipeline);

   *pPipeline = anv_pipeline_to_handle(&pipeline->base);

   return pipeline->base.batch.status;
}

VkResult anv_CreateComputePipelines(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkComputePipelineCreateInfo*          pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
                                                 &pCreateInfos[i],
                                                 pAllocator, &pPipelines[i]);

      if (res == VK_SUCCESS)
         continue;

      /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
       * as it is not obvious which error should be reported when two
       * different creations fail.
       */
      result = res;
      if (res != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      pPipelines[i] = VK_NULL_HANDLE;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

/**
 * Calculate the desired L3 partitioning based on the current state of the
 * pipeline.  For now this simply returns the conservative defaults calculated
 * by get_default_l3_weights(), but we could probably do better by gathering
 * more statistics from the pipeline state (e.g. a guess of expected URB usage
 * and bound surfaces), or by using feedback from performance counters.
 */
void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
{
   const struct intel_device_info *devinfo = &pipeline->device->info;

   const struct intel_l3_weights w =
      intel_get_default_l3_weights(devinfo, true, needs_slm);

   pipeline->l3_config = intel_get_l3_config(devinfo, w);
}

static VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
                           struct anv_device *device,
                           struct vk_pipeline_cache *cache,
                           const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const struct vk_graphics_pipeline_state *state,
                           const VkAllocationCallbacks *alloc)
{
   VkResult result;

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
                              alloc);
   if (result != VK_SUCCESS)
      return result;

   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
                         pipeline->batch_data, sizeof(pipeline->batch_data));

   pipeline->active_stages = 0;
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      pipeline->active_stages |= pCreateInfo->pStages[i].stage;

   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;

   if (anv_pipeline_is_mesh(pipeline))
      assert(device->physical->vk.supported_extensions.NV_mesh_shader);

   pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
   vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);

   pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
   pipeline->depth_clip_enable = state->rs->depth_clip_enable;
   pipeline->view_mask = state->rp->view_mask;

   result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, alloc);
      return result;
   }

   anv_pipeline_setup_l3_config(&pipeline->base, false);

   if (anv_pipeline_is_primitive(pipeline)) {
      const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;

      u_foreach_bit(a, state->vi->attributes_valid) {
         if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
            pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
      }

      u_foreach_bit(b, state->vi->bindings_valid) {
         pipeline->vb[b].stride = state->vi->bindings[b].stride;
         pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
                                      VK_VERTEX_INPUT_RATE_INSTANCE;
         pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
      }

      /* Our implementation of VK_KHR_multiview uses instancing to draw the
       * different views.  If the client asks for instancing, we need to
       * multiply the instance divisor by the number of views to ensure that
       * we repeat the client's per-instance data once for each view.
       */
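      /* For example, view_mask = 0x5 (views 0 and 2) gives an
       * instance_multiplier of 2: each client instance is drawn as two HW
       * instances, and a client-provided divisor of N effectively behaves
       * like 2N, so the per-instance data repeats once per view.
       */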
      pipeline->instance_multiplier = 1;
      if (pipeline->view_mask && !pipeline->use_primitive_replication)
         pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
   } else {
      assert(anv_pipeline_is_mesh(pipeline));
      /* TODO(mesh): Mesh vs. Multiview with Instancing. */
   }

   pipeline->negative_one_to_one =
      state->vp != NULL && state->vp->negative_one_to_one;

   /* Store the line mode, polygon mode and rasterization samples; these are
    * used for dynamic primitive topology.
    */
   pipeline->polygon_mode = state->rs->polygon_mode;
   pipeline->rasterization_samples =
      state->ms != NULL ? state->ms->rasterization_samples : 1;
   pipeline->line_mode = state->rs->line.mode;
   if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
      if (pipeline->rasterization_samples > 1) {
         pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
      } else {
         pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      }
   }
   pipeline->patch_control_points =
      state->ts != NULL ? state->ts->patch_control_points : 0;

   /* Store the color write masks, to be merged with color write enable if
    * dynamic.
    */
   if (state->cb != NULL) {
      for (unsigned i = 0; i < state->cb->attachment_count; i++)
         pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
   }

   return VK_SUCCESS;
}

static VkResult
anv_graphics_pipeline_create(struct anv_device *device,
                             struct vk_pipeline_cache *cache,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipeline)
{
   struct anv_graphics_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_graphics_pipeline_all_state all;
   struct vk_graphics_pipeline_state state = { };
   result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
                                            NULL /* sp_info */,
                                            &all, NULL, 0, NULL);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   result = anv_graphics_pipeline_init(pipeline, device, cache,
                                       pCreateInfo, &state, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   anv_genX(&device->info, graphics_pipeline_emit)(pipeline, &state);

   *pPipeline = anv_pipeline_to_handle(&pipeline->base);

   return pipeline->base.batch.status;
}

VkResult anv_CreateGraphicsPipelines(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      VkResult res = anv_graphics_pipeline_create(device,
                                                  pipeline_cache,
                                                  &pCreateInfos[i],
                                                  pAllocator, &pPipelines[i]);

      if (res == VK_SUCCESS)
         continue;

      /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
       * as it is not obvious which error should be reported when two
       * different creations fail.
       */
      result = res;
      if (res != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      pPipelines[i] = VK_NULL_HANDLE;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct vk_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS(_, nir, nir_lower_shader_calls,
               nir_address_format_64bit_global,
               BRW_BTD_STACK_ALIGN,
               &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS(_, nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

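   /* nir_lower_shader_calls splits the shader at its shader-call sites,
    * producing a resume shader for each point where execution may need to
    * pick up again; each of these gets the same lowering as the main shader.
    */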
   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   struct brw_compile_bs_params params = {
      .nir = nir,
      .key = &stage->key.bs,
      .prog_data = &stage->prog_data.bs,
      .num_resume_shaders = num_resume_shaders,
      .resume_shaders = resume_shaders,

      .stats = stage->stats,
      .log_data = pipeline->base.device,
   };

   stage->code = brw_compile_bs(compiler, mem_ctx, &params);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
       *       rayGenStackMax +
       *       min(1, maxPipelineRayRecursionDepth) ×
       *       max(closestHitStackMax, missStackMax,
       *           intersectionStackMax + anyHitStackMax) +
       *       max(0, maxPipelineRayRecursionDepth-1) ×
       *       max(closestHitStackMax, missStackMax) +
       *       2 × callableStackMax"
       */
      pipeline->stack_size =
         stack_max[MESA_SHADER_RAYGEN] +
         MIN2(1, info->maxPipelineRayRecursionDepth) *
         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS],
              stack_max[MESA_SHADER_INTERSECTION],
              stack_max[MESA_SHADER_ANY_HIT]) +
         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS]) +
         2 * stack_max[MESA_SHADER_CALLABLE];
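
      /* Illustrative example (numbers are made up): with
       * maxPipelineRayRecursionDepth = 1, rayGen = 64, closestHit = 128,
       * miss = 32, intersection = 64, anyHit = 16 and callable = 0, the
       * expression above yields
       *    64 + 1 * max(128, 32, 64, 16) + 0 * max(128, 32) + 2 * 0 = 192.
       * Note that, unlike the spec text, intersection and any-hit are not
       * summed here, presumably because any-hit shaders used in procedural
       * hit groups get combined into the intersection shaders, so the
       * intersection stack max already accounts for them.
       */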
      /* This is an extremely unlikely case but we need to set it to some
       * non-zero value so that we don't accidentally think it's dynamic.
       * Our minimum stack size is 2KB anyway so we could set it to any
       * small value we like.
       */
      if (pipeline->stack_size == 0)
         pipeline->stack_size = 1;
   }
}

static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus the potential
    * combinations within the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (vk_pipeline_shader_stage_is_null(sinfo))
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .info = sinfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
         },
      };

      populate_bs_prog_key(pipeline->base.device,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1);

      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}

static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct vk_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

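   /* Only report success if every stage was a cache hit; a miss leaves
    * stages[i].bin == NULL, and the caller then compiles that stage.
    */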
   return cache_hits == shaders;
}

static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct vk_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info)
{
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   VkResult result;

   VkPipelineCreationFeedback pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   void *pipeline_ctx = ralloc_context(NULL);

   struct anv_pipeline_stage *stages =
      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};

   if (!skip_cache_lookup &&
       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
      ralloc_free(pipeline_ctx);
      return VK_PIPELINE_COMPILE_REQUIRED;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx, &stages[i]);
      if (stages[i].nir == NULL) {
         ralloc_free(pipeline_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      /* Shader found in cache already. */
      if (stages[i].bin != NULL)
         continue;

      /* We handle intersection shaders as part of the group */
      if (stages[i].stage == MESA_SHADER_INTERSECTION)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(pipeline_ctx);

      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
      switch (stages[i].stage) {
      case MESA_SHADER_RAYGEN:
         brw_nir_lower_raygen(nir);
         break;

      case MESA_SHADER_ANY_HIT:
         brw_nir_lower_any_hit(nir, devinfo);
         break;

      case MESA_SHADER_CLOSEST_HIT:
         brw_nir_lower_closest_hit(nir);
         break;

      case MESA_SHADER_MISS:
         brw_nir_lower_miss(nir);
         break;

      case MESA_SHADER_INTERSECTION:
         unreachable("These are handled later");

      case MESA_SHADER_CALLABLE:
         brw_nir_lower_callable(nir);
         break;

      default:
         unreachable("Invalid ray-tracing shader stage");
      }

      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
                                        &stages[i].bin, stage_ctx);
      if (result != VK_SUCCESS) {
         ralloc_free(pipeline_ctx);
         return result;
      }

      uint32_t stack_size =
         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);

      ralloc_free(stage_ctx);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
      struct anv_rt_shader_group *group = &pipeline->groups[i];
      group->type = ginfo->type;
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < info->stageCount);
         group->general = stages[ginfo->generalShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (ginfo->anyHitShader < info->stageCount)
            group->any_hit = stages[ginfo->anyHitShader].bin;

         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;

         uint32_t intersection_idx = info->pGroups[i].intersectionShader;
         assert(intersection_idx < info->stageCount);

         /* Only compile this stage if not already found in the cache. */
         if (stages[intersection_idx].bin == NULL) {
            /* The any-hit and intersection shaders have to be combined */
            uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
            const nir_shader *any_hit = NULL;
            if (any_hit_idx < info->stageCount)
               any_hit = stages[any_hit_idx].nir;

            void *group_ctx = ralloc_context(pipeline_ctx);
            nir_shader *intersection =
               nir_shader_clone(group_ctx, stages[intersection_idx].nir);

            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
                                                        devinfo);

            result = compile_upload_rt_shader(pipeline, cache,
                                              intersection,
                                              &stages[intersection_idx],
                                              &group->intersection,
                                              group_ctx);
            ralloc_free(group_ctx);
            if (result != VK_SUCCESS) {
               ralloc_free(pipeline_ctx);
               return result;
            }
         } else {
            group->intersection = stages[intersection_idx].bin;
         }

         uint32_t stack_size =
            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
         stack_max[MESA_SHADER_INTERSECTION] =
            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);

         break;
      }

      default:
         unreachable("Invalid ray tracing shader group type");
      }
   }

done:
   ralloc_free(pipeline_ctx);

   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
      assert(stage_count == 0 || info->stageCount == stage_count);
      for (uint32_t i = 0; i < stage_count; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   bool cache_hit;

   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
   };
   device->rt_trampoline =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trampoline_nir =
         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      uint32_t dummy_params[4] = { 0, };
      struct brw_cs_prog_data trampoline_prog_data = {
         .base.nr_params = 4,
         .base.param = dummy_params,
         .uses_inline_data = true,
         .uses_btd_stack_ids = true,
      };
      struct brw_compile_cs_params params = {
         .nir = trampoline_nir,
         .key = &trampoline_key.key,
         .prog_data = &trampoline_prog_data,
         .log_data = device,
      };
      const unsigned *tramp_data =
         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trampoline =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_COMPUTE,
                                  &trampoline_key, sizeof(trampoline_key),
                                  tramp_data,
                                  trampoline_prog_data.base.program_size,
                                  &trampoline_prog_data.base,
                                  sizeof(trampoline_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trampoline == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, device->rt_trampoline);

   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      struct brw_compile_bs_params params = {
         .nir = trivial_return_nir,
         .key = &return_key.key,
         .prog_data = &return_prog_data,

         .log_data = device,
      };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, device->rt_trivial_return);

   return VK_SUCCESS;
}

void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return;
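
   /* The trampoline and trivial-return kernels are owned by the internal
    * pipeline cache (we dropped our extra references at init time), so
    * there is nothing to free here.
    */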
}

static VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct vk_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc)
{
   VkResult result;

   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);

   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS)
      goto fail;

   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);

   return VK_SUCCESS;

fail:
   util_dynarray_foreach(&pipeline->shaders,
                         struct anv_shader_bin *, shader) {
      anv_shader_bin_unref(device, *shader);
   }
   return result;
}

static void
assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
                            uint32_t stage_idx,
                            VkShaderStageFlags valid_stages)
{
   if (stage_idx == VK_SHADER_UNUSED_KHR)
      return;

   assert(stage_idx < pCreateInfo->stageCount);
   assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
   assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
}

static VkResult
anv_ray_tracing_pipeline_create(
    VkDevice                                    _device,
    struct vk_pipeline_cache *                  cache,
    const VkRayTracingPipelineCreateInfoKHR*    pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);

   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
   VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount);
   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags,
                              pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   pipeline->group_count = pCreateInfo->groupCount;
   pipeline->groups = groups;

   ASSERTED const VkShaderStageFlags ray_tracing_stages =
      VK_SHADER_STAGE_RAYGEN_BIT_KHR |
      VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
      VK_SHADER_STAGE_MISS_BIT_KHR |
      VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
      VK_SHADER_STAGE_CALLABLE_BIT_KHR;

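   /* Every stage referenced by the pipeline must be a ray-tracing stage. */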
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);

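   /* Check the group layout rules from the ray-tracing spec: a general group
    * references exactly one raygen/miss/callable stage, a triangles hit
    * group has neither a general nor an intersection shader, and a
    * procedural hit group has no general shader.
    */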
   for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
         &pCreateInfo->pGroups[i];
      assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
                                  VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                                  VK_SHADER_STAGE_MISS_BIT_KHR |
                                  VK_SHADER_STAGE_CALLABLE_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
                                  VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
                                  VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
                                  VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < pCreateInfo->stageCount);
         assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
         assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
         break;

      default:
         unreachable("Invalid ray-tracing shader group type");
      }
   }

   result = anv_ray_tracing_pipeline_init(pipeline, device, cache,
                                          pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, pAllocator);
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   anv_genX(&device->info, ray_tracing_pipeline_emit)(pipeline);

   *pPipeline = anv_pipeline_to_handle(&pipeline->base);

   return pipeline->base.batch.status;
}

VkResult
anv_CreateRayTracingPipelinesKHR(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    createInfoCount,
    const VkRayTracingPipelineCreateInfoKHR*    pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < createInfoCount; i++) {
      VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
                                                     &pCreateInfos[i],
                                                     pAllocator, &pPipelines[i]);

      if (res == VK_SUCCESS)
         continue;

      /* Bail out on the first error as it is not obvious what error should
       * be reported upon 2 different failures.
       */
      result = res;
      if (result != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      pPipelines[i] = VK_NULL_HANDLE;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

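   /* Make sure the application never sees uninitialized handles: every entry
    * past the point where we stopped is explicitly VK_NULL_HANDLE.
    */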
   for (; i < createInfoCount; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

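/* Format a string into one of the fixed-size name/description fields of the
 * VK_KHR_pipeline_executable_properties structs, asserting in debug builds
 * that the result was not truncated.
 */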
#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})

VkResult anv_GetPipelineExecutablePropertiesKHR(
    VkDevice                                    device,
    const VkPipelineInfoKHR*                    pPipelineInfo,
    uint32_t*                                   pExecutableCount,
    VkPipelineExecutablePropertiesKHR*          pProperties)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          pProperties, pExecutableCount);

   util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         gl_shader_stage stage = exe->stage;
         props->stages = mesa_to_vk_shader_stage(stage);

         unsigned simd_width = exe->stats.dispatch_width;
         if (stage == MESA_SHADER_FRAGMENT) {
            WRITE_STR(props->name, "%s%d %s",
                      simd_width ? "SIMD" : "vec",
                      simd_width ? simd_width : 4,
                      _mesa_shader_stage_to_string(stage));
         } else {
            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
         }
         WRITE_STR(props->description, "%s%d %s shader",
                   simd_width ? "SIMD" : "vec",
                   simd_width ? simd_width : 4,
                   _mesa_shader_stage_to_string(stage));

         /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
          * wants a subgroup size of 1.
          */
         props->subgroupSize = MAX2(simd_width, 1);
      }
   }

   return vk_outarray_status(&out);
}

static const struct anv_pipeline_executable *
anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables,
                                             struct anv_pipeline_executable));
   return util_dynarray_element(
      &pipeline->executables, struct anv_pipeline_executable, index);
}

VkResult anv_GetPipelineExecutableStatisticsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pStatisticCount,
    VkPipelineExecutableStatisticKHR*           pStatistics)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          pStatistics, pStatisticCount);

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

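   /* Fetch the back-end prog_data for this executable. For ray-tracing
    * pipelines the executables are assumed to map one-to-one onto the
    * binaries in the pipeline's shader array, so the executable index can be
    * used to index it directly.
    */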
   const struct brw_stage_prog_data *prog_data;
   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
      break;
   }
   case ANV_PIPELINE_COMPUTE: {
      prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
      break;
   }
   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_shader_bin **shader =
         util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
                               struct anv_shader_bin *,
                               pExecutableInfo->executableIndex);
      prog_data = (*shader)->prog_data;
      break;
   }
   default:
      unreachable("invalid pipeline type");
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Instruction Count");
      WRITE_STR(stat->description,
                "Number of GEN instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.instructions;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "SEND Count");
      WRITE_STR(stat->description,
                "Number of instructions in the final generated shader "
                "executable which access external units such as the "
                "constant cache or the sampler.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sends;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Loop Count");
      WRITE_STR(stat->description,
                "Number of loops (not unrolled) in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.loops;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Cycle Count");
      WRITE_STR(stat->description,
                "Estimate of the number of EU cycles required to execute "
                "the final generated executable.  This is an estimate only "
                "and may vary greatly from actual run-time performance.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.cycles;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Spill Count");
      WRITE_STR(stat->description,
                "Number of scratch spill operations.  This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory.  If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.spills;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Fill Count");
      WRITE_STR(stat->description,
                "Number of scratch fill operations.  This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory.  If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.fills;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Scratch Memory Size");
      WRITE_STR(stat->description,
                "Number of bytes of scratch memory required by the "
                "generated shader executable.  If this is non-zero, you "
                "may want to adjust your shader to reduce register "
                "pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = prog_data->total_scratch;
   }

   if (gl_shader_stage_uses_workgroup(exe->stage)) {
      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Workgroup Memory Size");
         WRITE_STR(stat->description,
                   "Number of bytes of workgroup shared memory used by this "
                   "shader including any padding.");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->total_shared;
      }
   }

   return vk_outarray_status(&out);
}

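/* Implements the usual Vulkan two-call idiom for the textual IRs below: when
 * pData is NULL, only report the required size; otherwise copy as much as
 * fits and return false if the representation was truncated.
 */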
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

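   /* strncpy() does not NUL-terminate on truncation; in that case we return
    * false and the caller reports VK_INCOMPLETE.
    */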
   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          pInternalRepresentations, pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "GEN Assembly");
         WRITE_STR(ir->description,
                   "Final GEN assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
      return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

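   /* Out-of-range queries are application errors per the spec's valid-usage
    * rules (firstGroup + groupCount must not exceed the pipeline's group
    * count and dataSize must cover all requested handles), so assert-only
    * checks suffice here.
    */
   assert(firstGroup + groupCount <= rt_pipeline->group_count);
   assert(dataSize >= groupCount * sizeof(rt_pipeline->groups[0].handle));
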
   for (uint32_t i = 0; i < groupCount; i++) {
      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
      memcpy(pData, group->handle, sizeof(group->handle));
      pData += sizeof(group->handle);
   }

   return VK_SUCCESS;
}

VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
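
   /* Capture/replay of shader group handles is unimplemented; this entry
    * point should be unreachable as long as the
    * rayTracingPipelineShaderGroupHandleCaptureReplay feature is not
    * advertised.
    */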
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    group,
    VkShaderGroupShaderKHR                      groupShader)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(group < rt_pipeline->group_count);

   struct anv_shader_bin *bin;
   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
      bin = rt_pipeline->groups[group].general;
      break;

   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
      bin = rt_pipeline->groups[group].closest_hit;
      break;

   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
      bin = rt_pipeline->groups[group].any_hit;
      break;

   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      bin = rt_pipeline->groups[group].intersection;
      break;

   default:
      unreachable("Invalid VkShaderGroupShader enum");
   }

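   /* A group may leave this shader slot unused, in which case it contributes
    * no stack; otherwise report the stack size computed by the back-end
    * compiler for the bindless shader.
    */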
   if (bin == NULL)
      return 0;

   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}
