1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_disasm.h"
34 #include "common/intel_sample_positions.h"
35 #include "anv_private.h"
36 #include "compiler/brw_nir.h"
37 #include "compiler/brw_nir_rt.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_pipeline.h"
42 #include "vk_render_pass.h"
43 #include "vk_util.h"
44 
45 /* Needed for SWIZZLE macros */
46 #include "program/prog_instruction.h"
47 
48 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
49  * we can't do that yet because we don't have the ability to copy nir.
50  */
51 static nir_shader *
anv_shader_stage_to_nir(struct anv_device *device, const VkPipelineShaderStageCreateInfo *stage_info, void *mem_ctx)52 anv_shader_stage_to_nir(struct anv_device *device,
53                         const VkPipelineShaderStageCreateInfo *stage_info,
54                         void *mem_ctx)
55 {
56    const struct anv_physical_device *pdevice = device->physical;
57    const struct anv_instance *instance = pdevice->instance;
58    const struct brw_compiler *compiler = pdevice->compiler;
59    gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
60    const nir_shader_compiler_options *nir_options =
61       compiler->nir_options[stage];
62 
63    const struct spirv_to_nir_options spirv_options = {
64       .caps = {
65          .demote_to_helper_invocation = true,
66          .derivative_group = true,
67          .descriptor_array_dynamic_indexing = true,
68          .descriptor_array_non_uniform_indexing = true,
69          .descriptor_indexing = true,
70          .device_group = true,
71          .draw_parameters = true,
72          .float16 = pdevice->info.ver >= 8,
73          .float32_atomic_add = pdevice->info.has_lsc,
74          .float32_atomic_min_max = pdevice->info.ver >= 9,
75          .float64 = pdevice->info.ver >= 8,
76          .float64_atomic_min_max = pdevice->info.has_lsc,
77          .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
78          .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
79          .geometry_streams = true,
80          /* When using Vulkan 1.3 or KHR_format_feature_flags2 is enabled, the
81           * read/write without format is per format, so just report true. It's
82           * up to the application to check.
83           */
84          .image_read_without_format = instance->vk.app_info.api_version >= VK_API_VERSION_1_3 || device->vk.enabled_extensions.KHR_format_feature_flags2,
85          .image_write_without_format = true,
86          .int8 = pdevice->info.ver >= 8,
87          .int16 = pdevice->info.ver >= 8,
88          .int64 = pdevice->info.ver >= 8,
89          .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
90          .integer_functions2 = pdevice->info.ver >= 8,
91          .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
92          .min_lod = true,
93          .multiview = true,
94          .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
95          .post_depth_coverage = pdevice->info.ver >= 9,
96          .runtime_descriptor_array = true,
97          .float_controls = pdevice->info.ver >= 8,
98          .ray_query = pdevice->info.has_ray_tracing,
99          .ray_tracing = pdevice->info.has_ray_tracing,
100          .shader_clock = true,
101          .shader_viewport_index_layer = true,
102          .stencil_export = pdevice->info.ver >= 9,
103          .storage_8bit = pdevice->info.ver >= 8,
104          .storage_16bit = pdevice->info.ver >= 8,
105          .subgroup_arithmetic = true,
106          .subgroup_basic = true,
107          .subgroup_ballot = true,
108          .subgroup_dispatch = true,
109          .subgroup_quad = true,
110          .subgroup_uniform_control_flow = true,
111          .subgroup_shuffle = true,
112          .subgroup_vote = true,
113          .tessellation = true,
114          .transform_feedback = pdevice->info.ver >= 8,
115          .variable_pointers = true,
116          .vk_memory_model = true,
117          .vk_memory_model_device_scope = true,
118          .workgroup_memory_explicit_layout = true,
119          .fragment_shading_rate = pdevice->info.ver >= 11,
120       },
121       .ubo_addr_format =
122          anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
123       .ssbo_addr_format =
124           anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
125       .phys_ssbo_addr_format = nir_address_format_64bit_global,
126       .push_const_addr_format = nir_address_format_logical,
127 
128       /* TODO: Consider changing this to an address format that has the NULL
129        * pointer equals to 0.  That might be a better format to play nice
130        * with certain code / code generators.
131        */
132       .shared_addr_format = nir_address_format_32bit_offset,
133    };
134 
135    nir_shader *nir;
136    VkResult result =
137       vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
138                                       &spirv_options, nir_options,
139                                       mem_ctx, &nir);
140    if (result != VK_SUCCESS)
141       return NULL;
142 
143    if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
144       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
145               gl_shader_stage_name(stage));
146       nir_print_shader(nir, stderr);
147    }
148 
149    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
150               nir_shader_get_entrypoint(nir), true, false);
151 
152    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
153       .point_coord = true,
154    };
155    NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
156 
157    const nir_opt_access_options opt_access_options = {
158       .is_vulkan = true,
159       .infer_non_readable = true,
160    };
161    NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
162 
163    NIR_PASS(_, nir, nir_lower_frexp);
164 
165    /* Vulkan uses the separate-shader linking model */
166    nir->info.separate_shader = true;
167 
168    brw_preprocess_nir(compiler, nir, NULL);
169 
170    return nir;
171 }
172 
173 VkResult
anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, enum anv_pipeline_type type, VkPipelineCreateFlags flags, const VkAllocationCallbacks *pAllocator)174 anv_pipeline_init(struct anv_pipeline *pipeline,
175                   struct anv_device *device,
176                   enum anv_pipeline_type type,
177                   VkPipelineCreateFlags flags,
178                   const VkAllocationCallbacks *pAllocator)
179 {
180    VkResult result;
181 
182    memset(pipeline, 0, sizeof(*pipeline));
183 
184    vk_object_base_init(&device->vk, &pipeline->base,
185                        VK_OBJECT_TYPE_PIPELINE);
186    pipeline->device = device;
187 
188    /* It's the job of the child class to provide actual backing storage for
189     * the batch by setting batch.start, batch.next, and batch.end.
190     */
191    pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
192    pipeline->batch.relocs = &pipeline->batch_relocs;
193    pipeline->batch.status = VK_SUCCESS;
194 
195    result = anv_reloc_list_init(&pipeline->batch_relocs,
196                                 pipeline->batch.alloc);
197    if (result != VK_SUCCESS)
198       return result;
199 
200    pipeline->mem_ctx = ralloc_context(NULL);
201 
202    pipeline->type = type;
203    pipeline->flags = flags;
204 
205    util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
206 
207    return VK_SUCCESS;
208 }
209 
210 void
anv_pipeline_finish(struct anv_pipeline *pipeline, struct anv_device *device, const VkAllocationCallbacks *pAllocator)211 anv_pipeline_finish(struct anv_pipeline *pipeline,
212                     struct anv_device *device,
213                     const VkAllocationCallbacks *pAllocator)
214 {
215    anv_reloc_list_finish(&pipeline->batch_relocs,
216                          pAllocator ? pAllocator : &device->vk.alloc);
217    ralloc_free(pipeline->mem_ctx);
218    vk_object_base_finish(&pipeline->base);
219 }
220 
anv_DestroyPipeline( VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks* pAllocator)221 void anv_DestroyPipeline(
222     VkDevice                                    _device,
223     VkPipeline                                  _pipeline,
224     const VkAllocationCallbacks*                pAllocator)
225 {
226    ANV_FROM_HANDLE(anv_device, device, _device);
227    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
228 
229    if (!pipeline)
230       return;
231 
232    switch (pipeline->type) {
233    case ANV_PIPELINE_GRAPHICS: {
234       struct anv_graphics_pipeline *gfx_pipeline =
235          anv_pipeline_to_graphics(pipeline);
236 
237       for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
238          if (gfx_pipeline->shaders[s])
239             anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
240       }
241       break;
242    }
243 
244    case ANV_PIPELINE_COMPUTE: {
245       struct anv_compute_pipeline *compute_pipeline =
246          anv_pipeline_to_compute(pipeline);
247 
248       if (compute_pipeline->cs)
249          anv_shader_bin_unref(device, compute_pipeline->cs);
250 
251       break;
252    }
253 
254    case ANV_PIPELINE_RAY_TRACING: {
255       struct anv_ray_tracing_pipeline *rt_pipeline =
256          anv_pipeline_to_ray_tracing(pipeline);
257 
258       util_dynarray_foreach(&rt_pipeline->shaders,
259                             struct anv_shader_bin *, shader) {
260          anv_shader_bin_unref(device, *shader);
261       }
262       break;
263    }
264 
265    default:
266       unreachable("invalid pipeline type");
267    }
268 
269    anv_pipeline_finish(pipeline, device, pAllocator);
270    vk_free2(&device->vk.alloc, pAllocator, pipeline);
271 }
272 
273 static void
populate_sampler_prog_key(const struct intel_device_info *devinfo, struct brw_sampler_prog_key_data *key)274 populate_sampler_prog_key(const struct intel_device_info *devinfo,
275                           struct brw_sampler_prog_key_data *key)
276 {
277    /* Almost all multisampled textures are compressed.  The only time when we
278     * don't compress a multisampled texture is for 16x MSAA with a surface
279     * width greater than 8k which is a bit of an edge case.  Since the sampler
280     * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
281     * to tell the compiler to always assume compression.
282     */
283    key->compressed_multisample_layout_mask = ~0;
284 
285    /* SkyLake added support for 16x MSAA.  With this came a new message for
286     * reading from a 16x MSAA surface with compression.  The new message was
287     * needed because now the MCS data is 64 bits instead of 32 or lower as is
288     * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
289     * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
290     * so we can just use it unconditionally.  This may not be quite as
291     * efficient but it saves us from recompiling.
292     */
293    if (devinfo->ver >= 9)
294       key->msaa_16 = ~0;
295 
296    /* XXX: Handle texture swizzle on HSW- */
297    for (int i = 0; i < BRW_MAX_SAMPLERS; i++) {
298       /* Assume color sampler, no swizzling. (Works for BDW+) */
299       key->swizzles[i] = SWIZZLE_XYZW;
300    }
301 }
302 
303 static void
populate_base_prog_key(const struct anv_device *device, bool robust_buffer_acccess, struct brw_base_prog_key *key)304 populate_base_prog_key(const struct anv_device *device,
305                        bool robust_buffer_acccess,
306                        struct brw_base_prog_key *key)
307 {
308    key->robust_buffer_access = robust_buffer_acccess;
309    key->limit_trig_input_range =
310       device->physical->instance->limit_trig_input_range;
311 
312    populate_sampler_prog_key(&device->info, &key->tex);
313 }
314 
315 static void
populate_vs_prog_key(const struct anv_device *device, bool robust_buffer_acccess, struct brw_vs_prog_key *key)316 populate_vs_prog_key(const struct anv_device *device,
317                      bool robust_buffer_acccess,
318                      struct brw_vs_prog_key *key)
319 {
320    memset(key, 0, sizeof(*key));
321 
322    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
323 
324    /* XXX: Handle vertex input work-arounds */
325 
326    /* XXX: Handle sampler_prog_key */
327 }
328 
329 static void
populate_tcs_prog_key(const struct anv_device *device, bool robust_buffer_acccess, unsigned input_vertices, struct brw_tcs_prog_key *key)330 populate_tcs_prog_key(const struct anv_device *device,
331                       bool robust_buffer_acccess,
332                       unsigned input_vertices,
333                       struct brw_tcs_prog_key *key)
334 {
335    memset(key, 0, sizeof(*key));
336 
337    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
338 
339    key->input_vertices = input_vertices;
340 }
341 
342 static void
populate_tes_prog_key(const struct anv_device *device, bool robust_buffer_acccess, struct brw_tes_prog_key *key)343 populate_tes_prog_key(const struct anv_device *device,
344                       bool robust_buffer_acccess,
345                       struct brw_tes_prog_key *key)
346 {
347    memset(key, 0, sizeof(*key));
348 
349    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
350 }
351 
352 static void
populate_gs_prog_key(const struct anv_device *device, bool robust_buffer_acccess, struct brw_gs_prog_key *key)353 populate_gs_prog_key(const struct anv_device *device,
354                      bool robust_buffer_acccess,
355                      struct brw_gs_prog_key *key)
356 {
357    memset(key, 0, sizeof(*key));
358 
359    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
360 }
361 
362 static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline, const BITSET_WORD *dynamic, const struct vk_multisample_state *ms, const struct vk_fragment_shading_rate_state *fsr)363 pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
364                           const BITSET_WORD *dynamic,
365                           const struct vk_multisample_state *ms,
366                           const struct vk_fragment_shading_rate_state *fsr)
367 {
368    /* The Vulkan 1.2.199 spec says:
369     *
370     *    "If any of the following conditions are met, Cxy' must be set to
371     *    {1,1}:
372     *
373     *     * If Sample Shading is enabled.
374     *     * [...]"
375     *
376     * And "sample shading" is defined as follows:
377     *
378     *    "Sample shading is enabled for a graphics pipeline:
379     *
380     *     * If the interface of the fragment shader entry point of the
381     *       graphics pipeline includes an input variable decorated with
382     *       SampleId or SamplePosition. In this case minSampleShadingFactor
383     *       takes the value 1.0.
384     *
385     *     * Else if the sampleShadingEnable member of the
386     *       VkPipelineMultisampleStateCreateInfo structure specified when
387     *       creating the graphics pipeline is set to VK_TRUE. In this case
388     *       minSampleShadingFactor takes the value of
389     *       VkPipelineMultisampleStateCreateInfo::minSampleShading.
390     *
391     *    Otherwise, sample shading is considered disabled."
392     *
393     * The first bullet above is handled by the back-end compiler because those
394     * inputs both force per-sample dispatch.  The second bullet is handled
395     * here.  Note that this sample shading being enabled has nothing to do
396     * with minSampleShading.
397     */
398    if (ms != NULL && ms->sample_shading_enable)
399       return false;
400 
401    /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
402     * possibility for element of the pipeline to change the value.
403     */
404    if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
405        fsr->fragment_size.width <= 1 &&
406        fsr->fragment_size.height <= 1 &&
407        fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
408        fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
409       return false;
410 
411    return true;
412 }
413 
414 static void
populate_task_prog_key(const struct anv_device *device, bool robust_buffer_access, struct brw_task_prog_key *key)415 populate_task_prog_key(const struct anv_device *device,
416                        bool robust_buffer_access,
417                        struct brw_task_prog_key *key)
418 {
419    memset(key, 0, sizeof(*key));
420 
421    populate_base_prog_key(device, robust_buffer_access, &key->base);
422 }
423 
424 static void
populate_mesh_prog_key(const struct anv_device *device, bool robust_buffer_access, struct brw_mesh_prog_key *key)425 populate_mesh_prog_key(const struct anv_device *device,
426                        bool robust_buffer_access,
427                        struct brw_mesh_prog_key *key)
428 {
429    memset(key, 0, sizeof(*key));
430 
431    populate_base_prog_key(device, robust_buffer_access, &key->base);
432 }
433 
434 static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, bool robust_buffer_acccess, const BITSET_WORD *dynamic, const struct vk_multisample_state *ms, const struct vk_fragment_shading_rate_state *fsr, const struct vk_render_pass_state *rp, struct brw_wm_prog_key *key)435 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
436                      bool robust_buffer_acccess,
437                      const BITSET_WORD *dynamic,
438                      const struct vk_multisample_state *ms,
439                      const struct vk_fragment_shading_rate_state *fsr,
440                      const struct vk_render_pass_state *rp,
441                      struct brw_wm_prog_key *key)
442 {
443    const struct anv_device *device = pipeline->base.device;
444 
445    memset(key, 0, sizeof(*key));
446 
447    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
448 
449    /* We set this to 0 here and set to the actual value before we call
450     * brw_compile_fs.
451     */
452    key->input_slots_valid = 0;
453 
454    /* XXX Vulkan doesn't appear to specify */
455    key->clamp_fragment_color = false;
456 
457    key->ignore_sample_mask_out = false;
458 
459    assert(rp->color_attachment_count <= MAX_RTS);
460    /* Consider all inputs as valid until look at the NIR variables. */
461    key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
462    key->nr_color_regions = rp->color_attachment_count;
463 
464    /* To reduce possible shader recompilations we would need to know if
465     * there is a SampleMask output variable to compute if we should emit
466     * code to workaround the issue that hardware disables alpha to coverage
467     * when there is SampleMask output.
468     */
469    key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable;
470 
471    /* Vulkan doesn't support fixed-function alpha test */
472    key->alpha_test_replicate_alpha = false;
473 
474    if (ms != NULL) {
475       /* We should probably pull this out of the shader, but it's fairly
476        * harmless to compute it and then let dead-code take care of it.
477        */
478       if (ms->rasterization_samples > 1) {
479          key->persample_interp = ms->sample_shading_enable &&
480             (ms->min_sample_shading * ms->rasterization_samples) > 1;
481          key->multisample_fbo = true;
482       }
483 
484       if (device->physical->instance->sample_mask_out_opengl_behaviour)
485          key->ignore_sample_mask_out = !key->multisample_fbo;
486    }
487 
488    key->coarse_pixel =
489       !key->persample_interp &&
490       device->vk.enabled_extensions.KHR_fragment_shading_rate &&
491       pipeline_has_coarse_pixel(pipeline, dynamic, ms, fsr);
492 }
493 
494 static void
populate_cs_prog_key(const struct anv_device *device, bool robust_buffer_acccess, struct brw_cs_prog_key *key)495 populate_cs_prog_key(const struct anv_device *device,
496                      bool robust_buffer_acccess,
497                      struct brw_cs_prog_key *key)
498 {
499    memset(key, 0, sizeof(*key));
500 
501    populate_base_prog_key(device, robust_buffer_acccess, &key->base);
502 }
503 
504 static void
populate_bs_prog_key(const struct anv_device *device, bool robust_buffer_access, struct brw_bs_prog_key *key)505 populate_bs_prog_key(const struct anv_device *device,
506                      bool robust_buffer_access,
507                      struct brw_bs_prog_key *key)
508 {
509    memset(key, 0, sizeof(*key));
510 
511    populate_base_prog_key(device, robust_buffer_access, &key->base);
512 }
513 
514 struct anv_pipeline_stage {
515    gl_shader_stage stage;
516 
517    const VkPipelineShaderStageCreateInfo *info;
518 
519    unsigned char shader_sha1[20];
520 
521    union brw_any_prog_key key;
522 
523    struct {
524       gl_shader_stage stage;
525       unsigned char sha1[20];
526    } cache_key;
527 
528    nir_shader *nir;
529 
530    struct anv_pipeline_binding surface_to_descriptor[256];
531    struct anv_pipeline_binding sampler_to_descriptor[256];
532    struct anv_pipeline_bind_map bind_map;
533 
534    union brw_any_prog_data prog_data;
535 
536    uint32_t num_stats;
537    struct brw_compile_stats stats[3];
538    char *disasm[3];
539 
540    VkPipelineCreationFeedback feedback;
541 
542    const unsigned *code;
543 
544    struct anv_shader_bin *bin;
545 };
546 
547 static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline, struct anv_pipeline_layout *layout, struct anv_pipeline_stage *stages, unsigned char *sha1_out)548 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
549                            struct anv_pipeline_layout *layout,
550                            struct anv_pipeline_stage *stages,
551                            unsigned char *sha1_out)
552 {
553    struct mesa_sha1 ctx;
554    _mesa_sha1_init(&ctx);
555 
556    _mesa_sha1_update(&ctx, &pipeline->view_mask,
557                      sizeof(pipeline->view_mask));
558 
559    if (layout)
560       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
561 
562    const bool rba = pipeline->base.device->robust_buffer_access;
563    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
564 
565    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
566       if (stages[s].info) {
567          _mesa_sha1_update(&ctx, stages[s].shader_sha1,
568                            sizeof(stages[s].shader_sha1));
569          _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
570       }
571    }
572 
573    _mesa_sha1_final(&ctx, sha1_out);
574 }
575 
576 static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline, struct anv_pipeline_layout *layout, struct anv_pipeline_stage *stage, unsigned char *sha1_out)577 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
578                           struct anv_pipeline_layout *layout,
579                           struct anv_pipeline_stage *stage,
580                           unsigned char *sha1_out)
581 {
582    struct mesa_sha1 ctx;
583    _mesa_sha1_init(&ctx);
584 
585    if (layout)
586       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
587 
588    const struct anv_device *device = pipeline->base.device;
589 
590    const bool rba = device->robust_buffer_access;
591    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
592 
593    const bool afs = device->physical->instance->assume_full_subgroups;
594    _mesa_sha1_update(&ctx, &afs, sizeof(afs));
595 
596    _mesa_sha1_update(&ctx, stage->shader_sha1,
597                      sizeof(stage->shader_sha1));
598    _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
599 
600    _mesa_sha1_final(&ctx, sha1_out);
601 }
602 
603 static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline, struct anv_pipeline_layout *layout, struct anv_pipeline_stage *stage, unsigned char *sha1_out)604 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
605                                      struct anv_pipeline_layout *layout,
606                                      struct anv_pipeline_stage *stage,
607                                      unsigned char *sha1_out)
608 {
609    struct mesa_sha1 ctx;
610    _mesa_sha1_init(&ctx);
611 
612    if (layout != NULL)
613       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
614 
615    const bool rba = pipeline->base.device->robust_buffer_access;
616    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
617 
618    _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
619    _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
620 
621    _mesa_sha1_final(&ctx, sha1_out);
622 }
623 
624 static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline, struct anv_pipeline_layout *layout, struct anv_pipeline_stage *intersection, struct anv_pipeline_stage *any_hit, unsigned char *sha1_out)625 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
626                                               struct anv_pipeline_layout *layout,
627                                               struct anv_pipeline_stage *intersection,
628                                               struct anv_pipeline_stage *any_hit,
629                                               unsigned char *sha1_out)
630 {
631    struct mesa_sha1 ctx;
632    _mesa_sha1_init(&ctx);
633 
634    if (layout != NULL)
635       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
636 
637    const bool rba = pipeline->base.device->robust_buffer_access;
638    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
639 
640    _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
641    _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
642    _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
643    _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
644 
645    _mesa_sha1_final(&ctx, sha1_out);
646 }
647 
648 static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, struct vk_pipeline_cache *cache, void *mem_ctx, struct anv_pipeline_stage *stage)649 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
650                            struct vk_pipeline_cache *cache,
651                            void *mem_ctx,
652                            struct anv_pipeline_stage *stage)
653 {
654    const struct brw_compiler *compiler =
655       pipeline->device->physical->compiler;
656    const nir_shader_compiler_options *nir_options =
657       compiler->nir_options[stage->stage];
658    nir_shader *nir;
659 
660    nir = anv_device_search_for_nir(pipeline->device, cache,
661                                    nir_options,
662                                    stage->shader_sha1,
663                                    mem_ctx);
664    if (nir) {
665       assert(nir->info.stage == stage->stage);
666       return nir;
667    }
668 
669    nir = anv_shader_stage_to_nir(pipeline->device, stage->info, mem_ctx);
670    if (nir) {
671       anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
672       return nir;
673    }
674 
675    return NULL;
676 }
677 
678 static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)679 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
680 {
681    assert(glsl_type_is_vector_or_scalar(type));
682 
683    uint32_t comp_size = glsl_type_is_boolean(type)
684       ? 4 : glsl_get_bit_size(type) / 8;
685    unsigned length = glsl_get_vector_elements(type);
686    *size = comp_size * length,
687    *align = comp_size * (length == 3 ? 4 : length);
688 }
689 
690 static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline, void *mem_ctx, struct anv_pipeline_stage *stage, struct anv_pipeline_layout *layout)691 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
692                        void *mem_ctx,
693                        struct anv_pipeline_stage *stage,
694                        struct anv_pipeline_layout *layout)
695 {
696    const struct anv_physical_device *pdevice = pipeline->device->physical;
697    const struct brw_compiler *compiler = pdevice->compiler;
698 
699    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
700    nir_shader *nir = stage->nir;
701 
702    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
703       NIR_PASS(_, nir, nir_lower_wpos_center);
704       NIR_PASS(_, nir, nir_lower_input_attachments,
705                &(nir_input_attachment_options) {
706                    .use_fragcoord_sysval = true,
707                    .use_layer_id_sysval = true,
708                });
709    }
710 
711    NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);
712 
713    if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
714       NIR_PASS(_, nir, anv_nir_lower_multiview,
715                anv_pipeline_to_graphics(pipeline));
716    }
717 
718    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
719 
720    NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler->devinfo);
721 
722    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
723             nir_address_format_64bit_global);
724    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
725             nir_address_format_32bit_offset);
726 
727    NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
728 
729    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
730    NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
731               pdevice, pipeline->device->robust_buffer_access,
732               layout, &stage->bind_map);
733 
734    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
735             anv_nir_ubo_addr_format(pdevice,
736                pipeline->device->robust_buffer_access));
737    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
738             anv_nir_ssbo_addr_format(pdevice,
739                pipeline->device->robust_buffer_access));
740 
741    /* First run copy-prop to get rid of all of the vec() that address
742     * calculations often create and then constant-fold so that, when we
743     * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
744     */
745    NIR_PASS(_, nir, nir_copy_prop);
746    NIR_PASS(_, nir, nir_opt_constant_folding);
747 
748    NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
749 
750    /* We don't support non-uniform UBOs and non-uniform SSBO access is
751     * handled naturally by falling back to A64 messages.
752     */
753    NIR_PASS(_, nir, nir_lower_non_uniform_access,
754             &(nir_lower_non_uniform_access_options) {
755                 .types = nir_lower_non_uniform_texture_access |
756                          nir_lower_non_uniform_image_access,
757                 .callback = NULL,
758             });
759 
760    NIR_PASS_V(nir, anv_nir_compute_push_layout,
761               pdevice, pipeline->device->robust_buffer_access,
762               prog_data, &stage->bind_map, mem_ctx);
763 
764    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
765       if (!nir->info.shared_memory_explicit_layout) {
766          NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
767                   nir_var_mem_shared, shared_type_info);
768       }
769 
770       NIR_PASS(_, nir, nir_lower_explicit_io,
771                nir_var_mem_shared, nir_address_format_32bit_offset);
772 
773       if (nir->info.zero_initialize_shared_memory &&
774           nir->info.shared_size > 0) {
775          /* The effective Shared Local Memory size is at least 1024 bytes and
776           * is always rounded to a power of two, so it is OK to align the size
777           * used by the shader to chunk_size -- which does simplify the logic.
778           */
779          const unsigned chunk_size = 16;
780          const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
781          assert(shared_size <=
782                 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
783 
784          NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
785                   shared_size, chunk_size);
786       }
787    }
788 
789    if (gl_shader_stage_is_compute(nir->info.stage) ||
790        gl_shader_stage_is_mesh(nir->info.stage))
791       NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);
792 
793    stage->nir = nir;
794 }
795 
796 static void
anv_pipeline_link_vs(const struct brw_compiler *compiler, struct anv_pipeline_stage *vs_stage, struct anv_pipeline_stage *next_stage)797 anv_pipeline_link_vs(const struct brw_compiler *compiler,
798                      struct anv_pipeline_stage *vs_stage,
799                      struct anv_pipeline_stage *next_stage)
800 {
801    if (next_stage)
802       brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
803 }
804 
805 static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler, void *mem_ctx, struct anv_graphics_pipeline *pipeline, struct anv_pipeline_stage *vs_stage)806 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
807                         void *mem_ctx,
808                         struct anv_graphics_pipeline *pipeline,
809                         struct anv_pipeline_stage *vs_stage)
810 {
811    /* When using Primitive Replication for multiview, each view gets its own
812     * position slot.
813     */
814    uint32_t pos_slots = pipeline->use_primitive_replication ?
815       MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
816 
817    brw_compute_vue_map(compiler->devinfo,
818                        &vs_stage->prog_data.vs.base.vue_map,
819                        vs_stage->nir->info.outputs_written,
820                        vs_stage->nir->info.separate_shader,
821                        pos_slots);
822 
823    vs_stage->num_stats = 1;
824 
825    struct brw_compile_vs_params params = {
826       .nir = vs_stage->nir,
827       .key = &vs_stage->key.vs,
828       .prog_data = &vs_stage->prog_data.vs,
829       .stats = vs_stage->stats,
830       .log_data = pipeline->base.device,
831    };
832 
833    vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
834 }
835 
836 static void
merge_tess_info(struct shader_info *tes_info, const struct shader_info *tcs_info)837 merge_tess_info(struct shader_info *tes_info,
838                 const struct shader_info *tcs_info)
839 {
840    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
841     *
842     *    "PointMode. Controls generation of points rather than triangles
843     *     or lines. This functionality defaults to disabled, and is
844     *     enabled if either shader stage includes the execution mode.
845     *
846     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
847     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
848     * and OutputVertices, it says:
849     *
850     *    "One mode must be set in at least one of the tessellation
851     *     shader stages."
852     *
853     * So, the fields can be set in either the TCS or TES, but they must
854     * agree if set in both.  Our backend looks at TES, so bitwise-or in
855     * the values from the TCS.
856     */
857    assert(tcs_info->tess.tcs_vertices_out == 0 ||
858           tes_info->tess.tcs_vertices_out == 0 ||
859           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
860    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
861 
862    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
863           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
864           tcs_info->tess.spacing == tes_info->tess.spacing);
865    tes_info->tess.spacing |= tcs_info->tess.spacing;
866 
867    assert(tcs_info->tess._primitive_mode == 0 ||
868           tes_info->tess._primitive_mode == 0 ||
869           tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
870    tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
871    tes_info->tess.ccw |= tcs_info->tess.ccw;
872    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
873 }
874 
875 static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler, struct anv_pipeline_stage *tcs_stage, struct anv_pipeline_stage *tes_stage)876 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
877                       struct anv_pipeline_stage *tcs_stage,
878                       struct anv_pipeline_stage *tes_stage)
879 {
880    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
881 
882    brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
883 
884    nir_lower_patch_vertices(tes_stage->nir,
885                             tcs_stage->nir->info.tess.tcs_vertices_out,
886                             NULL);
887 
888    /* Copy TCS info into the TES info */
889    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
890 
891    /* Whacking the key after cache lookup is a bit sketchy, but all of
892     * this comes from the SPIR-V, which is part of the hash used for the
893     * pipeline cache.  So it should be safe.
894     */
895    tcs_stage->key.tcs._tes_primitive_mode =
896       tes_stage->nir->info.tess._primitive_mode;
897    tcs_stage->key.tcs.quads_workaround =
898       compiler->devinfo->ver < 9 &&
899       tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
900       tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
901 }
902 
903 static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *tcs_stage, struct anv_pipeline_stage *prev_stage)904 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
905                          void *mem_ctx,
906                          struct anv_device *device,
907                          struct anv_pipeline_stage *tcs_stage,
908                          struct anv_pipeline_stage *prev_stage)
909 {
910    tcs_stage->key.tcs.outputs_written =
911       tcs_stage->nir->info.outputs_written;
912    tcs_stage->key.tcs.patch_outputs_written =
913       tcs_stage->nir->info.patch_outputs_written;
914 
915    tcs_stage->num_stats = 1;
916 
917    struct brw_compile_tcs_params params = {
918       .nir = tcs_stage->nir,
919       .key = &tcs_stage->key.tcs,
920       .prog_data = &tcs_stage->prog_data.tcs,
921       .stats = tcs_stage->stats,
922       .log_data = device,
923    };
924 
925    tcs_stage->code = brw_compile_tcs(compiler, mem_ctx, &params);
926 }
927 
928 static void
anv_pipeline_link_tes(const struct brw_compiler *compiler, struct anv_pipeline_stage *tes_stage, struct anv_pipeline_stage *next_stage)929 anv_pipeline_link_tes(const struct brw_compiler *compiler,
930                       struct anv_pipeline_stage *tes_stage,
931                       struct anv_pipeline_stage *next_stage)
932 {
933    if (next_stage)
934       brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
935 }
936 
937 static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *tes_stage, struct anv_pipeline_stage *tcs_stage)938 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
939                          void *mem_ctx,
940                          struct anv_device *device,
941                          struct anv_pipeline_stage *tes_stage,
942                          struct anv_pipeline_stage *tcs_stage)
943 {
944    tes_stage->key.tes.inputs_read =
945       tcs_stage->nir->info.outputs_written;
946    tes_stage->key.tes.patch_inputs_read =
947       tcs_stage->nir->info.patch_outputs_written;
948 
949    tes_stage->num_stats = 1;
950 
951    struct brw_compile_tes_params params = {
952       .nir = tes_stage->nir,
953       .key = &tes_stage->key.tes,
954       .prog_data = &tes_stage->prog_data.tes,
955       .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
956       .stats = tes_stage->stats,
957       .log_data = device,
958    };
959 
960    tes_stage->code = brw_compile_tes(compiler, mem_ctx, &params);
961 }
962 
963 static void
anv_pipeline_link_gs(const struct brw_compiler *compiler, struct anv_pipeline_stage *gs_stage, struct anv_pipeline_stage *next_stage)964 anv_pipeline_link_gs(const struct brw_compiler *compiler,
965                      struct anv_pipeline_stage *gs_stage,
966                      struct anv_pipeline_stage *next_stage)
967 {
968    if (next_stage)
969       brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
970 }
971 
972 static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *gs_stage, struct anv_pipeline_stage *prev_stage)973 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
974                         void *mem_ctx,
975                         struct anv_device *device,
976                         struct anv_pipeline_stage *gs_stage,
977                         struct anv_pipeline_stage *prev_stage)
978 {
979    brw_compute_vue_map(compiler->devinfo,
980                        &gs_stage->prog_data.gs.base.vue_map,
981                        gs_stage->nir->info.outputs_written,
982                        gs_stage->nir->info.separate_shader, 1);
983 
984    gs_stage->num_stats = 1;
985 
986    struct brw_compile_gs_params params = {
987       .nir = gs_stage->nir,
988       .key = &gs_stage->key.gs,
989       .prog_data = &gs_stage->prog_data.gs,
990       .stats = gs_stage->stats,
991       .log_data = device,
992    };
993 
994    gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
995 }
996 
997 static void
anv_pipeline_link_task(const struct brw_compiler *compiler, struct anv_pipeline_stage *task_stage, struct anv_pipeline_stage *next_stage)998 anv_pipeline_link_task(const struct brw_compiler *compiler,
999                        struct anv_pipeline_stage *task_stage,
1000                        struct anv_pipeline_stage *next_stage)
1001 {
1002    assert(next_stage);
1003    assert(next_stage->stage == MESA_SHADER_MESH);
1004    brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
1005 }
1006 
1007 static void
anv_pipeline_compile_task(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *task_stage)1008 anv_pipeline_compile_task(const struct brw_compiler *compiler,
1009                           void *mem_ctx,
1010                           struct anv_device *device,
1011                           struct anv_pipeline_stage *task_stage)
1012 {
1013    task_stage->num_stats = 1;
1014 
1015    struct brw_compile_task_params params = {
1016       .nir = task_stage->nir,
1017       .key = &task_stage->key.task,
1018       .prog_data = &task_stage->prog_data.task,
1019       .stats = task_stage->stats,
1020       .log_data = device,
1021    };
1022 
1023    task_stage->code = brw_compile_task(compiler, mem_ctx, &params);
1024 }
1025 
1026 static void
anv_pipeline_link_mesh(const struct brw_compiler *compiler, struct anv_pipeline_stage *mesh_stage, struct anv_pipeline_stage *next_stage)1027 anv_pipeline_link_mesh(const struct brw_compiler *compiler,
1028                        struct anv_pipeline_stage *mesh_stage,
1029                        struct anv_pipeline_stage *next_stage)
1030 {
1031    if (next_stage) {
1032       brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
1033    }
1034 }
1035 
1036 static void
anv_pipeline_compile_mesh(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *mesh_stage, struct anv_pipeline_stage *prev_stage)1037 anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
1038                           void *mem_ctx,
1039                           struct anv_device *device,
1040                           struct anv_pipeline_stage *mesh_stage,
1041                           struct anv_pipeline_stage *prev_stage)
1042 {
1043    mesh_stage->num_stats = 1;
1044 
1045    struct brw_compile_mesh_params params = {
1046       .nir = mesh_stage->nir,
1047       .key = &mesh_stage->key.mesh,
1048       .prog_data = &mesh_stage->prog_data.mesh,
1049       .stats = mesh_stage->stats,
1050       .log_data = device,
1051    };
1052 
1053    if (prev_stage) {
1054       assert(prev_stage->stage == MESA_SHADER_TASK);
1055       params.tue_map = &prev_stage->prog_data.task.map;
1056    }
1057 
1058    mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, &params);
1059 }
1060 
1061 static void
anv_pipeline_link_fs(const struct brw_compiler *compiler, struct anv_pipeline_stage *stage, const struct vk_render_pass_state *rp)1062 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1063                      struct anv_pipeline_stage *stage,
1064                      const struct vk_render_pass_state *rp)
1065 {
1066    /* Initially the valid outputs value is set to all possible render targets
1067     * valid (see populate_wm_prog_key()), before we look at the shader
1068     * variables. Here we look at the output variables of the shader an compute
1069     * a correct number of render target outputs.
1070     */
1071    stage->key.wm.color_outputs_valid = 0;
1072    nir_foreach_shader_out_variable_safe(var, stage->nir) {
1073       if (var->data.location < FRAG_RESULT_DATA0)
1074          continue;
1075 
1076       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1077       const unsigned array_len =
1078          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1079       assert(rt + array_len <= MAX_RTS);
1080 
1081       stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
1082    }
1083    stage->key.wm.color_outputs_valid &=
1084       (1u << rp->color_attachment_count) - 1;
1085    stage->key.wm.nr_color_regions =
1086       util_last_bit(stage->key.wm.color_outputs_valid);
1087 
1088    unsigned num_rt_bindings;
1089    struct anv_pipeline_binding rt_bindings[MAX_RTS];
1090    if (stage->key.wm.nr_color_regions > 0) {
1091       assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1092       for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1093          if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1094             rt_bindings[rt] = (struct anv_pipeline_binding) {
1095                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1096                .index = rt,
1097             };
1098          } else {
1099             /* Setup a null render target */
1100             rt_bindings[rt] = (struct anv_pipeline_binding) {
1101                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1102                .index = UINT32_MAX,
1103             };
1104          }
1105       }
1106       num_rt_bindings = stage->key.wm.nr_color_regions;
1107    } else {
1108       /* Setup a null render target */
1109       rt_bindings[0] = (struct anv_pipeline_binding) {
1110          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1111          .index = UINT32_MAX,
1112       };
1113       num_rt_bindings = 1;
1114    }
1115 
1116    assert(num_rt_bindings <= MAX_RTS);
1117    assert(stage->bind_map.surface_count == 0);
1118    typed_memcpy(stage->bind_map.surface_to_descriptor,
1119                 rt_bindings, num_rt_bindings);
1120    stage->bind_map.surface_count += num_rt_bindings;
1121 }
1122 
1123 static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler, void *mem_ctx, struct anv_device *device, struct anv_pipeline_stage *fs_stage, struct anv_pipeline_stage *prev_stage)1124 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1125                         void *mem_ctx,
1126                         struct anv_device *device,
1127                         struct anv_pipeline_stage *fs_stage,
1128                         struct anv_pipeline_stage *prev_stage)
1129 {
1130    /* TODO: we could set this to 0 based on the information in nir_shader, but
1131     * we need this before we call spirv_to_nir.
1132     */
1133    assert(prev_stage);
1134 
1135    struct brw_compile_fs_params params = {
1136       .nir = fs_stage->nir,
1137       .key = &fs_stage->key.wm,
1138       .prog_data = &fs_stage->prog_data.wm,
1139 
1140       .allow_spilling = true,
1141       .stats = fs_stage->stats,
1142       .log_data = device,
1143    };
1144 
1145    if (prev_stage->stage == MESA_SHADER_MESH) {
1146       params.mue_map = &prev_stage->prog_data.mesh.map;
1147       /* TODO(mesh): Slots valid, do we even use/rely on it? */
1148    } else {
1149       fs_stage->key.wm.input_slots_valid =
1150          prev_stage->prog_data.vue.vue_map.slots_valid;
1151    }
1152 
1153    fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
1154 
1155    fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1156                          (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1157                          (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1158 }
1159 
1160 static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline, struct anv_pipeline_stage *stage, struct brw_compile_stats *stats, uint32_t code_offset)1161 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1162                             struct anv_pipeline_stage *stage,
1163                             struct brw_compile_stats *stats,
1164                             uint32_t code_offset)
1165 {
1166    char *nir = NULL;
1167    if (stage->nir &&
1168        (pipeline->flags &
1169         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1170       nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1171    }
1172 
1173    char *disasm = NULL;
1174    if (stage->code &&
1175        (pipeline->flags &
1176         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1177       char *stream_data = NULL;
1178       size_t stream_size = 0;
1179       FILE *stream = open_memstream(&stream_data, &stream_size);
1180 
1181       uint32_t push_size = 0;
1182       for (unsigned i = 0; i < 4; i++)
1183          push_size += stage->bind_map.push_ranges[i].length;
1184       if (push_size > 0) {
1185          fprintf(stream, "Push constant ranges:\n");
1186          for (unsigned i = 0; i < 4; i++) {
1187             if (stage->bind_map.push_ranges[i].length == 0)
1188                continue;
1189 
1190             fprintf(stream, "    RANGE%d (%dB): ", i,
1191                     stage->bind_map.push_ranges[i].length * 32);
1192 
1193             switch (stage->bind_map.push_ranges[i].set) {
1194             case ANV_DESCRIPTOR_SET_NULL:
1195                fprintf(stream, "NULL");
1196                break;
1197 
1198             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1199                fprintf(stream, "Vulkan push constants and API params");
1200                break;
1201 
1202             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1203                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1204                        stage->bind_map.push_ranges[i].index,
1205                        stage->bind_map.push_ranges[i].start * 32);
1206                break;
1207 
1208             case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1209                unreachable("gl_NumWorkgroups is never pushed");
1210 
1211             case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1212                fprintf(stream, "Inline shader constant data (start=%dB)",
1213                        stage->bind_map.push_ranges[i].start * 32);
1214                break;
1215 
1216             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1217                unreachable("Color attachments can't be pushed");
1218 
1219             default:
1220                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1221                        stage->bind_map.push_ranges[i].set,
1222                        stage->bind_map.push_ranges[i].index,
1223                        stage->bind_map.push_ranges[i].start * 32);
1224                break;
1225             }
1226             fprintf(stream, "\n");
1227          }
1228          fprintf(stream, "\n");
1229       }
1230 
1231       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1232        * do it for every binary.
1233        */
1234       intel_disassemble(&pipeline->device->physical->compiler->isa,
1235                         stage->code, code_offset, stream);
1236 
1237       fclose(stream);
1238 
1239       /* Copy it to a ralloc'd thing */
1240       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1241       memcpy(disasm, stream_data, stream_size);
1242       disasm[stream_size] = 0;
1243 
1244       free(stream_data);
1245    }
1246 
1247    const struct anv_pipeline_executable exe = {
1248       .stage = stage->stage,
1249       .stats = *stats,
1250       .nir = nir,
1251       .disasm = disasm,
1252    };
1253    util_dynarray_append(&pipeline->executables,
1254                         struct anv_pipeline_executable, exe);
1255 }
1256 
1257 static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline, struct anv_pipeline_stage *stage, struct anv_shader_bin *bin)1258 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1259                              struct anv_pipeline_stage *stage,
1260                              struct anv_shader_bin *bin)
1261 {
1262    if (stage->stage == MESA_SHADER_FRAGMENT) {
1263       /* We pull the prog data and stats out of the anv_shader_bin because
1264        * the anv_pipeline_stage may not be fully populated if we successfully
1265        * looked up the shader in a cache.
1266        */
1267       const struct brw_wm_prog_data *wm_prog_data =
1268          (const struct brw_wm_prog_data *)bin->prog_data;
1269       struct brw_compile_stats *stats = bin->stats;
1270 
1271       if (wm_prog_data->dispatch_8) {
1272          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1273       }
1274 
1275       if (wm_prog_data->dispatch_16) {
1276          anv_pipeline_add_executable(pipeline, stage, stats++,
1277                                      wm_prog_data->prog_offset_16);
1278       }
1279 
1280       if (wm_prog_data->dispatch_32) {
1281          anv_pipeline_add_executable(pipeline, stage, stats++,
1282                                      wm_prog_data->prog_offset_32);
1283       }
1284    } else {
1285       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1286    }
1287 
1288    pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
1289 }
1290 
1291 static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)1292 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1293 {
1294    /* TODO: Cache this pipeline-wide information. */
1295 
1296    if (anv_pipeline_is_primitive(pipeline)) {
1297       /* Primitive replication depends on information from all the shaders.
1298        * Recover this bit from the fact that we have more than one position slot
1299        * in the vertex shader when using it.
1300        */
1301       assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1302       int pos_slots = 0;
1303       const struct brw_vue_prog_data *vue_prog_data =
1304          (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1305       const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1306       for (int i = 0; i < vue_map->num_slots; i++) {
1307          if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1308             pos_slots++;
1309       }
1310       pipeline->use_primitive_replication = pos_slots > 1;
1311    }
1312 }
1313 
1314 static void
anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state, struct anv_pipeline_stage *stages)1315 anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
1316                                 const struct vk_graphics_pipeline_state *state,
1317                                 struct anv_pipeline_stage *stages)
1318 {
1319    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1320       if (!stages[s].info)
1321          continue;
1322 
1323       int64_t stage_start = os_time_get_nano();
1324 
1325       vk_pipeline_hash_shader_stage(stages[s].info, stages[s].shader_sha1);
1326 
1327       const struct anv_device *device = pipeline->base.device;
1328       switch (stages[s].stage) {
1329       case MESA_SHADER_VERTEX:
1330          populate_vs_prog_key(device,
1331                               pipeline->base.device->robust_buffer_access,
1332                               &stages[s].key.vs);
1333          break;
1334       case MESA_SHADER_TESS_CTRL:
1335          populate_tcs_prog_key(device,
1336                                pipeline->base.device->robust_buffer_access,
1337                                state->ts->patch_control_points,
1338                                &stages[s].key.tcs);
1339          break;
1340       case MESA_SHADER_TESS_EVAL:
1341          populate_tes_prog_key(device,
1342                                pipeline->base.device->robust_buffer_access,
1343                                &stages[s].key.tes);
1344          break;
1345       case MESA_SHADER_GEOMETRY:
1346          populate_gs_prog_key(device,
1347                               pipeline->base.device->robust_buffer_access,
1348                               &stages[s].key.gs);
1349          break;
1350       case MESA_SHADER_FRAGMENT: {
1351          populate_wm_prog_key(pipeline,
1352                               pipeline->base.device->robust_buffer_access,
1353                               state->dynamic, state->ms, state->fsr, state->rp,
1354                               &stages[s].key.wm);
1355          break;
1356       }
1357       case MESA_SHADER_TASK:
1358          populate_task_prog_key(device,
1359                                 pipeline->base.device->robust_buffer_access,
1360                                 &stages[s].key.task);
1361          break;
1362       case MESA_SHADER_MESH:
1363          populate_mesh_prog_key(device,
1364                                 pipeline->base.device->robust_buffer_access,
1365                                 &stages[s].key.mesh);
1366          break;
1367       default:
1368          unreachable("Invalid graphics shader stage");
1369       }
1370 
1371       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1372       stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1373    }
1374 
1375    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
1376           pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
1377 }
1378 
1379 static bool
anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline, struct vk_pipeline_cache *cache, struct anv_pipeline_stage *stages, VkPipelineCreationFeedbackEXT *pipeline_feedback)1380 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
1381                                           struct vk_pipeline_cache *cache,
1382                                           struct anv_pipeline_stage *stages,
1383                                           VkPipelineCreationFeedbackEXT *pipeline_feedback)
1384 {
1385    unsigned found = 0;
1386    unsigned cache_hits = 0;
1387    for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1388       if (!stages[s].info)
1389          continue;
1390 
1391       int64_t stage_start = os_time_get_nano();
1392 
1393       bool cache_hit;
1394       struct anv_shader_bin *bin =
1395          anv_device_search_for_kernel(pipeline->base.device, cache,
1396                                       &stages[s].cache_key,
1397                                       sizeof(stages[s].cache_key), &cache_hit);
1398       if (bin) {
1399          found++;
1400          pipeline->shaders[s] = bin;
1401       }
1402 
1403       if (cache_hit) {
1404          cache_hits++;
1405          stages[s].feedback.flags |=
1406             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1407       }
1408       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1409    }
1410 
1411    if (found == __builtin_popcount(pipeline->active_stages)) {
1412       if (cache_hits == found) {
1413          pipeline_feedback->flags |=
1414             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1415       }
1416       /* We found all our shaders in the cache.  We're done. */
1417       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1418          if (!stages[s].info)
1419             continue;
1420 
1421          anv_pipeline_add_executables(&pipeline->base, &stages[s],
1422                                       pipeline->shaders[s]);
1423       }
1424       anv_pipeline_init_from_cached_graphics(pipeline);
1425       return true;
1426    } else if (found > 0) {
1427       /* We found some but not all of our shaders. This shouldn't happen most
1428        * of the time but it can if we have a partially populated pipeline
1429        * cache.
1430        */
1431       assert(found < __builtin_popcount(pipeline->active_stages));
1432 
1433       vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1434                                   &pipeline->base.device->vk.base),
1435               "Found a partial pipeline in the cache.  This is "
1436               "most likely caused by an incomplete pipeline cache "
1437               "import or export");
1438 
1439       /* We're going to have to recompile anyway, so just throw away our
1440        * references to the shaders in the cache.  We'll get them out of the
1441        * cache again as part of the compilation process.
1442        */
1443       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1444          stages[s].feedback.flags = 0;
1445          if (pipeline->shaders[s]) {
1446             anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1447             pipeline->shaders[s] = NULL;
1448          }
1449       }
1450    }
1451 
1452    return false;
1453 }
1454 
1455 static const gl_shader_stage graphics_shader_order[] = {
1456    MESA_SHADER_VERTEX,
1457    MESA_SHADER_TESS_CTRL,
1458    MESA_SHADER_TESS_EVAL,
1459    MESA_SHADER_GEOMETRY,
1460 
1461    MESA_SHADER_TASK,
1462    MESA_SHADER_MESH,
1463 
1464    MESA_SHADER_FRAGMENT,
1465 };
1466 
1467 static VkResult
anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline, struct vk_pipeline_cache *cache, struct anv_pipeline_stage *stages, void *pipeline_ctx)1468 anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
1469                                struct vk_pipeline_cache *cache,
1470                                struct anv_pipeline_stage *stages,
1471                                void *pipeline_ctx)
1472 {
1473    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1474       gl_shader_stage s = graphics_shader_order[i];
1475       if (!stages[s].info)
1476          continue;
1477 
1478       int64_t stage_start = os_time_get_nano();
1479 
1480       assert(stages[s].stage == s);
1481       assert(pipeline->shaders[s] == NULL);
1482 
1483       stages[s].bind_map = (struct anv_pipeline_bind_map) {
1484          .surface_to_descriptor = stages[s].surface_to_descriptor,
1485          .sampler_to_descriptor = stages[s].sampler_to_descriptor
1486       };
1487 
1488       stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1489                                                  pipeline_ctx,
1490                                                  &stages[s]);
1491       if (stages[s].nir == NULL) {
1492          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1493       }
1494 
1495       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1496    }
1497 
1498    return VK_SUCCESS;
1499 }
1500 
1501 static VkResult
anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline, struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *info, const struct vk_graphics_pipeline_state *state)1502 anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
1503                               struct vk_pipeline_cache *cache,
1504                               const VkGraphicsPipelineCreateInfo *info,
1505                               const struct vk_graphics_pipeline_state *state)
1506 {
1507    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1508    VkResult result;
1509 
1510    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1511       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1512    };
1513    int64_t pipeline_start = os_time_get_nano();
1514 
1515    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1516    struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1517    for (uint32_t i = 0; i < info->stageCount; i++) {
1518       gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
1519       stages[stage].stage = stage;
1520       stages[stage].info = &info->pStages[i];
1521    }
1522 
1523    anv_graphics_pipeline_init_keys(pipeline, state, stages);
1524 
1525    unsigned char sha1[20];
1526    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1527 
1528    for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
1529       if (!stages[s].info)
1530          continue;
1531 
1532       stages[s].cache_key.stage = s;
1533       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1534    }
1535 
1536    const bool skip_cache_lookup =
1537       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1538    if (!skip_cache_lookup) {
1539       bool found_all_shaders =
1540          anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
1541                                                    &pipeline_feedback);
1542       if (found_all_shaders)
1543          goto done;
1544    }
1545 
1546    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1547       return VK_PIPELINE_COMPILE_REQUIRED;
1548 
1549    void *pipeline_ctx = ralloc_context(NULL);
1550 
1551    result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
1552                                            pipeline_ctx);
1553    if (result != VK_SUCCESS)
1554       goto fail;
1555 
1556    /* Walk backwards to link */
1557    struct anv_pipeline_stage *next_stage = NULL;
1558    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1559       gl_shader_stage s = graphics_shader_order[i];
1560       if (!stages[s].info)
1561          continue;
1562 
1563       switch (s) {
1564       case MESA_SHADER_VERTEX:
1565          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1566          break;
1567       case MESA_SHADER_TESS_CTRL:
1568          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1569          break;
1570       case MESA_SHADER_TESS_EVAL:
1571          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1572          break;
1573       case MESA_SHADER_GEOMETRY:
1574          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1575          break;
1576       case MESA_SHADER_TASK:
1577          anv_pipeline_link_task(compiler, &stages[s], next_stage);
1578          break;
1579       case MESA_SHADER_MESH:
1580          anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
1581          break;
1582       case MESA_SHADER_FRAGMENT:
1583          anv_pipeline_link_fs(compiler, &stages[s], state->rp);
1584          break;
1585       default:
1586          unreachable("Invalid graphics shader stage");
1587       }
1588 
1589       next_stage = &stages[s];
1590    }
1591 
1592    if (pipeline->base.device->info.ver >= 12 &&
1593        pipeline->view_mask != 0) {
1594       /* For some pipelines HW Primitive Replication can be used instead of
1595        * instancing to implement Multiview.  This depend on how viewIndex is
1596        * used in all the active shaders, so this check can't be done per
1597        * individual shaders.
1598        */
1599       nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1600       for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
1601          shaders[s] = stages[s].nir;
1602 
1603       pipeline->use_primitive_replication =
1604          anv_check_for_primitive_replication(shaders, pipeline);
1605    } else {
1606       pipeline->use_primitive_replication = false;
1607    }
1608 
1609    struct anv_pipeline_stage *prev_stage = NULL;
1610    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1611       gl_shader_stage s = graphics_shader_order[i];
1612       if (!stages[s].info)
1613          continue;
1614 
1615       int64_t stage_start = os_time_get_nano();
1616 
1617       void *stage_ctx = ralloc_context(NULL);
1618 
1619       anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1620 
1621       if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
1622          prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1623                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1624          stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1625                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1626          prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1627          stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1628       }
1629 
1630       ralloc_free(stage_ctx);
1631 
1632       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1633 
1634       prev_stage = &stages[s];
1635    }
1636 
1637    /* In the case the platform can write the primitive variable shading rate,
1638     * figure out the last geometry stage that should write the primitive
1639     * shading rate, and ensure it is marked as used there. The backend will
1640     * write a default value if the shader doesn't actually write it.
1641     *
1642     * We iterate backwards in the stage and stop on the first shader that can
1643     * set the value.
1644     */
1645    const struct intel_device_info *devinfo = &pipeline->base.device->info;
1646    if (devinfo->has_coarse_pixel_primitive_and_cb &&
1647        stages[MESA_SHADER_FRAGMENT].info &&
1648        stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
1649        !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
1650        stages[MESA_SHADER_MESH].info == NULL) {
1651       struct anv_pipeline_stage *last_psr = NULL;
1652 
1653       for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1654          gl_shader_stage s =
1655             graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];
1656 
1657          if (!stages[s].info ||
1658              !gl_shader_stage_can_set_fragment_shading_rate(s))
1659             continue;
1660 
1661          last_psr = &stages[s];
1662          break;
1663       }
1664 
1665       assert(last_psr);
1666       last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
1667    }
1668 
1669    prev_stage = NULL;
1670    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1671       gl_shader_stage s = graphics_shader_order[i];
1672       if (!stages[s].info)
1673          continue;
1674 
1675       int64_t stage_start = os_time_get_nano();
1676 
1677       void *stage_ctx = ralloc_context(NULL);
1678 
1679       switch (s) {
1680       case MESA_SHADER_VERTEX:
1681          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1682                                  &stages[s]);
1683          break;
1684       case MESA_SHADER_TESS_CTRL:
1685          anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1686                                   &stages[s], prev_stage);
1687          break;
1688       case MESA_SHADER_TESS_EVAL:
1689          anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1690                                   &stages[s], prev_stage);
1691          break;
1692       case MESA_SHADER_GEOMETRY:
1693          anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1694                                  &stages[s], prev_stage);
1695          break;
1696       case MESA_SHADER_TASK:
1697          anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
1698                                    &stages[s]);
1699          break;
1700       case MESA_SHADER_MESH:
1701          anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
1702                                    &stages[s], prev_stage);
1703          break;
1704       case MESA_SHADER_FRAGMENT:
1705          anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1706                                  &stages[s], prev_stage);
1707          break;
1708       default:
1709          unreachable("Invalid graphics shader stage");
1710       }
1711       if (stages[s].code == NULL) {
1712          ralloc_free(stage_ctx);
1713          result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1714          goto fail;
1715       }
1716 
1717       anv_nir_validate_push_layout(&stages[s].prog_data.base,
1718                                    &stages[s].bind_map);
1719 
1720       struct anv_shader_bin *bin =
1721          anv_device_upload_kernel(pipeline->base.device, cache, s,
1722                                   &stages[s].cache_key,
1723                                   sizeof(stages[s].cache_key),
1724                                   stages[s].code,
1725                                   stages[s].prog_data.base.program_size,
1726                                   &stages[s].prog_data.base,
1727                                   brw_prog_data_size(s),
1728                                   stages[s].stats, stages[s].num_stats,
1729                                   stages[s].nir->xfb_info,
1730                                   &stages[s].bind_map);
1731       if (!bin) {
1732          ralloc_free(stage_ctx);
1733          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1734          goto fail;
1735       }
1736 
1737       anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1738 
1739       pipeline->shaders[s] = bin;
1740       ralloc_free(stage_ctx);
1741 
1742       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1743 
1744       prev_stage = &stages[s];
1745    }
1746 
1747    ralloc_free(pipeline_ctx);
1748 
1749 done:
1750 
1751    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1752 
1753    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1754       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1755    if (create_feedback) {
1756       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1757 
1758       uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
1759       assert(stage_count == 0 || info->stageCount == stage_count);
1760       for (uint32_t i = 0; i < stage_count; i++) {
1761          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1762          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1763       }
1764    }
1765 
1766    return VK_SUCCESS;
1767 
1768 fail:
1769    ralloc_free(pipeline_ctx);
1770 
1771    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1772       if (pipeline->shaders[s])
1773          anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1774    }
1775 
1776    return result;
1777 }
1778 
1779 static VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, struct vk_pipeline_cache *cache, const VkComputePipelineCreateInfo *info)1780 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1781                         struct vk_pipeline_cache *cache,
1782                         const VkComputePipelineCreateInfo *info)
1783 {
1784    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
1785    assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
1786 
1787    VkPipelineCreationFeedback pipeline_feedback = {
1788       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1789    };
1790    int64_t pipeline_start = os_time_get_nano();
1791 
1792    struct anv_device *device = pipeline->base.device;
1793    const struct brw_compiler *compiler = device->physical->compiler;
1794 
1795    struct anv_pipeline_stage stage = {
1796       .stage = MESA_SHADER_COMPUTE,
1797       .info = &info->stage,
1798       .cache_key = {
1799          .stage = MESA_SHADER_COMPUTE,
1800       },
1801       .feedback = {
1802          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1803       },
1804    };
1805    vk_pipeline_hash_shader_stage(&info->stage, stage.shader_sha1);
1806 
1807    struct anv_shader_bin *bin = NULL;
1808 
1809    populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs);
1810 
1811    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1812 
1813    const bool skip_cache_lookup =
1814       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1815 
1816    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1817 
1818    bool cache_hit = false;
1819    if (!skip_cache_lookup) {
1820       bin = anv_device_search_for_kernel(device, cache,
1821                                          &stage.cache_key,
1822                                          sizeof(stage.cache_key),
1823                                          &cache_hit);
1824    }
1825 
1826    if (bin == NULL &&
1827        (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
1828       return VK_PIPELINE_COMPILE_REQUIRED;
1829 
1830    void *mem_ctx = ralloc_context(NULL);
1831    if (bin == NULL) {
1832       int64_t stage_start = os_time_get_nano();
1833 
1834       stage.bind_map = (struct anv_pipeline_bind_map) {
1835          .surface_to_descriptor = stage.surface_to_descriptor,
1836          .sampler_to_descriptor = stage.sampler_to_descriptor
1837       };
1838 
1839       /* Set up a binding for the gl_NumWorkGroups */
1840       stage.bind_map.surface_count = 1;
1841       stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1842          .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1843       };
1844 
1845       stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1846       if (stage.nir == NULL) {
1847          ralloc_free(mem_ctx);
1848          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1849       }
1850 
1851       NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);
1852 
1853       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1854 
1855       unsigned local_size = stage.nir->info.workgroup_size[0] *
1856                             stage.nir->info.workgroup_size[1] *
1857                             stage.nir->info.workgroup_size[2];
1858 
1859       /* Games don't always request full subgroups when they should,
1860        * which can cause bugs, as they may expect bigger size of the
1861        * subgroup than we choose for the execution.
1862        */
1863       if (device->physical->instance->assume_full_subgroups &&
1864           stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
1865           stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
1866           local_size &&
1867           local_size % BRW_SUBGROUP_SIZE == 0)
1868          stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
1869 
1870       /* If the client requests that we dispatch full subgroups but doesn't
1871        * allow us to pick a subgroup size, we have to smash it to the API
1872        * value of 32.  Performance will likely be terrible in this case but
1873        * there's nothing we can do about that.  The client should have chosen
1874        * a size.
1875        */
1876       if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
1877          stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;
1878 
1879       stage.num_stats = 1;
1880 
1881       struct brw_compile_cs_params params = {
1882          .nir = stage.nir,
1883          .key = &stage.key.cs,
1884          .prog_data = &stage.prog_data.cs,
1885          .stats = stage.stats,
1886          .log_data = device,
1887       };
1888 
1889       stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1890       if (stage.code == NULL) {
1891          ralloc_free(mem_ctx);
1892          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1893       }
1894 
1895       anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1896 
1897       if (!stage.prog_data.cs.uses_num_work_groups) {
1898          assert(stage.bind_map.surface_to_descriptor[0].set ==
1899                 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1900          stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1901       }
1902 
1903       const unsigned code_size = stage.prog_data.base.program_size;
1904       bin = anv_device_upload_kernel(device, cache,
1905                                      MESA_SHADER_COMPUTE,
1906                                      &stage.cache_key, sizeof(stage.cache_key),
1907                                      stage.code, code_size,
1908                                      &stage.prog_data.base,
1909                                      sizeof(stage.prog_data.cs),
1910                                      stage.stats, stage.num_stats,
1911                                      NULL, &stage.bind_map);
1912       if (!bin) {
1913          ralloc_free(mem_ctx);
1914          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1915       }
1916 
1917       stage.feedback.duration = os_time_get_nano() - stage_start;
1918    }
1919 
1920    anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1921 
1922    ralloc_free(mem_ctx);
1923 
1924    if (cache_hit) {
1925       stage.feedback.flags |=
1926          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1927       pipeline_feedback.flags |=
1928          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1929    }
1930    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1931 
1932    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1933       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1934    if (create_feedback) {
1935       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1936 
1937       if (create_feedback->pipelineStageCreationFeedbackCount) {
1938          assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1939          create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1940       }
1941    }
1942 
1943    pipeline->cs = bin;
1944 
1945    return VK_SUCCESS;
1946 }
1947 
1948 static VkResult
anv_compute_pipeline_create(struct anv_device *device, struct vk_pipeline_cache *cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)1949 anv_compute_pipeline_create(struct anv_device *device,
1950                             struct vk_pipeline_cache *cache,
1951                             const VkComputePipelineCreateInfo *pCreateInfo,
1952                             const VkAllocationCallbacks *pAllocator,
1953                             VkPipeline *pPipeline)
1954 {
1955    struct anv_compute_pipeline *pipeline;
1956    VkResult result;
1957 
1958    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1959 
1960    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1961                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1962    if (pipeline == NULL)
1963       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1964 
1965    result = anv_pipeline_init(&pipeline->base, device,
1966                               ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
1967                               pAllocator);
1968    if (result != VK_SUCCESS) {
1969       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1970       return result;
1971    }
1972 
1973    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1974                          pipeline->batch_data, sizeof(pipeline->batch_data));
1975 
1976    result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
1977    if (result != VK_SUCCESS) {
1978       anv_pipeline_finish(&pipeline->base, device, pAllocator);
1979       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1980       return result;
1981    }
1982 
1983    anv_genX(&device->info, compute_pipeline_emit)(pipeline);
1984 
1985    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1986 
1987    return pipeline->base.batch.status;
1988 }
1989 
anv_CreateComputePipelines( VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines)1990 VkResult anv_CreateComputePipelines(
1991     VkDevice                                    _device,
1992     VkPipelineCache                             pipelineCache,
1993     uint32_t                                    count,
1994     const VkComputePipelineCreateInfo*          pCreateInfos,
1995     const VkAllocationCallbacks*                pAllocator,
1996     VkPipeline*                                 pPipelines)
1997 {
1998    ANV_FROM_HANDLE(anv_device, device, _device);
1999    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2000 
2001    VkResult result = VK_SUCCESS;
2002 
2003    unsigned i;
2004    for (i = 0; i < count; i++) {
2005       VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
2006                                                  &pCreateInfos[i],
2007                                                  pAllocator, &pPipelines[i]);
2008 
2009       if (res == VK_SUCCESS)
2010          continue;
2011 
2012       /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2013        * is not obvious what error should be report upon 2 different failures.
2014        * */
2015       result = res;
2016       if (res != VK_PIPELINE_COMPILE_REQUIRED)
2017          break;
2018 
2019       pPipelines[i] = VK_NULL_HANDLE;
2020 
2021       if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2022          break;
2023    }
2024 
2025    for (; i < count; i++)
2026       pPipelines[i] = VK_NULL_HANDLE;
2027 
2028    return result;
2029 }
2030 
2031 /**
2032  * Calculate the desired L3 partitioning based on the current state of the
2033  * pipeline.  For now this simply returns the conservative defaults calculated
2034  * by get_default_l3_weights(), but we could probably do better by gathering
2035  * more statistics from the pipeline state (e.g. guess of expected URB usage
2036  * and bound surfaces), or by using feed-back from performance counters.
2037  */
2038 void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)2039 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2040 {
2041    const struct intel_device_info *devinfo = &pipeline->device->info;
2042 
2043    const struct intel_l3_weights w =
2044       intel_get_default_l3_weights(devinfo, true, needs_slm);
2045 
2046    pipeline->l3_config = intel_get_l3_config(devinfo, w);
2047 }
2048 
2049 static VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device, struct vk_pipeline_cache *cache, const struct VkGraphicsPipelineCreateInfo *pCreateInfo, const struct vk_graphics_pipeline_state *state, const VkAllocationCallbacks *alloc)2050 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2051                            struct anv_device *device,
2052                            struct vk_pipeline_cache *cache,
2053                            const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
2054                            const struct vk_graphics_pipeline_state *state,
2055                            const VkAllocationCallbacks *alloc)
2056 {
2057    VkResult result;
2058 
2059    result = anv_pipeline_init(&pipeline->base, device,
2060                               ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2061                               alloc);
2062    if (result != VK_SUCCESS)
2063       return result;
2064 
2065    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2066                          pipeline->batch_data, sizeof(pipeline->batch_data));
2067 
2068    pipeline->active_stages = 0;
2069    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2070       pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2071 
2072    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2073       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2074 
2075    if (anv_pipeline_is_mesh(pipeline))
2076       assert(device->physical->vk.supported_extensions.NV_mesh_shader);
2077 
2078    pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
2079    vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
2080 
2081    pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
2082    pipeline->depth_clip_enable = state->rs->depth_clip_enable;
2083    pipeline->view_mask = state->rp->view_mask;
2084 
2085    result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
2086    if (result != VK_SUCCESS) {
2087       anv_pipeline_finish(&pipeline->base, device, alloc);
2088       return result;
2089    }
2090 
2091    anv_pipeline_setup_l3_config(&pipeline->base, false);
2092 
2093    if (anv_pipeline_is_primitive(pipeline)) {
2094       const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2095 
2096       u_foreach_bit(a, state->vi->attributes_valid) {
2097          if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
2098             pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
2099       }
2100 
2101       u_foreach_bit(b, state->vi->bindings_valid) {
2102          pipeline->vb[b].stride = state->vi->bindings[b].stride;
2103          pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
2104                                       VK_VERTEX_INPUT_RATE_INSTANCE;
2105          pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
2106       }
2107 
2108       /* Our implementation of VK_KHR_multiview uses instancing to draw the
2109        * different views.  If the client asks for instancing, we need to multiply
2110        * the instance divisor by the number of views ensure that we repeat the
2111        * client's per-instance data once for each view.
2112        */
2113       pipeline->instance_multiplier = 1;
2114       if (pipeline->view_mask && !pipeline->use_primitive_replication)
2115          pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
2116    } else {
2117       assert(anv_pipeline_is_mesh(pipeline));
2118       /* TODO(mesh): Mesh vs. Multiview with Instancing. */
2119    }
2120 
2121    pipeline->negative_one_to_one =
2122       state->vp != NULL && state->vp->negative_one_to_one;
2123 
2124    /* Store line mode, polygon mode and rasterization samples, these are used
2125     * for dynamic primitive topology.
2126     */
2127    pipeline->polygon_mode = state->rs->polygon_mode;
2128    pipeline->rasterization_samples =
2129       state->ms != NULL ? state->ms->rasterization_samples : 1;
2130    pipeline->line_mode = state->rs->line.mode;
2131    if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2132       if (pipeline->rasterization_samples > 1) {
2133          pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2134       } else {
2135          pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2136       }
2137    }
2138    pipeline->patch_control_points =
2139       state->ts != NULL ? state->ts->patch_control_points : 0;
2140 
2141    /* Store the color write masks, to be merged with color write enable if
2142     * dynamic.
2143     */
2144    if (state->cb != NULL) {
2145       for (unsigned i = 0; i < state->cb->attachment_count; i++)
2146          pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
2147    }
2148 
2149    return VK_SUCCESS;
2150 }
2151 
2152 static VkResult
anv_graphics_pipeline_create(struct anv_device *device, struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)2153 anv_graphics_pipeline_create(struct anv_device *device,
2154                              struct vk_pipeline_cache *cache,
2155                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
2156                              const VkAllocationCallbacks *pAllocator,
2157                              VkPipeline *pPipeline)
2158 {
2159    struct anv_graphics_pipeline *pipeline;
2160    VkResult result;
2161 
2162    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2163 
2164    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2165                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2166    if (pipeline == NULL)
2167       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2168 
2169    struct vk_graphics_pipeline_all_state all;
2170    struct vk_graphics_pipeline_state state = { };
2171    result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
2172                                             NULL /* sp_info */,
2173                                             &all, NULL, 0, NULL);
2174    if (result != VK_SUCCESS) {
2175       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2176       return result;
2177    }
2178 
2179    result = anv_graphics_pipeline_init(pipeline, device, cache,
2180                                        pCreateInfo, &state, pAllocator);
2181    if (result != VK_SUCCESS) {
2182       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2183       return result;
2184    }
2185 
2186    anv_genX(&device->info, graphics_pipeline_emit)(pipeline, &state);
2187 
2188    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2189 
2190    return pipeline->base.batch.status;
2191 }
2192 
anv_CreateGraphicsPipelines( VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines)2193 VkResult anv_CreateGraphicsPipelines(
2194     VkDevice                                    _device,
2195     VkPipelineCache                             pipelineCache,
2196     uint32_t                                    count,
2197     const VkGraphicsPipelineCreateInfo*         pCreateInfos,
2198     const VkAllocationCallbacks*                pAllocator,
2199     VkPipeline*                                 pPipelines)
2200 {
2201    ANV_FROM_HANDLE(anv_device, device, _device);
2202    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2203 
2204    VkResult result = VK_SUCCESS;
2205 
2206    unsigned i;
2207    for (i = 0; i < count; i++) {
2208       VkResult res = anv_graphics_pipeline_create(device,
2209                                                   pipeline_cache,
2210                                                   &pCreateInfos[i],
2211                                                   pAllocator, &pPipelines[i]);
2212 
2213       if (res == VK_SUCCESS)
2214          continue;
2215 
2216       /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2217        * is not obvious what error should be report upon 2 different failures.
2218        * */
2219       result = res;
2220       if (res != VK_PIPELINE_COMPILE_REQUIRED)
2221          break;
2222 
2223       pPipelines[i] = VK_NULL_HANDLE;
2224 
2225       if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2226          break;
2227    }
2228 
2229    for (; i < count; i++)
2230       pPipelines[i] = VK_NULL_HANDLE;
2231 
2232    return result;
2233 }
2234 
2235 static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, struct vk_pipeline_cache *cache, nir_shader *nir, struct anv_pipeline_stage *stage, struct anv_shader_bin **shader_out, void *mem_ctx)2236 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2237                          struct vk_pipeline_cache *cache,
2238                          nir_shader *nir,
2239                          struct anv_pipeline_stage *stage,
2240                          struct anv_shader_bin **shader_out,
2241                          void *mem_ctx)
2242 {
2243    const struct brw_compiler *compiler =
2244       pipeline->base.device->physical->compiler;
2245    const struct intel_device_info *devinfo = compiler->devinfo;
2246 
2247    nir_shader **resume_shaders = NULL;
2248    uint32_t num_resume_shaders = 0;
2249    if (nir->info.stage != MESA_SHADER_COMPUTE) {
2250       NIR_PASS(_, nir, nir_lower_shader_calls,
2251                nir_address_format_64bit_global,
2252                BRW_BTD_STACK_ALIGN,
2253                &resume_shaders, &num_resume_shaders, mem_ctx);
2254       NIR_PASS(_, nir, brw_nir_lower_shader_calls);
2255       NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2256    }
2257 
2258    for (unsigned i = 0; i < num_resume_shaders; i++) {
2259       NIR_PASS(_,resume_shaders[i], brw_nir_lower_shader_calls);
2260       NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2261    }
2262 
2263    struct brw_compile_bs_params params = {
2264       .nir = nir,
2265       .key = &stage->key.bs,
2266       .prog_data = &stage->prog_data.bs,
2267       .num_resume_shaders = num_resume_shaders,
2268       .resume_shaders = resume_shaders,
2269 
2270       .stats = stage->stats,
2271       .log_data = pipeline->base.device,
2272    };
2273 
2274    stage->code = brw_compile_bs(compiler, mem_ctx, &params);
2275    if (stage->code == NULL)
2276       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2277 
2278    /* Ray-tracing shaders don't have a "real" bind map */
2279    struct anv_pipeline_bind_map empty_bind_map = {};
2280 
2281    const unsigned code_size = stage->prog_data.base.program_size;
2282    struct anv_shader_bin *bin =
2283       anv_device_upload_kernel(pipeline->base.device,
2284                                cache,
2285                                stage->stage,
2286                                &stage->cache_key, sizeof(stage->cache_key),
2287                                stage->code, code_size,
2288                                &stage->prog_data.base,
2289                                sizeof(stage->prog_data.bs),
2290                                stage->stats, 1,
2291                                NULL, &empty_bind_map);
2292    if (bin == NULL)
2293       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2294 
2295    /* TODO: Figure out executables for resume shaders */
2296    anv_pipeline_add_executables(&pipeline->base, stage, bin);
2297    util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2298 
2299    *shader_out = bin;
2300 
2301    return VK_SUCCESS;
2302 }
2303 
2304 static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)2305 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2306 {
2307    if (info->pDynamicState == NULL)
2308       return false;
2309 
2310    for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2311       if (info->pDynamicState->pDynamicStates[i] ==
2312           VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2313          return true;
2314    }
2315 
2316    return false;
2317 }
2318 
2319 static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline, const VkRayTracingPipelineCreateInfoKHR *info, uint32_t *stack_max)2320 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2321                                         const VkRayTracingPipelineCreateInfoKHR *info,
2322                                         uint32_t *stack_max)
2323 {
2324    if (is_rt_stack_size_dynamic(info)) {
2325       pipeline->stack_size = 0; /* 0 means dynamic */
2326    } else {
2327       /* From the Vulkan spec:
2328        *
2329        *    "If the stack size is not set explicitly, the stack size for a
2330        *    pipeline is:
2331        *
2332        *       rayGenStackMax +
2333        *       min(1, maxPipelineRayRecursionDepth) ×
2334        *       max(closestHitStackMax, missStackMax,
2335        *           intersectionStackMax + anyHitStackMax) +
2336        *       max(0, maxPipelineRayRecursionDepth-1) ×
2337        *       max(closestHitStackMax, missStackMax) +
2338        *       2 × callableStackMax"
2339        */
2340       pipeline->stack_size =
2341          stack_max[MESA_SHADER_RAYGEN] +
2342          MIN2(1, info->maxPipelineRayRecursionDepth) *
2343          MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2344               stack_max[MESA_SHADER_MISS],
2345               stack_max[MESA_SHADER_INTERSECTION],
2346               stack_max[MESA_SHADER_ANY_HIT]) +
2347          MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2348          MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2349               stack_max[MESA_SHADER_MISS]) +
2350          2 * stack_max[MESA_SHADER_CALLABLE];
2351 
2352       /* This is an extremely unlikely case but we need to set it to some
2353        * non-zero value so that we don't accidentally think it's dynamic.
2354        * Our minimum stack size is 2KB anyway so we could set to any small
2355        * value we like.
2356        */
2357       if (pipeline->stack_size == 0)
2358          pipeline->stack_size = 1;
2359    }
2360 }
2361 
2362 static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, const VkRayTracingPipelineCreateInfoKHR *info, void *pipeline_ctx)2363 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2364                                      const VkRayTracingPipelineCreateInfoKHR *info,
2365                                      void *pipeline_ctx)
2366 {
2367    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2368 
2369    /* Create enough stage entries for all shader modules plus potential
2370     * combinaisons in the groups.
2371     */
2372    struct anv_pipeline_stage *stages =
2373       rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2374 
2375    for (uint32_t i = 0; i < info->stageCount; i++) {
2376       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2377       if (vk_pipeline_shader_stage_is_null(sinfo))
2378          continue;
2379 
2380       int64_t stage_start = os_time_get_nano();
2381 
2382       stages[i] = (struct anv_pipeline_stage) {
2383          .stage = vk_to_mesa_shader_stage(sinfo->stage),
2384          .info = sinfo,
2385          .cache_key = {
2386             .stage = vk_to_mesa_shader_stage(sinfo->stage),
2387          },
2388          .feedback = {
2389             .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2390          },
2391       };
2392 
2393       populate_bs_prog_key(pipeline->base.device,
2394                            pipeline->base.device->robust_buffer_access,
2395                            &stages[i].key.bs);
2396 
2397       vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1);
2398 
2399       if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2400          anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2401                                               stages[i].cache_key.sha1);
2402       }
2403 
2404       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2405    }
2406 
2407    for (uint32_t i = 0; i < info->groupCount; i++) {
2408       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2409 
2410       if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2411          continue;
2412 
2413       int64_t stage_start = os_time_get_nano();
2414 
2415       uint32_t intersection_idx = ginfo->intersectionShader;
2416       assert(intersection_idx < info->stageCount);
2417 
2418       uint32_t any_hit_idx = ginfo->anyHitShader;
2419       if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2420          assert(any_hit_idx < info->stageCount);
2421          anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2422                                                        layout,
2423                                                        &stages[intersection_idx],
2424                                                        &stages[any_hit_idx],
2425                                                        stages[intersection_idx].cache_key.sha1);
2426       } else {
2427          anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2428                                               &stages[intersection_idx],
2429                                               stages[intersection_idx].cache_key.sha1);
2430       }
2431 
2432       stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2433    }
2434 
2435    return stages;
2436 }
2437 
2438 static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *info, struct anv_pipeline_stage *stages, uint32_t *stack_max)2439 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2440                                  struct vk_pipeline_cache *cache,
2441                                  const VkRayTracingPipelineCreateInfoKHR *info,
2442                                  struct anv_pipeline_stage *stages,
2443                                  uint32_t *stack_max)
2444 {
2445    uint32_t shaders = 0, cache_hits = 0;
2446    for (uint32_t i = 0; i < info->stageCount; i++) {
2447       if (stages[i].info == NULL)
2448          continue;
2449 
2450       shaders++;
2451 
2452       int64_t stage_start = os_time_get_nano();
2453 
2454       bool cache_hit;
2455       stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2456                                                    &stages[i].cache_key,
2457                                                    sizeof(stages[i].cache_key),
2458                                                    &cache_hit);
2459       if (cache_hit) {
2460          cache_hits++;
2461          stages[i].feedback.flags |=
2462             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2463       }
2464 
2465       if (stages[i].bin != NULL) {
2466          anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2467          util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2468 
2469          uint32_t stack_size =
2470             brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2471          stack_max[stages[i].stage] =
2472             MAX2(stack_max[stages[i].stage], stack_size);
2473       }
2474 
2475       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2476    }
2477 
2478    return cache_hits == shaders;
2479 }
2480 
2481 static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline, struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *info)2482 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2483                                  struct vk_pipeline_cache *cache,
2484                                  const VkRayTracingPipelineCreateInfoKHR *info)
2485 {
2486    const struct intel_device_info *devinfo = &pipeline->base.device->info;
2487    VkResult result;
2488 
2489    VkPipelineCreationFeedback pipeline_feedback = {
2490       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2491    };
2492    int64_t pipeline_start = os_time_get_nano();
2493 
2494    void *pipeline_ctx = ralloc_context(NULL);
2495 
2496    struct anv_pipeline_stage *stages =
2497       anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2498 
2499    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2500 
2501    const bool skip_cache_lookup =
2502       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2503 
2504    uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2505 
2506    if (!skip_cache_lookup &&
2507        anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2508       pipeline_feedback.flags |=
2509          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2510       goto done;
2511    }
2512 
2513    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
2514       ralloc_free(pipeline_ctx);
2515       return VK_PIPELINE_COMPILE_REQUIRED;
2516    }
2517 
2518    for (uint32_t i = 0; i < info->stageCount; i++) {
2519       if (stages[i].info == NULL)
2520          continue;
2521 
2522       int64_t stage_start = os_time_get_nano();
2523 
2524       stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2525                                                  pipeline_ctx, &stages[i]);
2526       if (stages[i].nir == NULL) {
2527          ralloc_free(pipeline_ctx);
2528          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2529       }
2530 
2531       anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2532 
2533       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2534    }
2535 
2536    for (uint32_t i = 0; i < info->stageCount; i++) {
2537       if (stages[i].info == NULL)
2538          continue;
2539 
2540       /* Shader found in cache already. */
2541       if (stages[i].bin != NULL)
2542          continue;
2543 
2544       /* We handle intersection shaders as part of the group */
2545       if (stages[i].stage == MESA_SHADER_INTERSECTION)
2546          continue;
2547 
2548       int64_t stage_start = os_time_get_nano();
2549 
2550       void *stage_ctx = ralloc_context(pipeline_ctx);
2551 
2552       nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2553       switch (stages[i].stage) {
2554       case MESA_SHADER_RAYGEN:
2555          brw_nir_lower_raygen(nir);
2556          break;
2557 
2558       case MESA_SHADER_ANY_HIT:
2559          brw_nir_lower_any_hit(nir, devinfo);
2560          break;
2561 
2562       case MESA_SHADER_CLOSEST_HIT:
2563          brw_nir_lower_closest_hit(nir);
2564          break;
2565 
2566       case MESA_SHADER_MISS:
2567          brw_nir_lower_miss(nir);
2568          break;
2569 
2570       case MESA_SHADER_INTERSECTION:
2571          unreachable("These are handled later");
2572 
2573       case MESA_SHADER_CALLABLE:
2574          brw_nir_lower_callable(nir);
2575          break;
2576 
2577       default:
2578          unreachable("Invalid ray-tracing shader stage");
2579       }
2580 
2581       result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2582                                         &stages[i].bin, stage_ctx);
2583       if (result != VK_SUCCESS) {
2584          ralloc_free(pipeline_ctx);
2585          return result;
2586       }
2587 
2588       uint32_t stack_size =
2589          brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2590       stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2591 
2592       ralloc_free(stage_ctx);
2593 
2594       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2595    }
2596 
2597    for (uint32_t i = 0; i < info->groupCount; i++) {
2598       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2599       struct anv_rt_shader_group *group = &pipeline->groups[i];
2600       group->type = ginfo->type;
2601       switch (ginfo->type) {
2602       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2603          assert(ginfo->generalShader < info->stageCount);
2604          group->general = stages[ginfo->generalShader].bin;
2605          break;
2606 
2607       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2608          if (ginfo->anyHitShader < info->stageCount)
2609             group->any_hit = stages[ginfo->anyHitShader].bin;
2610 
2611          if (ginfo->closestHitShader < info->stageCount)
2612             group->closest_hit = stages[ginfo->closestHitShader].bin;
2613          break;
2614 
2615       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2616          if (ginfo->closestHitShader < info->stageCount)
2617             group->closest_hit = stages[ginfo->closestHitShader].bin;
2618 
2619          uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2620          assert(intersection_idx < info->stageCount);
2621 
2622          /* Only compile this stage if not already found in the cache. */
2623          if (stages[intersection_idx].bin == NULL) {
2624             /* The any-hit and intersection shader have to be combined */
2625             uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2626             const nir_shader *any_hit = NULL;
2627             if (any_hit_idx < info->stageCount)
2628                any_hit = stages[any_hit_idx].nir;
2629 
2630             void *group_ctx = ralloc_context(pipeline_ctx);
2631             nir_shader *intersection =
2632                nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2633 
2634             brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2635                                                         devinfo);
2636 
2637             result = compile_upload_rt_shader(pipeline, cache,
2638                                               intersection,
2639                                               &stages[intersection_idx],
2640                                               &group->intersection,
2641                                               group_ctx);
2642             ralloc_free(group_ctx);
2643             if (result != VK_SUCCESS)
2644                return result;
2645          } else {
2646             group->intersection = stages[intersection_idx].bin;
2647          }
2648 
2649          uint32_t stack_size =
2650             brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2651          stack_max[MESA_SHADER_INTERSECTION] =
2652             MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2653 
2654          break;
2655       }
2656 
2657       default:
2658          unreachable("Invalid ray tracing shader group type");
2659       }
2660    }
2661 
2662  done:
2663    ralloc_free(pipeline_ctx);
2664 
2665    anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2666 
2667    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2668 
2669    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2670       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2671    if (create_feedback) {
2672       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2673 
2674       uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
2675       assert(stage_count == 0 || info->stageCount == stage_count);
2676       for (uint32_t i = 0; i < stage_count; i++) {
2677          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
2678          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
2679       }
2680    }
2681 
2682    return VK_SUCCESS;
2683 }
2684 
2685 VkResult
anv_device_init_rt_shaders(struct anv_device *device)2686 anv_device_init_rt_shaders(struct anv_device *device)
2687 {
2688    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2689       return VK_SUCCESS;
2690 
2691    bool cache_hit;
2692 
2693    struct brw_rt_trampoline {
2694       char name[16];
2695       struct brw_cs_prog_key key;
2696    } trampoline_key = {
2697       .name = "rt-trampoline",
2698    };
2699    device->rt_trampoline =
2700       anv_device_search_for_kernel(device, device->internal_cache,
2701                                    &trampoline_key, sizeof(trampoline_key),
2702                                    &cache_hit);
2703    if (device->rt_trampoline == NULL) {
2704 
2705       void *tmp_ctx = ralloc_context(NULL);
2706       nir_shader *trampoline_nir =
2707          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
2708 
2709       trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
2710 
2711       struct anv_pipeline_bind_map bind_map = {
2712          .surface_count = 0,
2713          .sampler_count = 0,
2714       };
2715       uint32_t dummy_params[4] = { 0, };
2716       struct brw_cs_prog_data trampoline_prog_data = {
2717          .base.nr_params = 4,
2718          .base.param = dummy_params,
2719          .uses_inline_data = true,
2720          .uses_btd_stack_ids = true,
2721       };
2722       struct brw_compile_cs_params params = {
2723          .nir = trampoline_nir,
2724          .key = &trampoline_key.key,
2725          .prog_data = &trampoline_prog_data,
2726          .log_data = device,
2727       };
2728       const unsigned *tramp_data =
2729          brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
2730 
2731       device->rt_trampoline =
2732          anv_device_upload_kernel(device, device->internal_cache,
2733                                   MESA_SHADER_COMPUTE,
2734                                   &trampoline_key, sizeof(trampoline_key),
2735                                   tramp_data,
2736                                   trampoline_prog_data.base.program_size,
2737                                   &trampoline_prog_data.base,
2738                                   sizeof(trampoline_prog_data),
2739                                   NULL, 0, NULL, &bind_map);
2740 
2741       ralloc_free(tmp_ctx);
2742 
2743       if (device->rt_trampoline == NULL)
2744          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2745    }
2746 
2747    /* The cache already has a reference and it's not going anywhere so there
2748     * is no need to hold a second reference.
2749     */
2750    anv_shader_bin_unref(device, device->rt_trampoline);
2751 
2752    struct brw_rt_trivial_return {
2753       char name[16];
2754       struct brw_bs_prog_key key;
2755    } return_key = {
2756       .name = "rt-trivial-ret",
2757    };
2758    device->rt_trivial_return =
2759       anv_device_search_for_kernel(device, device->internal_cache,
2760                                    &return_key, sizeof(return_key),
2761                                    &cache_hit);
2762    if (device->rt_trivial_return == NULL) {
2763       void *tmp_ctx = ralloc_context(NULL);
2764       nir_shader *trivial_return_nir =
2765          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
2766 
2767       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
2768 
2769       struct anv_pipeline_bind_map bind_map = {
2770          .surface_count = 0,
2771          .sampler_count = 0,
2772       };
2773       struct brw_bs_prog_data return_prog_data = { 0, };
2774       struct brw_compile_bs_params params = {
2775          .nir = trivial_return_nir,
2776          .key = &return_key.key,
2777          .prog_data = &return_prog_data,
2778 
2779          .log_data = device,
2780       };
2781       const unsigned *return_data =
2782          brw_compile_bs(device->physical->compiler, tmp_ctx, &params);
2783 
2784       device->rt_trivial_return =
2785          anv_device_upload_kernel(device, device->internal_cache,
2786                                   MESA_SHADER_CALLABLE,
2787                                   &return_key, sizeof(return_key),
2788                                   return_data, return_prog_data.base.program_size,
2789                                   &return_prog_data.base, sizeof(return_prog_data),
2790                                   NULL, 0, NULL, &bind_map);
2791 
2792       ralloc_free(tmp_ctx);
2793 
2794       if (device->rt_trivial_return == NULL)
2795          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2796    }
2797 
2798    /* The cache already has a reference and it's not going anywhere so there
2799     * is no need to hold a second reference.
2800     */
2801    anv_shader_bin_unref(device, device->rt_trivial_return);
2802 
2803    return VK_SUCCESS;
2804 }
2805 
2806 void
anv_device_finish_rt_shaders(struct anv_device *device)2807 anv_device_finish_rt_shaders(struct anv_device *device)
2808 {
2809    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2810       return;
2811 }
2812 
2813 static VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, struct anv_device *device, struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *alloc)2814 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
2815                               struct anv_device *device,
2816                               struct vk_pipeline_cache *cache,
2817                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
2818                               const VkAllocationCallbacks *alloc)
2819 {
2820    VkResult result;
2821 
2822    util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
2823 
2824    result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
2825    if (result != VK_SUCCESS)
2826       goto fail;
2827 
2828    anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
2829 
2830    return VK_SUCCESS;
2831 
2832 fail:
2833    util_dynarray_foreach(&pipeline->shaders,
2834                          struct anv_shader_bin *, shader) {
2835       anv_shader_bin_unref(device, *shader);
2836    }
2837    return result;
2838 }
2839 
2840 static void
assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, uint32_t stage_idx, VkShaderStageFlags valid_stages)2841 assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
2842                             uint32_t stage_idx,
2843                             VkShaderStageFlags valid_stages)
2844 {
2845    if (stage_idx == VK_SHADER_UNUSED_KHR)
2846       return;
2847 
2848    assert(stage_idx <= pCreateInfo->stageCount);
2849    assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
2850    assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
2851 }
2852 
2853 static VkResult
anv_ray_tracing_pipeline_create( VkDevice _device, struct vk_pipeline_cache * cache, const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline)2854 anv_ray_tracing_pipeline_create(
2855     VkDevice                                    _device,
2856     struct vk_pipeline_cache *                  cache,
2857     const VkRayTracingPipelineCreateInfoKHR*    pCreateInfo,
2858     const VkAllocationCallbacks*                pAllocator,
2859     VkPipeline*                                 pPipeline)
2860 {
2861    ANV_FROM_HANDLE(anv_device, device, _device);
2862    VkResult result;
2863 
2864    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
2865 
2866    VK_MULTIALLOC(ma);
2867    VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
2868    VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount);
2869    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
2870                               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
2871       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2872 
2873    result = anv_pipeline_init(&pipeline->base, device,
2874                               ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags,
2875                               pAllocator);
2876    if (result != VK_SUCCESS) {
2877       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2878       return result;
2879    }
2880 
2881    pipeline->group_count = pCreateInfo->groupCount;
2882    pipeline->groups = groups;
2883 
2884    ASSERTED const VkShaderStageFlags ray_tracing_stages =
2885       VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2886       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
2887       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
2888       VK_SHADER_STAGE_MISS_BIT_KHR |
2889       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
2890       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2891 
2892    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2893       assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);
2894 
2895    for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
2896       const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
2897          &pCreateInfo->pGroups[i];
2898       assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
2899                                   VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2900                                   VK_SHADER_STAGE_MISS_BIT_KHR |
2901                                   VK_SHADER_STAGE_CALLABLE_BIT_KHR);
2902       assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
2903                                   VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
2904       assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
2905                                   VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
2906       assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
2907                                   VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
2908       switch (ginfo->type) {
2909       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2910          assert(ginfo->generalShader < pCreateInfo->stageCount);
2911          assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
2912          assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
2913          assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
2914          break;
2915 
2916       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2917          assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
2918          assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
2919          break;
2920 
2921       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
2922          assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
2923          break;
2924 
2925       default:
2926          unreachable("Invalid ray-tracing shader group type");
2927       }
2928    }
2929 
2930    result = anv_ray_tracing_pipeline_init(pipeline, device, cache,
2931                                           pCreateInfo, pAllocator);
2932    if (result != VK_SUCCESS) {
2933       anv_pipeline_finish(&pipeline->base, device, pAllocator);
2934       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2935       return result;
2936    }
2937 
2938    anv_genX(&device->info, ray_tracing_pipeline_emit)(pipeline);
2939 
2940    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2941 
2942    return pipeline->base.batch.status;
2943 }
2944 
2945 VkResult
anv_CreateRayTracingPipelinesKHR( VkDevice _device, VkDeferredOperationKHR deferredOperation, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines)2946 anv_CreateRayTracingPipelinesKHR(
2947     VkDevice                                    _device,
2948     VkDeferredOperationKHR                      deferredOperation,
2949     VkPipelineCache                             pipelineCache,
2950     uint32_t                                    createInfoCount,
2951     const VkRayTracingPipelineCreateInfoKHR*    pCreateInfos,
2952     const VkAllocationCallbacks*                pAllocator,
2953     VkPipeline*                                 pPipelines)
2954 {
2955    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2956 
2957    VkResult result = VK_SUCCESS;
2958 
2959    unsigned i;
2960    for (i = 0; i < createInfoCount; i++) {
2961       VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
2962                                                      &pCreateInfos[i],
2963                                                      pAllocator, &pPipelines[i]);
2964 
2965       if (res == VK_SUCCESS)
2966          continue;
2967 
2968       /* Bail out on the first error as it is not obvious what error should be
2969        * report upon 2 different failures. */
2970       result = res;
2971       if (result != VK_PIPELINE_COMPILE_REQUIRED)
2972          break;
2973 
2974       pPipelines[i] = VK_NULL_HANDLE;
2975 
2976       if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2977          break;
2978    }
2979 
2980    for (; i < createInfoCount; i++)
2981       pPipelines[i] = VK_NULL_HANDLE;
2982 
2983    return result;
2984 }
2985 
2986 #define WRITE_STR(field, ...) ({                               \
2987    memset(field, 0, sizeof(field));                            \
2988    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
2989    assert(i > 0 && i < sizeof(field));                         \
2990 })
2991 
anv_GetPipelineExecutablePropertiesKHR( VkDevice device, const VkPipelineInfoKHR* pPipelineInfo, uint32_t* pExecutableCount, VkPipelineExecutablePropertiesKHR* pProperties)2992 VkResult anv_GetPipelineExecutablePropertiesKHR(
2993     VkDevice                                    device,
2994     const VkPipelineInfoKHR*                    pPipelineInfo,
2995     uint32_t*                                   pExecutableCount,
2996     VkPipelineExecutablePropertiesKHR*          pProperties)
2997 {
2998    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
2999    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3000                           pProperties, pExecutableCount);
3001 
3002    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
3003       vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3004          gl_shader_stage stage = exe->stage;
3005          props->stages = mesa_to_vk_shader_stage(stage);
3006 
3007          unsigned simd_width = exe->stats.dispatch_width;
3008          if (stage == MESA_SHADER_FRAGMENT) {
3009             WRITE_STR(props->name, "%s%d %s",
3010                       simd_width ? "SIMD" : "vec",
3011                       simd_width ? simd_width : 4,
3012                       _mesa_shader_stage_to_string(stage));
3013          } else {
3014             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
3015          }
3016          WRITE_STR(props->description, "%s%d %s shader",
3017                    simd_width ? "SIMD" : "vec",
3018                    simd_width ? simd_width : 4,
3019                    _mesa_shader_stage_to_string(stage));
3020 
3021          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
3022           * wants a subgroup size of 1.
3023           */
3024          props->subgroupSize = MAX2(simd_width, 1);
3025       }
3026    }
3027 
3028    return vk_outarray_status(&out);
3029 }
3030 
3031 static const struct anv_pipeline_executable *
anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)3032 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
3033 {
3034    assert(index < util_dynarray_num_elements(&pipeline->executables,
3035                                              struct anv_pipeline_executable));
3036    return util_dynarray_element(
3037       &pipeline->executables, struct anv_pipeline_executable, index);
3038 }
3039 
anv_GetPipelineExecutableStatisticsKHR( VkDevice device, const VkPipelineExecutableInfoKHR* pExecutableInfo, uint32_t* pStatisticCount, VkPipelineExecutableStatisticKHR* pStatistics)3040 VkResult anv_GetPipelineExecutableStatisticsKHR(
3041     VkDevice                                    device,
3042     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3043     uint32_t*                                   pStatisticCount,
3044     VkPipelineExecutableStatisticKHR*           pStatistics)
3045 {
3046    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3047    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3048                           pStatistics, pStatisticCount);
3049 
3050    const struct anv_pipeline_executable *exe =
3051       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3052 
3053    const struct brw_stage_prog_data *prog_data;
3054    switch (pipeline->type) {
3055    case ANV_PIPELINE_GRAPHICS: {
3056       prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
3057       break;
3058    }
3059    case ANV_PIPELINE_COMPUTE: {
3060       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
3061       break;
3062    }
3063    case ANV_PIPELINE_RAY_TRACING: {
3064       struct anv_shader_bin **shader =
3065          util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
3066                                struct anv_shader_bin *,
3067                                pExecutableInfo->executableIndex);
3068       prog_data = (*shader)->prog_data;
3069       break;
3070    }
3071    default:
3072       unreachable("invalid pipeline type");
3073    }
3074 
3075    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3076       WRITE_STR(stat->name, "Instruction Count");
3077       WRITE_STR(stat->description,
3078                 "Number of GEN instructions in the final generated "
3079                 "shader executable.");
3080       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3081       stat->value.u64 = exe->stats.instructions;
3082    }
3083 
3084    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3085       WRITE_STR(stat->name, "SEND Count");
3086       WRITE_STR(stat->description,
3087                 "Number of instructions in the final generated shader "
3088                 "executable which access external units such as the "
3089                 "constant cache or the sampler.");
3090       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3091       stat->value.u64 = exe->stats.sends;
3092    }
3093 
3094    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3095       WRITE_STR(stat->name, "Loop Count");
3096       WRITE_STR(stat->description,
3097                 "Number of loops (not unrolled) in the final generated "
3098                 "shader executable.");
3099       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3100       stat->value.u64 = exe->stats.loops;
3101    }
3102 
3103    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3104       WRITE_STR(stat->name, "Cycle Count");
3105       WRITE_STR(stat->description,
3106                 "Estimate of the number of EU cycles required to execute "
3107                 "the final generated executable.  This is an estimate only "
3108                 "and may vary greatly from actual run-time performance.");
3109       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3110       stat->value.u64 = exe->stats.cycles;
3111    }
3112 
3113    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3114       WRITE_STR(stat->name, "Spill Count");
3115       WRITE_STR(stat->description,
3116                 "Number of scratch spill operations.  This gives a rough "
3117                 "estimate of the cost incurred due to spilling temporary "
3118                 "values to memory.  If this is non-zero, you may want to "
3119                 "adjust your shader to reduce register pressure.");
3120       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3121       stat->value.u64 = exe->stats.spills;
3122    }
3123 
3124    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3125       WRITE_STR(stat->name, "Fill Count");
3126       WRITE_STR(stat->description,
3127                 "Number of scratch fill operations.  This gives a rough "
3128                 "estimate of the cost incurred due to spilling temporary "
3129                 "values to memory.  If this is non-zero, you may want to "
3130                 "adjust your shader to reduce register pressure.");
3131       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3132       stat->value.u64 = exe->stats.fills;
3133    }
3134 
3135    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3136       WRITE_STR(stat->name, "Scratch Memory Size");
3137       WRITE_STR(stat->description,
3138                 "Number of bytes of scratch memory required by the "
3139                 "generated shader executable.  If this is non-zero, you "
3140                 "may want to adjust your shader to reduce register "
3141                 "pressure.");
3142       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3143       stat->value.u64 = prog_data->total_scratch;
3144    }
3145 
3146    if (gl_shader_stage_uses_workgroup(exe->stage)) {
3147       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3148          WRITE_STR(stat->name, "Workgroup Memory Size");
3149          WRITE_STR(stat->description,
3150                    "Number of bytes of workgroup shared memory used by this "
3151                    "shader including any padding.");
3152          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3153          stat->value.u64 = prog_data->total_shared;
3154       }
3155    }
3156 
3157    return vk_outarray_status(&out);
3158 }
3159 
3160 static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, const char *data)3161 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3162               const char *data)
3163 {
3164    ir->isText = VK_TRUE;
3165 
3166    size_t data_len = strlen(data) + 1;
3167 
3168    if (ir->pData == NULL) {
3169       ir->dataSize = data_len;
3170       return true;
3171    }
3172 
3173    strncpy(ir->pData, data, ir->dataSize);
3174    if (ir->dataSize < data_len)
3175       return false;
3176 
3177    ir->dataSize = data_len;
3178    return true;
3179 }
3180 
anv_GetPipelineExecutableInternalRepresentationsKHR( VkDevice device, const VkPipelineExecutableInfoKHR* pExecutableInfo, uint32_t* pInternalRepresentationCount, VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)3181 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
3182     VkDevice                                    device,
3183     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3184     uint32_t*                                   pInternalRepresentationCount,
3185     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
3186 {
3187    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3188    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3189                           pInternalRepresentations, pInternalRepresentationCount);
3190    bool incomplete_text = false;
3191 
3192    const struct anv_pipeline_executable *exe =
3193       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3194 
3195    if (exe->nir) {
3196       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
3197          WRITE_STR(ir->name, "Final NIR");
3198          WRITE_STR(ir->description,
3199                    "Final NIR before going into the back-end compiler");
3200 
3201          if (!write_ir_text(ir, exe->nir))
3202             incomplete_text = true;
3203       }
3204    }
3205 
3206    if (exe->disasm) {
3207       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
3208          WRITE_STR(ir->name, "GEN Assembly");
3209          WRITE_STR(ir->description,
3210                    "Final GEN assembly for the generated shader binary");
3211 
3212          if (!write_ir_text(ir, exe->disasm))
3213             incomplete_text = true;
3214       }
3215    }
3216 
3217    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3218 }
3219 
3220 VkResult
anv_GetRayTracingShaderGroupHandlesKHR( VkDevice _device, VkPipeline _pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData)3221 anv_GetRayTracingShaderGroupHandlesKHR(
3222     VkDevice                                    _device,
3223     VkPipeline                                  _pipeline,
3224     uint32_t                                    firstGroup,
3225     uint32_t                                    groupCount,
3226     size_t                                      dataSize,
3227     void*                                       pData)
3228 {
3229    ANV_FROM_HANDLE(anv_device, device, _device);
3230    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3231 
3232    if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
3233       return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3234 
3235    struct anv_ray_tracing_pipeline *rt_pipeline =
3236       anv_pipeline_to_ray_tracing(pipeline);
3237 
3238    for (uint32_t i = 0; i < groupCount; i++) {
3239       struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
3240       memcpy(pData, group->handle, sizeof(group->handle));
3241       pData += sizeof(group->handle);
3242    }
3243 
3244    return VK_SUCCESS;
3245 }
3246 
3247 VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( VkDevice _device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData)3248 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
3249     VkDevice                                    _device,
3250     VkPipeline                                  pipeline,
3251     uint32_t                                    firstGroup,
3252     uint32_t                                    groupCount,
3253     size_t                                      dataSize,
3254     void*                                       pData)
3255 {
3256    ANV_FROM_HANDLE(anv_device, device, _device);
3257    unreachable("Unimplemented");
3258    return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3259 }
3260 
3261 VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR( VkDevice device, VkPipeline _pipeline, uint32_t group, VkShaderGroupShaderKHR groupShader)3262 anv_GetRayTracingShaderGroupStackSizeKHR(
3263     VkDevice                                    device,
3264     VkPipeline                                  _pipeline,
3265     uint32_t                                    group,
3266     VkShaderGroupShaderKHR                      groupShader)
3267 {
3268    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3269    assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
3270 
3271    struct anv_ray_tracing_pipeline *rt_pipeline =
3272       anv_pipeline_to_ray_tracing(pipeline);
3273 
3274    assert(group < rt_pipeline->group_count);
3275 
3276    struct anv_shader_bin *bin;
3277    switch (groupShader) {
3278    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
3279       bin = rt_pipeline->groups[group].general;
3280       break;
3281 
3282    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
3283       bin = rt_pipeline->groups[group].closest_hit;
3284       break;
3285 
3286    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
3287       bin = rt_pipeline->groups[group].any_hit;
3288       break;
3289 
3290    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
3291       bin = rt_pipeline->groups[group].intersection;
3292       break;
3293 
3294    default:
3295       unreachable("Invalid VkShaderGroupShader enum");
3296    }
3297 
3298    if (bin == NULL)
3299       return 0;
3300 
3301    return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
3302 }
3303