1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_disasm.h"
34 #include "common/intel_sample_positions.h"
35 #include "anv_private.h"
36 #include "compiler/brw_nir.h"
37 #include "compiler/brw_nir_rt.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_pipeline.h"
42 #include "vk_render_pass.h"
43 #include "vk_util.h"
44
45 /* Needed for SWIZZLE macros */
46 #include "program/prog_instruction.h"
47
48 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
49 * we can't do that yet because we don't have the ability to copy nir.
50 */
51 static nir_shader *
52 anv_shader_stage_to_nir(struct anv_device *device,
53 const VkPipelineShaderStageCreateInfo *stage_info,
54 void *mem_ctx)
55 {
56 const struct anv_physical_device *pdevice = device->physical;
57 const struct anv_instance *instance = pdevice->instance;
58 const struct brw_compiler *compiler = pdevice->compiler;
59 gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
60 const nir_shader_compiler_options *nir_options =
61 compiler->nir_options[stage];
62
63 const struct spirv_to_nir_options spirv_options = {
64 .caps = {
65 .demote_to_helper_invocation = true,
66 .derivative_group = true,
67 .descriptor_array_dynamic_indexing = true,
68 .descriptor_array_non_uniform_indexing = true,
69 .descriptor_indexing = true,
70 .device_group = true,
71 .draw_parameters = true,
72 .float16 = pdevice->info.ver >= 8,
73 .float32_atomic_add = pdevice->info.has_lsc,
74 .float32_atomic_min_max = pdevice->info.ver >= 9,
75 .float64 = pdevice->info.ver >= 8,
76 .float64_atomic_min_max = pdevice->info.has_lsc,
77 .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
78 .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
79 .geometry_streams = true,
80 /* When Vulkan 1.3 is used or KHR_format_feature_flags2 is enabled,
81 * read/write without format support is reported per format, so just
82 * report true here. It's up to the application to check.
83 */
84 .image_read_without_format = instance->vk.app_info.api_version >= VK_API_VERSION_1_3 || device->vk.enabled_extensions.KHR_format_feature_flags2,
85 .image_write_without_format = true,
86 .int8 = pdevice->info.ver >= 8,
87 .int16 = pdevice->info.ver >= 8,
88 .int64 = pdevice->info.ver >= 8,
89 .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
90 .integer_functions2 = pdevice->info.ver >= 8,
91 .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
92 .min_lod = true,
93 .multiview = true,
94 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
95 .post_depth_coverage = pdevice->info.ver >= 9,
96 .runtime_descriptor_array = true,
97 .float_controls = pdevice->info.ver >= 8,
98 .ray_query = pdevice->info.has_ray_tracing,
99 .ray_tracing = pdevice->info.has_ray_tracing,
100 .shader_clock = true,
101 .shader_viewport_index_layer = true,
102 .stencil_export = pdevice->info.ver >= 9,
103 .storage_8bit = pdevice->info.ver >= 8,
104 .storage_16bit = pdevice->info.ver >= 8,
105 .subgroup_arithmetic = true,
106 .subgroup_basic = true,
107 .subgroup_ballot = true,
108 .subgroup_dispatch = true,
109 .subgroup_quad = true,
110 .subgroup_uniform_control_flow = true,
111 .subgroup_shuffle = true,
112 .subgroup_vote = true,
113 .tessellation = true,
114 .transform_feedback = pdevice->info.ver >= 8,
115 .variable_pointers = true,
116 .vk_memory_model = true,
117 .vk_memory_model_device_scope = true,
118 .workgroup_memory_explicit_layout = true,
119 .fragment_shading_rate = pdevice->info.ver >= 11,
120 },
121 .ubo_addr_format =
122 anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
123 .ssbo_addr_format =
124 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
125 .phys_ssbo_addr_format = nir_address_format_64bit_global,
126 .push_const_addr_format = nir_address_format_logical,
127
128 /* TODO: Consider changing this to an address format where the NULL
129 * pointer equals 0. That might be a better format to play nice
130 * with certain code / code generators.
131 */
132 .shared_addr_format = nir_address_format_32bit_offset,
133 };
134
135 nir_shader *nir;
136 VkResult result =
137 vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
138 &spirv_options, nir_options,
139 mem_ctx, &nir);
140 if (result != VK_SUCCESS)
141 return NULL;
142
143 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
144 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
145 gl_shader_stage_name(stage));
146 nir_print_shader(nir, stderr);
147 }
148
149 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
150 nir_shader_get_entrypoint(nir), true, false);
151
152 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
153 .point_coord = true,
154 };
155 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
156
157 const nir_opt_access_options opt_access_options = {
158 .is_vulkan = true,
159 .infer_non_readable = true,
160 };
161 NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
162
163 NIR_PASS(_, nir, nir_lower_frexp);
164
165 /* Vulkan uses the separate-shader linking model */
166 nir->info.separate_shader = true;
167
168 brw_preprocess_nir(compiler, nir, NULL);
169
170 return nir;
171 }
172
173 VkResult
174 anv_pipeline_init(struct anv_pipeline *pipeline,
175 struct anv_device *device,
176 enum anv_pipeline_type type,
177 VkPipelineCreateFlags flags,
178 const VkAllocationCallbacks *pAllocator)
179 {
180 VkResult result;
181
182 memset(pipeline, 0, sizeof(*pipeline));
183
184 vk_object_base_init(&device->vk, &pipeline->base,
185 VK_OBJECT_TYPE_PIPELINE);
186 pipeline->device = device;
187
188 /* It's the job of the child class to provide actual backing storage for
189 * the batch by setting batch.start, batch.next, and batch.end.
190 */
191 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
192 pipeline->batch.relocs = &pipeline->batch_relocs;
193 pipeline->batch.status = VK_SUCCESS;
194
195 result = anv_reloc_list_init(&pipeline->batch_relocs,
196 pipeline->batch.alloc);
197 if (result != VK_SUCCESS)
198 return result;
199
200 pipeline->mem_ctx = ralloc_context(NULL);
201
202 pipeline->type = type;
203 pipeline->flags = flags;
204
205 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
206
207 return VK_SUCCESS;
208 }
209
210 void
211 anv_pipeline_finish(struct anv_pipeline *pipeline,
212 struct anv_device *device,
213 const VkAllocationCallbacks *pAllocator)
214 {
215 anv_reloc_list_finish(&pipeline->batch_relocs,
216 pAllocator ? pAllocator : &device->vk.alloc);
217 ralloc_free(pipeline->mem_ctx);
218 vk_object_base_finish(&pipeline->base);
219 }
220
221 void anv_DestroyPipeline(
222 VkDevice _device,
223 VkPipeline _pipeline,
224 const VkAllocationCallbacks* pAllocator)
225 {
226 ANV_FROM_HANDLE(anv_device, device, _device);
227 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
228
229 if (!pipeline)
230 return;
231
232 switch (pipeline->type) {
233 case ANV_PIPELINE_GRAPHICS: {
234 struct anv_graphics_pipeline *gfx_pipeline =
235 anv_pipeline_to_graphics(pipeline);
236
237 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
238 if (gfx_pipeline->shaders[s])
239 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
240 }
241 break;
242 }
243
244 case ANV_PIPELINE_COMPUTE: {
245 struct anv_compute_pipeline *compute_pipeline =
246 anv_pipeline_to_compute(pipeline);
247
248 if (compute_pipeline->cs)
249 anv_shader_bin_unref(device, compute_pipeline->cs);
250
251 break;
252 }
253
254 case ANV_PIPELINE_RAY_TRACING: {
255 struct anv_ray_tracing_pipeline *rt_pipeline =
256 anv_pipeline_to_ray_tracing(pipeline);
257
258 util_dynarray_foreach(&rt_pipeline->shaders,
259 struct anv_shader_bin *, shader) {
260 anv_shader_bin_unref(device, *shader);
261 }
262 break;
263 }
264
265 default:
266 unreachable("invalid pipeline type");
267 }
268
269 anv_pipeline_finish(pipeline, device, pAllocator);
270 vk_free2(&device->vk.alloc, pAllocator, pipeline);
271 }
272
273 static void
274 populate_sampler_prog_key(const struct intel_device_info *devinfo,
275 struct brw_sampler_prog_key_data *key)
276 {
277 /* Almost all multisampled textures are compressed. The only time when we
278 * don't compress a multisampled texture is for 16x MSAA with a surface
279 * width greater than 8k, which is a bit of an edge case. Since the sampler
280 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
281 * to tell the compiler to always assume compression.
282 */
283 key->compressed_multisample_layout_mask = ~0;
284
285 /* SkyLake added support for 16x MSAA. With this came a new message for
286 * reading from a 16x MSAA surface with compression. The new message was
287 * needed because now the MCS data is 64 bits instead of 32 or lower as is
288 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
289 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
290 * so we can just use it unconditionally. This may not be quite as
291 * efficient but it saves us from recompiling.
292 */
293 if (devinfo->ver >= 9)
294 key->msaa_16 = ~0;
295
296 /* XXX: Handle texture swizzle on HSW- */
297 for (int i = 0; i < BRW_MAX_SAMPLERS; i++) {
298 /* Assume color sampler, no swizzling. (Works for BDW+) */
299 key->swizzles[i] = SWIZZLE_XYZW;
300 }
301 }
302
303 static void
304 populate_base_prog_key(const struct anv_device *device,
305 bool robust_buffer_access,
306 struct brw_base_prog_key *key)
307 {
308 key->robust_buffer_access = robust_buffer_access;
309 key->limit_trig_input_range =
310 device->physical->instance->limit_trig_input_range;
311
312 populate_sampler_prog_key(&device->info, &key->tex);
313 }
314
315 static void
316 populate_vs_prog_key(const struct anv_device *device,
317 bool robust_buffer_access,
318 struct brw_vs_prog_key *key)
319 {
320 memset(key, 0, sizeof(*key));
321
322 populate_base_prog_key(device, robust_buffer_access, &key->base);
323
324 /* XXX: Handle vertex input work-arounds */
325
326 /* XXX: Handle sampler_prog_key */
327 }
328
329 static void
330 populate_tcs_prog_key(const struct anv_device *device,
331 bool robust_buffer_access,
332 unsigned input_vertices,
333 struct brw_tcs_prog_key *key)
334 {
335 memset(key, 0, sizeof(*key));
336
337 populate_base_prog_key(device, robust_buffer_access, &key->base);
338
339 key->input_vertices = input_vertices;
340 }
341
342 static void
343 populate_tes_prog_key(const struct anv_device *device,
344 bool robust_buffer_access,
345 struct brw_tes_prog_key *key)
346 {
347 memset(key, 0, sizeof(*key));
348
349 populate_base_prog_key(device, robust_buffer_access, &key->base);
350 }
351
352 static void
353 populate_gs_prog_key(const struct anv_device *device,
354 bool robust_buffer_access,
355 struct brw_gs_prog_key *key)
356 {
357 memset(key, 0, sizeof(*key));
358
359 populate_base_prog_key(device, robust_buffer_access, &key->base);
360 }
361
362 static bool
363 pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
364 const BITSET_WORD *dynamic,
365 const struct vk_multisample_state *ms,
366 const struct vk_fragment_shading_rate_state *fsr)
367 {
368 /* The Vulkan 1.2.199 spec says:
369 *
370 * "If any of the following conditions are met, Cxy' must be set to
371 * {1,1}:
372 *
373 * * If Sample Shading is enabled.
374 * * [...]"
375 *
376 * And "sample shading" is defined as follows:
377 *
378 * "Sample shading is enabled for a graphics pipeline:
379 *
380 * * If the interface of the fragment shader entry point of the
381 * graphics pipeline includes an input variable decorated with
382 * SampleId or SamplePosition. In this case minSampleShadingFactor
383 * takes the value 1.0.
384 *
385 * * Else if the sampleShadingEnable member of the
386 * VkPipelineMultisampleStateCreateInfo structure specified when
387 * creating the graphics pipeline is set to VK_TRUE. In this case
388 * minSampleShadingFactor takes the value of
389 * VkPipelineMultisampleStateCreateInfo::minSampleShading.
390 *
391 * Otherwise, sample shading is considered disabled."
392 *
393 * The first bullet above is handled by the back-end compiler because those
394 * inputs both force per-sample dispatch. The second bullet is handled
395 * here. Note that whether sample shading is enabled has nothing to do
396 * with the value of minSampleShading.
397 */
398 if (ms != NULL && ms->sample_shading_enable)
399 return false;
400
401 /* Not dynamic, and the pipeline has a 1x1 fragment shading rate with no
402 * possibility for any element of the pipeline to change the value.
403 */
404 if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
405 fsr->fragment_size.width <= 1 &&
406 fsr->fragment_size.height <= 1 &&
407 fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
408 fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
409 return false;
410
411 return true;
412 }
413
414 static void
415 populate_task_prog_key(const struct anv_device *device,
416 bool robust_buffer_access,
417 struct brw_task_prog_key *key)
418 {
419 memset(key, 0, sizeof(*key));
420
421 populate_base_prog_key(device, robust_buffer_access, &key->base);
422 }
423
424 static void
425 populate_mesh_prog_key(const struct anv_device *device,
426 bool robust_buffer_access,
427 struct brw_mesh_prog_key *key)
428 {
429 memset(key, 0, sizeof(*key));
430
431 populate_base_prog_key(device, robust_buffer_access, &key->base);
432 }
433
434 static void
435 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
436 bool robust_buffer_access,
437 const BITSET_WORD *dynamic,
438 const struct vk_multisample_state *ms,
439 const struct vk_fragment_shading_rate_state *fsr,
440 const struct vk_render_pass_state *rp,
441 struct brw_wm_prog_key *key)
442 {
443 const struct anv_device *device = pipeline->base.device;
444
445 memset(key, 0, sizeof(*key));
446
447 populate_base_prog_key(device, robust_buffer_access, &key->base);
448
449 /* We set this to 0 here and set it to the actual value before we call
450 * brw_compile_fs.
451 */
452 key->input_slots_valid = 0;
453
454 /* XXX Vulkan doesn't appear to specify */
455 key->clamp_fragment_color = false;
456
457 key->ignore_sample_mask_out = false;
458
459 assert(rp->color_attachment_count <= MAX_RTS);
460 /* Consider all inputs as valid until we look at the NIR variables. */
461 key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
462 key->nr_color_regions = rp->color_attachment_count;
463
464 /* To reduce possible shader recompilations we would need to know whether
465 * there is a SampleMask output variable, in order to decide whether we
466 * should emit code to work around the hardware disabling alpha to coverage
467 * when a SampleMask output is present.
468 */
469 key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable;
470
471 /* Vulkan doesn't support fixed-function alpha test */
472 key->alpha_test_replicate_alpha = false;
473
474 if (ms != NULL) {
475 /* We should probably pull this out of the shader, but it's fairly
476 * harmless to compute it and then let dead-code take care of it.
477 */
478 if (ms->rasterization_samples > 1) {
479 key->persample_interp = ms->sample_shading_enable &&
480 (ms->min_sample_shading * ms->rasterization_samples) > 1;
481 key->multisample_fbo = true;
482 }
483
484 if (device->physical->instance->sample_mask_out_opengl_behaviour)
485 key->ignore_sample_mask_out = !key->multisample_fbo;
486 }
487
488 key->coarse_pixel =
489 !key->persample_interp &&
490 device->vk.enabled_extensions.KHR_fragment_shading_rate &&
491 pipeline_has_coarse_pixel(pipeline, dynamic, ms, fsr);
492 }
493
494 static void
495 populate_cs_prog_key(const struct anv_device *device,
496 bool robust_buffer_access,
497 struct brw_cs_prog_key *key)
498 {
499 memset(key, 0, sizeof(*key));
500
501 populate_base_prog_key(device, robust_buffer_access, &key->base);
502 }
503
504 static void
505 populate_bs_prog_key(const struct anv_device *device,
506 bool robust_buffer_access,
507 struct brw_bs_prog_key *key)
508 {
509 memset(key, 0, sizeof(*key));
510
511 populate_base_prog_key(device, robust_buffer_access, &key->base);
512 }
513
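/* Temporary per-stage state used while building a pipeline: the stage info
 * and SPIR-V hash, the program key, the lowered NIR, the bind map, compile
 * statistics/disassembly, and the resulting shader binary.
 */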
514 struct anv_pipeline_stage {
515 gl_shader_stage stage;
516
517 const VkPipelineShaderStageCreateInfo *info;
518
519 unsigned char shader_sha1[20];
520
521 union brw_any_prog_key key;
522
523 struct {
524 gl_shader_stage stage;
525 unsigned char sha1[20];
526 } cache_key;
527
528 nir_shader *nir;
529
530 struct anv_pipeline_binding surface_to_descriptor[256];
531 struct anv_pipeline_binding sampler_to_descriptor[256];
532 struct anv_pipeline_bind_map bind_map;
533
534 union brw_any_prog_data prog_data;
535
536 uint32_t num_stats;
537 struct brw_compile_stats stats[3];
538 char *disasm[3];
539
540 VkPipelineCreationFeedback feedback;
541
542 const unsigned *code;
543
544 struct anv_shader_bin *bin;
545 };
546
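/* Hash everything that influences the compiled graphics shaders: the view
 * mask, the pipeline layout, the robust buffer access setting, and each
 * active stage's SPIR-V hash and program key.
 */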
547 static void
548 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
549 struct anv_pipeline_layout *layout,
550 struct anv_pipeline_stage *stages,
551 unsigned char *sha1_out)
552 {
553 struct mesa_sha1 ctx;
554 _mesa_sha1_init(&ctx);
555
556 _mesa_sha1_update(&ctx, &pipeline->view_mask,
557 sizeof(pipeline->view_mask));
558
559 if (layout)
560 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
561
562 const bool rba = pipeline->base.device->robust_buffer_access;
563 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
564
565 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
566 if (stages[s].info) {
567 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
568 sizeof(stages[s].shader_sha1));
569 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
570 }
571 }
572
573 _mesa_sha1_final(&ctx, sha1_out);
574 }
575
576 static void
577 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
578 struct anv_pipeline_layout *layout,
579 struct anv_pipeline_stage *stage,
580 unsigned char *sha1_out)
581 {
582 struct mesa_sha1 ctx;
583 _mesa_sha1_init(&ctx);
584
585 if (layout)
586 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
587
588 const struct anv_device *device = pipeline->base.device;
589
590 const bool rba = device->robust_buffer_access;
591 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
592
593 const bool afs = device->physical->instance->assume_full_subgroups;
594 _mesa_sha1_update(&ctx, &afs, sizeof(afs));
595
596 _mesa_sha1_update(&ctx, stage->shader_sha1,
597 sizeof(stage->shader_sha1));
598 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
599
600 _mesa_sha1_final(&ctx, sha1_out);
601 }
602
603 static void
604 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
605 struct anv_pipeline_layout *layout,
606 struct anv_pipeline_stage *stage,
607 unsigned char *sha1_out)
608 {
609 struct mesa_sha1 ctx;
610 _mesa_sha1_init(&ctx);
611
612 if (layout != NULL)
613 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
614
615 const bool rba = pipeline->base.device->robust_buffer_access;
616 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
617
618 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
619 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
620
621 _mesa_sha1_final(&ctx, sha1_out);
622 }
623
624 static void
625 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
626 struct anv_pipeline_layout *layout,
627 struct anv_pipeline_stage *intersection,
628 struct anv_pipeline_stage *any_hit,
629 unsigned char *sha1_out)
630 {
631 struct mesa_sha1 ctx;
632 _mesa_sha1_init(&ctx);
633
634 if (layout != NULL)
635 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
636
637 const bool rba = pipeline->base.device->robust_buffer_access;
638 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
639
640 _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
641 _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
642 _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
643 _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
644
645 _mesa_sha1_final(&ctx, sha1_out);
646 }
647
648 static nir_shader *
649 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
650 struct vk_pipeline_cache *cache,
651 void *mem_ctx,
652 struct anv_pipeline_stage *stage)
653 {
654 const struct brw_compiler *compiler =
655 pipeline->device->physical->compiler;
656 const nir_shader_compiler_options *nir_options =
657 compiler->nir_options[stage->stage];
658 nir_shader *nir;
659
660 nir = anv_device_search_for_nir(pipeline->device, cache,
661 nir_options,
662 stage->shader_sha1,
663 mem_ctx);
664 if (nir) {
665 assert(nir->info.stage == stage->stage);
666 return nir;
667 }
668
669 nir = anv_shader_stage_to_nir(pipeline->device, stage->info, mem_ctx);
670 if (nir) {
671 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
672 return nir;
673 }
674
675 return NULL;
676 }
677
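/* Size/alignment callback used when lowering shared-memory variables to
 * explicit types: booleans occupy 4 bytes, and 3-component vectors are
 * aligned like 4-component ones.
 */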
678 static void
679 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
680 {
681 assert(glsl_type_is_vector_or_scalar(type));
682
683 uint32_t comp_size = glsl_type_is_boolean(type)
684 ? 4 : glsl_get_bit_size(type) / 8;
685 unsigned length = glsl_get_vector_elements(type);
686 *size = comp_size * length,
687 *align = comp_size * (length == 3 ? 4 : length);
688 }
689
690 static void
691 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
692 void *mem_ctx,
693 struct anv_pipeline_stage *stage,
694 struct anv_pipeline_layout *layout)
695 {
696 const struct anv_physical_device *pdevice = pipeline->device->physical;
697 const struct brw_compiler *compiler = pdevice->compiler;
698
699 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
700 nir_shader *nir = stage->nir;
701
702 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
703 NIR_PASS(_, nir, nir_lower_wpos_center);
704 NIR_PASS(_, nir, nir_lower_input_attachments,
705 &(nir_input_attachment_options) {
706 .use_fragcoord_sysval = true,
707 .use_layer_id_sysval = true,
708 });
709 }
710
711 NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);
712
713 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
714 NIR_PASS(_, nir, anv_nir_lower_multiview,
715 anv_pipeline_to_graphics(pipeline));
716 }
717
718 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
719
720 NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler->devinfo);
721
722 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
723 nir_address_format_64bit_global);
724 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
725 nir_address_format_32bit_offset);
726
727 NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
728
729 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
730 NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
731 pdevice, pipeline->device->robust_buffer_access,
732 layout, &stage->bind_map);
733
734 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
735 anv_nir_ubo_addr_format(pdevice,
736 pipeline->device->robust_buffer_access));
737 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
738 anv_nir_ssbo_addr_format(pdevice,
739 pipeline->device->robust_buffer_access));
740
741 /* First run copy-prop to get rid of all of the vec() that address
742 * calculations often create and then constant-fold so that, when we
743 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
744 */
745 NIR_PASS(_, nir, nir_copy_prop);
746 NIR_PASS(_, nir, nir_opt_constant_folding);
747
748 NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
749
750 /* We don't support non-uniform UBOs and non-uniform SSBO access is
751 * handled naturally by falling back to A64 messages.
752 */
753 NIR_PASS(_, nir, nir_lower_non_uniform_access,
754 &(nir_lower_non_uniform_access_options) {
755 .types = nir_lower_non_uniform_texture_access |
756 nir_lower_non_uniform_image_access,
757 .callback = NULL,
758 });
759
760 NIR_PASS_V(nir, anv_nir_compute_push_layout,
761 pdevice, pipeline->device->robust_buffer_access,
762 prog_data, &stage->bind_map, mem_ctx);
763
764 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
765 if (!nir->info.shared_memory_explicit_layout) {
766 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
767 nir_var_mem_shared, shared_type_info);
768 }
769
770 NIR_PASS(_, nir, nir_lower_explicit_io,
771 nir_var_mem_shared, nir_address_format_32bit_offset);
772
773 if (nir->info.zero_initialize_shared_memory &&
774 nir->info.shared_size > 0) {
775 /* The effective Shared Local Memory size is at least 1024 bytes and
776 * is always rounded to a power of two, so it is OK to align the size
777 * used by the shader to chunk_size -- which does simplify the logic.
778 */
779 const unsigned chunk_size = 16;
780 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
781 assert(shared_size <=
782 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
783
784 NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
785 shared_size, chunk_size);
786 }
787 }
788
789 if (gl_shader_stage_is_compute(nir->info.stage) ||
790 gl_shader_stage_is_mesh(nir->info.stage))
791 NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics);
792
793 stage->nir = nir;
794 }
795
796 static void
797 anv_pipeline_link_vs(const struct brw_compiler *compiler,
798 struct anv_pipeline_stage *vs_stage,
799 struct anv_pipeline_stage *next_stage)
800 {
801 if (next_stage)
802 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
803 }
804
805 static void
806 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
807 void *mem_ctx,
808 struct anv_graphics_pipeline *pipeline,
809 struct anv_pipeline_stage *vs_stage)
810 {
811 /* When using Primitive Replication for multiview, each view gets its own
812 * position slot.
813 */
814 uint32_t pos_slots = pipeline->use_primitive_replication ?
815 MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
816
817 brw_compute_vue_map(compiler->devinfo,
818 &vs_stage->prog_data.vs.base.vue_map,
819 vs_stage->nir->info.outputs_written,
820 vs_stage->nir->info.separate_shader,
821 pos_slots);
822
823 vs_stage->num_stats = 1;
824
825 struct brw_compile_vs_params params = {
826 .nir = vs_stage->nir,
827 .key = &vs_stage->key.vs,
828 .prog_data = &vs_stage->prog_data.vs,
829 .stats = vs_stage->stats,
830 .log_data = pipeline->base.device,
831 };
832
833 vs_stage->code = brw_compile_vs(compiler, mem_ctx, ¶ms);
834 }
835
836 static void
837 merge_tess_info(struct shader_info *tes_info,
838 const struct shader_info *tcs_info)
839 {
840 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
841 *
842 * "PointMode. Controls generation of points rather than triangles
843 * or lines. This functionality defaults to disabled, and is
844 * enabled if either shader stage includes the execution mode.
845 *
846 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
847 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
848 * and OutputVertices, it says:
849 *
850 * "One mode must be set in at least one of the tessellation
851 * shader stages."
852 *
853 * So, the fields can be set in either the TCS or TES, but they must
854 * agree if set in both. Our backend looks at TES, so bitwise-or in
855 * the values from the TCS.
856 */
857 assert(tcs_info->tess.tcs_vertices_out == 0 ||
858 tes_info->tess.tcs_vertices_out == 0 ||
859 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
860 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
861
862 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
863 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
864 tcs_info->tess.spacing == tes_info->tess.spacing);
865 tes_info->tess.spacing |= tcs_info->tess.spacing;
866
867 assert(tcs_info->tess._primitive_mode == 0 ||
868 tes_info->tess._primitive_mode == 0 ||
869 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
870 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
871 tes_info->tess.ccw |= tcs_info->tess.ccw;
872 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
873 }
874
875 static void
876 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
877 struct anv_pipeline_stage *tcs_stage,
878 struct anv_pipeline_stage *tes_stage)
879 {
880 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
881
882 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
883
884 nir_lower_patch_vertices(tes_stage->nir,
885 tcs_stage->nir->info.tess.tcs_vertices_out,
886 NULL);
887
888 /* Copy TCS info into the TES info */
889 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
890
891 /* Whacking the key after cache lookup is a bit sketchy, but all of
892 * this comes from the SPIR-V, which is part of the hash used for the
893 * pipeline cache. So it should be safe.
894 */
895 tcs_stage->key.tcs._tes_primitive_mode =
896 tes_stage->nir->info.tess._primitive_mode;
897 tcs_stage->key.tcs.quads_workaround =
898 compiler->devinfo->ver < 9 &&
899 tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
900 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
901 }
902
903 static void
904 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
905 void *mem_ctx,
906 struct anv_device *device,
907 struct anv_pipeline_stage *tcs_stage,
908 struct anv_pipeline_stage *prev_stage)
909 {
910 tcs_stage->key.tcs.outputs_written =
911 tcs_stage->nir->info.outputs_written;
912 tcs_stage->key.tcs.patch_outputs_written =
913 tcs_stage->nir->info.patch_outputs_written;
914
915 tcs_stage->num_stats = 1;
916
917 struct brw_compile_tcs_params params = {
918 .nir = tcs_stage->nir,
919 .key = &tcs_stage->key.tcs,
920 .prog_data = &tcs_stage->prog_data.tcs,
921 .stats = tcs_stage->stats,
922 .log_data = device,
923 };
924
925 tcs_stage->code = brw_compile_tcs(compiler, mem_ctx, ¶ms);
926 }
927
928 static void
929 anv_pipeline_link_tes(const struct brw_compiler *compiler,
930 struct anv_pipeline_stage *tes_stage,
931 struct anv_pipeline_stage *next_stage)
932 {
933 if (next_stage)
934 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
935 }
936
937 static void
938 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
939 void *mem_ctx,
940 struct anv_device *device,
941 struct anv_pipeline_stage *tes_stage,
942 struct anv_pipeline_stage *tcs_stage)
943 {
944 tes_stage->key.tes.inputs_read =
945 tcs_stage->nir->info.outputs_written;
946 tes_stage->key.tes.patch_inputs_read =
947 tcs_stage->nir->info.patch_outputs_written;
948
949 tes_stage->num_stats = 1;
950
951 struct brw_compile_tes_params params = {
952 .nir = tes_stage->nir,
953 .key = &tes_stage->key.tes,
954 .prog_data = &tes_stage->prog_data.tes,
955 .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
956 .stats = tes_stage->stats,
957 .log_data = device,
958 };
959
960 tes_stage->code = brw_compile_tes(compiler, mem_ctx, ¶ms);
961 }
962
963 static void
964 anv_pipeline_link_gs(const struct brw_compiler *compiler,
965 struct anv_pipeline_stage *gs_stage,
966 struct anv_pipeline_stage *next_stage)
967 {
968 if (next_stage)
969 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
970 }
971
972 static void
973 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
974 void *mem_ctx,
975 struct anv_device *device,
976 struct anv_pipeline_stage *gs_stage,
977 struct anv_pipeline_stage *prev_stage)
978 {
979 brw_compute_vue_map(compiler->devinfo,
980 &gs_stage->prog_data.gs.base.vue_map,
981 gs_stage->nir->info.outputs_written,
982 gs_stage->nir->info.separate_shader, 1);
983
984 gs_stage->num_stats = 1;
985
986 struct brw_compile_gs_params params = {
987 .nir = gs_stage->nir,
988 .key = &gs_stage->key.gs,
989 .prog_data = &gs_stage->prog_data.gs,
990 .stats = gs_stage->stats,
991 .log_data = device,
992 };
993
994 gs_stage->code = brw_compile_gs(compiler, mem_ctx, ¶ms);
995 }
996
997 static void
998 anv_pipeline_link_task(const struct brw_compiler *compiler,
999 struct anv_pipeline_stage *task_stage,
1000 struct anv_pipeline_stage *next_stage)
1001 {
1002 assert(next_stage);
1003 assert(next_stage->stage == MESA_SHADER_MESH);
1004 brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
1005 }
1006
1007 static void
1008 anv_pipeline_compile_task(const struct brw_compiler *compiler,
1009 void *mem_ctx,
1010 struct anv_device *device,
1011 struct anv_pipeline_stage *task_stage)
1012 {
1013 task_stage->num_stats = 1;
1014
1015 struct brw_compile_task_params params = {
1016 .nir = task_stage->nir,
1017 .key = &task_stage->key.task,
1018 .prog_data = &task_stage->prog_data.task,
1019 .stats = task_stage->stats,
1020 .log_data = device,
1021 };
1022
1023 task_stage->code = brw_compile_task(compiler, mem_ctx, ¶ms);
1024 }
1025
1026 static void
1027 anv_pipeline_link_mesh(const struct brw_compiler *compiler,
1028 struct anv_pipeline_stage *mesh_stage,
1029 struct anv_pipeline_stage *next_stage)
1030 {
1031 if (next_stage) {
1032 brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
1033 }
1034 }
1035
1036 static void
1037 anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
1038 void *mem_ctx,
1039 struct anv_device *device,
1040 struct anv_pipeline_stage *mesh_stage,
1041 struct anv_pipeline_stage *prev_stage)
1042 {
1043 mesh_stage->num_stats = 1;
1044
1045 struct brw_compile_mesh_params params = {
1046 .nir = mesh_stage->nir,
1047 .key = &mesh_stage->key.mesh,
1048 .prog_data = &mesh_stage->prog_data.mesh,
1049 .stats = mesh_stage->stats,
1050 .log_data = device,
1051 };
1052
1053 if (prev_stage) {
1054 assert(prev_stage->stage == MESA_SHADER_TASK);
1055 params.tue_map = &prev_stage->prog_data.task.map;
1056 }
1057
1058 mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, ¶ms);
1059 }
1060
1061 static void
1062 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1063 struct anv_pipeline_stage *stage,
1064 const struct vk_render_pass_state *rp)
1065 {
1066 /* Initially the valid outputs value is set to all possible render targets
1067 * valid (see populate_wm_prog_key()), before we look at the shader
1068 * variables. Here we look at the output variables of the shader and compute
1069 * the correct number of render target outputs.
1070 */
1071 stage->key.wm.color_outputs_valid = 0;
1072 nir_foreach_shader_out_variable_safe(var, stage->nir) {
1073 if (var->data.location < FRAG_RESULT_DATA0)
1074 continue;
1075
1076 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1077 const unsigned array_len =
1078 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1079 assert(rt + array_len <= MAX_RTS);
1080
1081 stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
1082 }
1083 stage->key.wm.color_outputs_valid &=
1084 (1u << rp->color_attachment_count) - 1;
1085 stage->key.wm.nr_color_regions =
1086 util_last_bit(stage->key.wm.color_outputs_valid);
1087
1088 unsigned num_rt_bindings;
1089 struct anv_pipeline_binding rt_bindings[MAX_RTS];
1090 if (stage->key.wm.nr_color_regions > 0) {
1091 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1092 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1093 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1094 rt_bindings[rt] = (struct anv_pipeline_binding) {
1095 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1096 .index = rt,
1097 };
1098 } else {
1099 /* Setup a null render target */
1100 rt_bindings[rt] = (struct anv_pipeline_binding) {
1101 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1102 .index = UINT32_MAX,
1103 };
1104 }
1105 }
1106 num_rt_bindings = stage->key.wm.nr_color_regions;
1107 } else {
1108 /* Setup a null render target */
1109 rt_bindings[0] = (struct anv_pipeline_binding) {
1110 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1111 .index = UINT32_MAX,
1112 };
1113 num_rt_bindings = 1;
1114 }
1115
1116 assert(num_rt_bindings <= MAX_RTS);
1117 assert(stage->bind_map.surface_count == 0);
1118 typed_memcpy(stage->bind_map.surface_to_descriptor,
1119 rt_bindings, num_rt_bindings);
1120 stage->bind_map.surface_count += num_rt_bindings;
1121 }
1122
1123 static void
1124 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1125 void *mem_ctx,
1126 struct anv_device *device,
1127 struct anv_pipeline_stage *fs_stage,
1128 struct anv_pipeline_stage *prev_stage)
1129 {
1130 /* TODO: we could set this to 0 based on the information in nir_shader, but
1131 * we need this before we call spirv_to_nir.
1132 */
1133 assert(prev_stage);
1134
1135 struct brw_compile_fs_params params = {
1136 .nir = fs_stage->nir,
1137 .key = &fs_stage->key.wm,
1138 .prog_data = &fs_stage->prog_data.wm,
1139
1140 .allow_spilling = true,
1141 .stats = fs_stage->stats,
1142 .log_data = device,
1143 };
1144
1145 if (prev_stage->stage == MESA_SHADER_MESH) {
1146 params.mue_map = &prev_stage->prog_data.mesh.map;
1147 /* TODO(mesh): Slots valid, do we even use/rely on it? */
1148 } else {
1149 fs_stage->key.wm.input_slots_valid =
1150 prev_stage->prog_data.vue.vue_map.slots_valid;
1151 }
1152
1153 fs_stage->code = brw_compile_fs(compiler, mem_ctx, ¶ms);
1154
1155 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1156 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1157 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1158 }
1159
1160 static void
1161 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1162 struct anv_pipeline_stage *stage,
1163 struct brw_compile_stats *stats,
1164 uint32_t code_offset)
1165 {
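/* The NIR and disassembly below are only captured when the application
 * requested internal representations (VK_KHR_pipeline_executable_properties).
 */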
1166 char *nir = NULL;
1167 if (stage->nir &&
1168 (pipeline->flags &
1169 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1170 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1171 }
1172
1173 char *disasm = NULL;
1174 if (stage->code &&
1175 (pipeline->flags &
1176 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1177 char *stream_data = NULL;
1178 size_t stream_size = 0;
1179 FILE *stream = open_memstream(&stream_data, &stream_size);
1180
1181 uint32_t push_size = 0;
1182 for (unsigned i = 0; i < 4; i++)
1183 push_size += stage->bind_map.push_ranges[i].length;
1184 if (push_size > 0) {
1185 fprintf(stream, "Push constant ranges:\n");
1186 for (unsigned i = 0; i < 4; i++) {
1187 if (stage->bind_map.push_ranges[i].length == 0)
1188 continue;
1189
1190 fprintf(stream, " RANGE%d (%dB): ", i,
1191 stage->bind_map.push_ranges[i].length * 32);
1192
1193 switch (stage->bind_map.push_ranges[i].set) {
1194 case ANV_DESCRIPTOR_SET_NULL:
1195 fprintf(stream, "NULL");
1196 break;
1197
1198 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1199 fprintf(stream, "Vulkan push constants and API params");
1200 break;
1201
1202 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1203 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1204 stage->bind_map.push_ranges[i].index,
1205 stage->bind_map.push_ranges[i].start * 32);
1206 break;
1207
1208 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1209 unreachable("gl_NumWorkgroups is never pushed");
1210
1211 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1212 fprintf(stream, "Inline shader constant data (start=%dB)",
1213 stage->bind_map.push_ranges[i].start * 32);
1214 break;
1215
1216 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1217 unreachable("Color attachments can't be pushed");
1218
1219 default:
1220 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1221 stage->bind_map.push_ranges[i].set,
1222 stage->bind_map.push_ranges[i].index,
1223 stage->bind_map.push_ranges[i].start * 32);
1224 break;
1225 }
1226 fprintf(stream, "\n");
1227 }
1228 fprintf(stream, "\n");
1229 }
1230
1231 /* Creating this is far cheaper than it looks. It's perfectly fine to
1232 * do it for every binary.
1233 */
1234 intel_disassemble(&pipeline->device->physical->compiler->isa,
1235 stage->code, code_offset, stream);
1236
1237 fclose(stream);
1238
1239 /* Copy it to a ralloc'd thing */
1240 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1241 memcpy(disasm, stream_data, stream_size);
1242 disasm[stream_size] = 0;
1243
1244 free(stream_data);
1245 }
1246
1247 const struct anv_pipeline_executable exe = {
1248 .stage = stage->stage,
1249 .stats = *stats,
1250 .nir = nir,
1251 .disasm = disasm,
1252 };
1253 util_dynarray_append(&pipeline->executables,
1254 struct anv_pipeline_executable, exe);
1255 }
1256
1257 static void
1258 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1259 struct anv_pipeline_stage *stage,
1260 struct anv_shader_bin *bin)
1261 {
1262 if (stage->stage == MESA_SHADER_FRAGMENT) {
1263 /* We pull the prog data and stats out of the anv_shader_bin because
1264 * the anv_pipeline_stage may not be fully populated if we successfully
1265 * looked up the shader in a cache.
1266 */
1267 const struct brw_wm_prog_data *wm_prog_data =
1268 (const struct brw_wm_prog_data *)bin->prog_data;
1269 struct brw_compile_stats *stats = bin->stats;
1270
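/* The fragment shader may have been compiled for several SIMD widths
 * (SIMD8/16/32); add one executable per enabled dispatch width.
 */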
1271 if (wm_prog_data->dispatch_8) {
1272 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1273 }
1274
1275 if (wm_prog_data->dispatch_16) {
1276 anv_pipeline_add_executable(pipeline, stage, stats++,
1277 wm_prog_data->prog_offset_16);
1278 }
1279
1280 if (wm_prog_data->dispatch_32) {
1281 anv_pipeline_add_executable(pipeline, stage, stats++,
1282 wm_prog_data->prog_offset_32);
1283 }
1284 } else {
1285 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1286 }
1287
1288 pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
1289 }
1290
1291 static void
1292 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1293 {
1294 /* TODO: Cache this pipeline-wide information. */
1295
1296 if (anv_pipeline_is_primitive(pipeline)) {
1297 /* Primitive replication depends on information from all the shaders.
1298 * Recover this bit from the fact that we have more than one position slot
1299 * in the vertex shader when using it.
1300 */
1301 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1302 int pos_slots = 0;
1303 const struct brw_vue_prog_data *vue_prog_data =
1304 (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1305 const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1306 for (int i = 0; i < vue_map->num_slots; i++) {
1307 if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1308 pos_slots++;
1309 }
1310 pipeline->use_primitive_replication = pos_slots > 1;
1311 }
1312 }
1313
1314 static void
1315 anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
1316 const struct vk_graphics_pipeline_state *state,
1317 struct anv_pipeline_stage *stages)
1318 {
1319 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1320 if (!stages[s].info)
1321 continue;
1322
1323 int64_t stage_start = os_time_get_nano();
1324
1325 vk_pipeline_hash_shader_stage(stages[s].info, stages[s].shader_sha1);
1326
1327 const struct anv_device *device = pipeline->base.device;
1328 switch (stages[s].stage) {
1329 case MESA_SHADER_VERTEX:
1330 populate_vs_prog_key(device,
1331 pipeline->base.device->robust_buffer_access,
1332 &stages[s].key.vs);
1333 break;
1334 case MESA_SHADER_TESS_CTRL:
1335 populate_tcs_prog_key(device,
1336 pipeline->base.device->robust_buffer_access,
1337 state->ts->patch_control_points,
1338 &stages[s].key.tcs);
1339 break;
1340 case MESA_SHADER_TESS_EVAL:
1341 populate_tes_prog_key(device,
1342 pipeline->base.device->robust_buffer_access,
1343 &stages[s].key.tes);
1344 break;
1345 case MESA_SHADER_GEOMETRY:
1346 populate_gs_prog_key(device,
1347 pipeline->base.device->robust_buffer_access,
1348 &stages[s].key.gs);
1349 break;
1350 case MESA_SHADER_FRAGMENT: {
1351 populate_wm_prog_key(pipeline,
1352 pipeline->base.device->robust_buffer_access,
1353 state->dynamic, state->ms, state->fsr, state->rp,
1354 &stages[s].key.wm);
1355 break;
1356 }
1357 case MESA_SHADER_TASK:
1358 populate_task_prog_key(device,
1359 pipeline->base.device->robust_buffer_access,
1360 &stages[s].key.task);
1361 break;
1362 case MESA_SHADER_MESH:
1363 populate_mesh_prog_key(device,
1364 pipeline->base.device->robust_buffer_access,
1365 &stages[s].key.mesh);
1366 break;
1367 default:
1368 unreachable("Invalid graphics shader stage");
1369 }
1370
1371 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1372 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1373 }
1374
1375 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
1376 pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
1377 }
1378
1379 static bool
1380 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
1381 struct vk_pipeline_cache *cache,
1382 struct anv_pipeline_stage *stages,
1383 VkPipelineCreationFeedbackEXT *pipeline_feedback)
1384 {
1385 unsigned found = 0;
1386 unsigned cache_hits = 0;
1387 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1388 if (!stages[s].info)
1389 continue;
1390
1391 int64_t stage_start = os_time_get_nano();
1392
1393 bool cache_hit;
1394 struct anv_shader_bin *bin =
1395 anv_device_search_for_kernel(pipeline->base.device, cache,
1396 &stages[s].cache_key,
1397 sizeof(stages[s].cache_key), &cache_hit);
1398 if (bin) {
1399 found++;
1400 pipeline->shaders[s] = bin;
1401 }
1402
1403 if (cache_hit) {
1404 cache_hits++;
1405 stages[s].feedback.flags |=
1406 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1407 }
1408 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1409 }
1410
1411 if (found == __builtin_popcount(pipeline->active_stages)) {
1412 if (cache_hits == found) {
1413 pipeline_feedback->flags |=
1414 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1415 }
1416 /* We found all our shaders in the cache. We're done. */
1417 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1418 if (!stages[s].info)
1419 continue;
1420
1421 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1422 pipeline->shaders[s]);
1423 }
1424 anv_pipeline_init_from_cached_graphics(pipeline);
1425 return true;
1426 } else if (found > 0) {
1427 /* We found some but not all of our shaders. This shouldn't happen most
1428 * of the time but it can if we have a partially populated pipeline
1429 * cache.
1430 */
1431 assert(found < __builtin_popcount(pipeline->active_stages));
1432
1433 vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1434 &pipeline->base.device->vk.base),
1435 "Found a partial pipeline in the cache. This is "
1436 "most likely caused by an incomplete pipeline cache "
1437 "import or export");
1438
1439 /* We're going to have to recompile anyway, so just throw away our
1440 * references to the shaders in the cache. We'll get them out of the
1441 * cache again as part of the compilation process.
1442 */
1443 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1444 stages[s].feedback.flags = 0;
1445 if (pipeline->shaders[s]) {
1446 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1447 pipeline->shaders[s] = NULL;
1448 }
1449 }
1450 }
1451
1452 return false;
1453 }
1454
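/* Order in which graphics shader stages are linked and compiled, following
 * the flow of data through the legacy geometry and mesh pipelines.
 */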
1455 static const gl_shader_stage graphics_shader_order[] = {
1456 MESA_SHADER_VERTEX,
1457 MESA_SHADER_TESS_CTRL,
1458 MESA_SHADER_TESS_EVAL,
1459 MESA_SHADER_GEOMETRY,
1460
1461 MESA_SHADER_TASK,
1462 MESA_SHADER_MESH,
1463
1464 MESA_SHADER_FRAGMENT,
1465 };
1466
1467 static VkResult
1468 anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
1469 struct vk_pipeline_cache *cache,
1470 struct anv_pipeline_stage *stages,
1471 void *pipeline_ctx)
1472 {
1473 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1474 gl_shader_stage s = graphics_shader_order[i];
1475 if (!stages[s].info)
1476 continue;
1477
1478 int64_t stage_start = os_time_get_nano();
1479
1480 assert(stages[s].stage == s);
1481 assert(pipeline->shaders[s] == NULL);
1482
1483 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1484 .surface_to_descriptor = stages[s].surface_to_descriptor,
1485 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1486 };
1487
1488 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1489 pipeline_ctx,
1490 &stages[s]);
1491 if (stages[s].nir == NULL) {
1492 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1493 }
1494
1495 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1496 }
1497
1498 return VK_SUCCESS;
1499 }
1500
1501 static VkResult
1502 anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
1503 struct vk_pipeline_cache *cache,
1504 const VkGraphicsPipelineCreateInfo *info,
1505 const struct vk_graphics_pipeline_state *state)
1506 {
1507 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1508 VkResult result;
1509
1510 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1511 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1512 };
1513 int64_t pipeline_start = os_time_get_nano();
1514
1515 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1516 struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1517 for (uint32_t i = 0; i < info->stageCount; i++) {
1518 gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
1519 stages[stage].stage = stage;
1520 stages[stage].info = &info->pStages[i];
1521 }
1522
1523 anv_graphics_pipeline_init_keys(pipeline, state, stages);
1524
1525 unsigned char sha1[20];
1526 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1527
1528 for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
1529 if (!stages[s].info)
1530 continue;
1531
1532 stages[s].cache_key.stage = s;
1533 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1534 }
1535
1536 const bool skip_cache_lookup =
1537 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1538 if (!skip_cache_lookup) {
1539 bool found_all_shaders =
1540 anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
1541 &pipeline_feedback);
1542 if (found_all_shaders)
1543 goto done;
1544 }
1545
1546 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1547 return VK_PIPELINE_COMPILE_REQUIRED;
1548
1549 void *pipeline_ctx = ralloc_context(NULL);
1550
1551 result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
1552 pipeline_ctx);
1553 if (result != VK_SUCCESS)
1554 goto fail;
1555
1556 /* Walk backwards to link */
1557 struct anv_pipeline_stage *next_stage = NULL;
1558 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1559 gl_shader_stage s = graphics_shader_order[i];
1560 if (!stages[s].info)
1561 continue;
1562
1563 switch (s) {
1564 case MESA_SHADER_VERTEX:
1565 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1566 break;
1567 case MESA_SHADER_TESS_CTRL:
1568 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1569 break;
1570 case MESA_SHADER_TESS_EVAL:
1571 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1572 break;
1573 case MESA_SHADER_GEOMETRY:
1574 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1575 break;
1576 case MESA_SHADER_TASK:
1577 anv_pipeline_link_task(compiler, &stages[s], next_stage);
1578 break;
1579 case MESA_SHADER_MESH:
1580 anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
1581 break;
1582 case MESA_SHADER_FRAGMENT:
1583 anv_pipeline_link_fs(compiler, &stages[s], state->rp);
1584 break;
1585 default:
1586 unreachable("Invalid graphics shader stage");
1587 }
1588
1589 next_stage = &stages[s];
1590 }
1591
1592 if (pipeline->base.device->info.ver >= 12 &&
1593 pipeline->view_mask != 0) {
1594 /* For some pipelines, HW Primitive Replication can be used instead of
1595 * instancing to implement Multiview. This depends on how viewIndex is
1596 * used in all the active shaders, so this check can't be done on
1597 * individual shaders.
1598 */
1599 nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1600 for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
1601 shaders[s] = stages[s].nir;
1602
1603 pipeline->use_primitive_replication =
1604 anv_check_for_primitive_replication(shaders, pipeline);
1605 } else {
1606 pipeline->use_primitive_replication = false;
1607 }
1608
1609 struct anv_pipeline_stage *prev_stage = NULL;
1610 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1611 gl_shader_stage s = graphics_shader_order[i];
1612 if (!stages[s].info)
1613 continue;
1614
1615 int64_t stage_start = os_time_get_nano();
1616
1617 void *stage_ctx = ralloc_context(NULL);
1618
1619 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1620
1621 if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
1622 prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1623 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1624 stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1625 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1626 prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1627 stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1628 }
1629
1630 ralloc_free(stage_ctx);
1631
1632 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1633
1634 prev_stage = &stages[s];
1635 }
1636
1637 /* If the platform can write the per-primitive variable shading rate,
1638 * figure out the last geometry stage that should write the primitive
1639 * shading rate, and ensure it is marked as used there. The backend will
1640 * write a default value if the shader doesn't actually write it.
1641 *
1642 * We iterate backwards through the stages and stop on the first shader
1643 * that can set the value.
1644 */
1645 const struct intel_device_info *devinfo = &pipeline->base.device->info;
1646 if (devinfo->has_coarse_pixel_primitive_and_cb &&
1647 stages[MESA_SHADER_FRAGMENT].info &&
1648 stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel &&
1649 !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading &&
1650 stages[MESA_SHADER_MESH].info == NULL) {
1651 struct anv_pipeline_stage *last_psr = NULL;
1652
1653 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1654 gl_shader_stage s =
1655 graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];
1656
1657 if (!stages[s].info ||
1658 !gl_shader_stage_can_set_fragment_shading_rate(s))
1659 continue;
1660
1661 last_psr = &stages[s];
1662 break;
1663 }
1664
1665 assert(last_psr);
1666 last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
1667 }
1668
1669 prev_stage = NULL;
1670 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1671 gl_shader_stage s = graphics_shader_order[i];
1672 if (!stages[s].info)
1673 continue;
1674
1675 int64_t stage_start = os_time_get_nano();
1676
1677 void *stage_ctx = ralloc_context(NULL);
1678
1679 switch (s) {
1680 case MESA_SHADER_VERTEX:
1681 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1682 &stages[s]);
1683 break;
1684 case MESA_SHADER_TESS_CTRL:
1685 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1686 &stages[s], prev_stage);
1687 break;
1688 case MESA_SHADER_TESS_EVAL:
1689 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1690 &stages[s], prev_stage);
1691 break;
1692 case MESA_SHADER_GEOMETRY:
1693 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1694 &stages[s], prev_stage);
1695 break;
1696 case MESA_SHADER_TASK:
1697 anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
1698 &stages[s]);
1699 break;
1700 case MESA_SHADER_MESH:
1701 anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
1702 &stages[s], prev_stage);
1703 break;
1704 case MESA_SHADER_FRAGMENT:
1705 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1706 &stages[s], prev_stage);
1707 break;
1708 default:
1709 unreachable("Invalid graphics shader stage");
1710 }
1711 if (stages[s].code == NULL) {
1712 ralloc_free(stage_ctx);
1713 result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1714 goto fail;
1715 }
1716
1717 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1718 &stages[s].bind_map);
1719
1720 struct anv_shader_bin *bin =
1721 anv_device_upload_kernel(pipeline->base.device, cache, s,
1722 &stages[s].cache_key,
1723 sizeof(stages[s].cache_key),
1724 stages[s].code,
1725 stages[s].prog_data.base.program_size,
1726 &stages[s].prog_data.base,
1727 brw_prog_data_size(s),
1728 stages[s].stats, stages[s].num_stats,
1729 stages[s].nir->xfb_info,
1730 &stages[s].bind_map);
1731 if (!bin) {
1732 ralloc_free(stage_ctx);
1733 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1734 goto fail;
1735 }
1736
1737 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1738
1739 pipeline->shaders[s] = bin;
1740 ralloc_free(stage_ctx);
1741
1742 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1743
1744 prev_stage = &stages[s];
1745 }
1746
1747 ralloc_free(pipeline_ctx);
1748
1749 done:
1750
1751 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1752
1753 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1754 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1755 if (create_feedback) {
1756 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1757
1758 uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
1759 assert(stage_count == 0 || info->stageCount == stage_count);
1760 for (uint32_t i = 0; i < stage_count; i++) {
1761 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1762 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1763 }
1764 }
1765
1766 return VK_SUCCESS;
1767
1768 fail:
1769 ralloc_free(pipeline_ctx);
1770
1771 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1772 if (pipeline->shaders[s])
1773 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1774 }
1775
1776 return result;
1777 }
1778
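/* Compute pipelines only have a single stage, so the flow is simpler than
* for graphics: hash the stage, look it up in the cache and, on a miss, get
* the NIR, lower it, run brw_compile_cs() and upload the kernel.
*/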
1779 static VkResult
1780 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1781 struct vk_pipeline_cache *cache,
1782 const VkComputePipelineCreateInfo *info)
1783 {
1784 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
1785 assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
1786
1787 VkPipelineCreationFeedback pipeline_feedback = {
1788 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1789 };
1790 int64_t pipeline_start = os_time_get_nano();
1791
1792 struct anv_device *device = pipeline->base.device;
1793 const struct brw_compiler *compiler = device->physical->compiler;
1794
1795 struct anv_pipeline_stage stage = {
1796 .stage = MESA_SHADER_COMPUTE,
1797 .info = &info->stage,
1798 .cache_key = {
1799 .stage = MESA_SHADER_COMPUTE,
1800 },
1801 .feedback = {
1802 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1803 },
1804 };
1805 vk_pipeline_hash_shader_stage(&info->stage, stage.shader_sha1);
1806
1807 struct anv_shader_bin *bin = NULL;
1808
1809 populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs);
1810
1811 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1812
1813 const bool skip_cache_lookup =
1814 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1815
1816 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1817
1818 bool cache_hit = false;
1819 if (!skip_cache_lookup) {
1820 bin = anv_device_search_for_kernel(device, cache,
1821 &stage.cache_key,
1822 sizeof(stage.cache_key),
1823 &cache_hit);
1824 }
1825
1826 if (bin == NULL &&
1827 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
1828 return VK_PIPELINE_COMPILE_REQUIRED;
1829
1830 void *mem_ctx = ralloc_context(NULL);
1831 if (bin == NULL) {
1832 int64_t stage_start = os_time_get_nano();
1833
1834 stage.bind_map = (struct anv_pipeline_bind_map) {
1835 .surface_to_descriptor = stage.surface_to_descriptor,
1836 .sampler_to_descriptor = stage.sampler_to_descriptor
1837 };
1838
1839 /* Set up a binding for the gl_NumWorkGroups */
1840 stage.bind_map.surface_count = 1;
1841 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1842 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1843 };
1844
1845 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1846 if (stage.nir == NULL) {
1847 ralloc_free(mem_ctx);
1848 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1849 }
1850
1851 NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);
1852
1853 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1854
1855 unsigned local_size = stage.nir->info.workgroup_size[0] *
1856 stage.nir->info.workgroup_size[1] *
1857 stage.nir->info.workgroup_size[2];
1858
1859 /* Games don't always request full subgroups when they should,
1860 * which can cause bugs, as they may expect a bigger subgroup
1861 * size than the one we choose for execution.
1862 */
1863 if (device->physical->instance->assume_full_subgroups &&
1864 stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
1865 stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
1866 local_size &&
1867 local_size % BRW_SUBGROUP_SIZE == 0)
1868 stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
1869
1870 /* If the client requests that we dispatch full subgroups but doesn't
1871 * allow us to pick a subgroup size, we have to smash it to the API
1872 * value of 32. Performance will likely be terrible in this case but
1873 * there's nothing we can do about that. The client should have chosen
1874 * a size.
1875 */
1876 if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
1877 stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;
1878
1879 stage.num_stats = 1;
1880
1881 struct brw_compile_cs_params params = {
1882 .nir = stage.nir,
1883 .key = &stage.key.cs,
1884 .prog_data = &stage.prog_data.cs,
1885 .stats = stage.stats,
1886 .log_data = device,
1887 };
1888
1889 stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1890 if (stage.code == NULL) {
1891 ralloc_free(mem_ctx);
1892 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1893 }
1894
1895 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1896
1897 if (!stage.prog_data.cs.uses_num_work_groups) {
1898 assert(stage.bind_map.surface_to_descriptor[0].set ==
1899 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1900 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1901 }
1902
1903 const unsigned code_size = stage.prog_data.base.program_size;
1904 bin = anv_device_upload_kernel(device, cache,
1905 MESA_SHADER_COMPUTE,
1906 &stage.cache_key, sizeof(stage.cache_key),
1907 stage.code, code_size,
1908 &stage.prog_data.base,
1909 sizeof(stage.prog_data.cs),
1910 stage.stats, stage.num_stats,
1911 NULL, &stage.bind_map);
1912 if (!bin) {
1913 ralloc_free(mem_ctx);
1914 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1915 }
1916
1917 stage.feedback.duration = os_time_get_nano() - stage_start;
1918 }
1919
1920 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1921
1922 ralloc_free(mem_ctx);
1923
1924 if (cache_hit) {
1925 stage.feedback.flags |=
1926 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1927 pipeline_feedback.flags |=
1928 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1929 }
1930 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1931
1932 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1933 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1934 if (create_feedback) {
1935 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1936
1937 if (create_feedback->pipelineStageCreationFeedbackCount) {
1938 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1939 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1940 }
1941 }
1942
1943 pipeline->cs = bin;
1944
1945 return VK_SUCCESS;
1946 }
1947
1948 static VkResult
1949 anv_compute_pipeline_create(struct anv_device *device,
1950 struct vk_pipeline_cache *cache,
1951 const VkComputePipelineCreateInfo *pCreateInfo,
1952 const VkAllocationCallbacks *pAllocator,
1953 VkPipeline *pPipeline)
1954 {
1955 struct anv_compute_pipeline *pipeline;
1956 VkResult result;
1957
1958 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1959
1960 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1961 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1962 if (pipeline == NULL)
1963 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1964
1965 result = anv_pipeline_init(&pipeline->base, device,
1966 ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
1967 pAllocator);
1968 if (result != VK_SUCCESS) {
1969 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1970 return result;
1971 }
1972
1973 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1974 pipeline->batch_data, sizeof(pipeline->batch_data));
1975
1976 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
1977 if (result != VK_SUCCESS) {
1978 anv_pipeline_finish(&pipeline->base, device, pAllocator);
1979 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1980 return result;
1981 }
1982
1983 anv_genX(&device->info, compute_pipeline_emit)(pipeline);
1984
1985 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1986
1987 return pipeline->base.batch.status;
1988 }
1989
1990 VkResult anv_CreateComputePipelines(
1991 VkDevice _device,
1992 VkPipelineCache pipelineCache,
1993 uint32_t count,
1994 const VkComputePipelineCreateInfo* pCreateInfos,
1995 const VkAllocationCallbacks* pAllocator,
1996 VkPipeline* pPipelines)
1997 {
1998 ANV_FROM_HANDLE(anv_device, device, _device);
1999 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2000
2001 VkResult result = VK_SUCCESS;
2002
2003 unsigned i;
2004 for (i = 0; i < count; i++) {
2005 VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
2006 &pCreateInfos[i],
2007 pAllocator, &pPipelines[i]);
2008
2009 if (res == VK_SUCCESS)
2010 continue;
2011
2012 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2013 * is not obvious what error should be reported upon two different failures.
2014 */
2015 result = res;
2016 if (res != VK_PIPELINE_COMPILE_REQUIRED)
2017 break;
2018
2019 pPipelines[i] = VK_NULL_HANDLE;
2020
2021 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2022 break;
2023 }
2024
2025 for (; i < count; i++)
2026 pPipelines[i] = VK_NULL_HANDLE;
2027
2028 return result;
2029 }
2030
2031 /**
2032 * Calculate the desired L3 partitioning based on the current state of the
2033 * pipeline. For now this simply returns the conservative defaults calculated
2034 * by get_default_l3_weights(), but we could probably do better by gathering
2035 * more statistics from the pipeline state (e.g. guess of expected URB usage
2036 * and bound surfaces), or by using feedback from performance counters.
2037 */
2038 void
2039 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2040 {
2041 const struct intel_device_info *devinfo = &pipeline->device->info;
2042
2043 const struct intel_l3_weights w =
2044 intel_get_default_l3_weights(devinfo, true, needs_slm);
2045
2046 pipeline->l3_config = intel_get_l3_config(devinfo, w);
2047 }
2048
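/* Fills out an anv_graphics_pipeline from the create info and the already
* resolved vk_graphics_pipeline_state: compiles the shaders, sets up the L3
* configuration, and records the vertex-buffer, multiview and dynamic-state
* related fields used at draw time.
*/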
2049 static VkResult
2050 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2051 struct anv_device *device,
2052 struct vk_pipeline_cache *cache,
2053 const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
2054 const struct vk_graphics_pipeline_state *state,
2055 const VkAllocationCallbacks *alloc)
2056 {
2057 VkResult result;
2058
2059 result = anv_pipeline_init(&pipeline->base, device,
2060 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2061 alloc);
2062 if (result != VK_SUCCESS)
2063 return result;
2064
2065 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2066 pipeline->batch_data, sizeof(pipeline->batch_data));
2067
2068 pipeline->active_stages = 0;
2069 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2070 pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2071
2072 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2073 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2074
2075 if (anv_pipeline_is_mesh(pipeline))
2076 assert(device->physical->vk.supported_extensions.NV_mesh_shader);
2077
2078 pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
2079 vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
2080
2081 pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
2082 pipeline->depth_clip_enable = state->rs->depth_clip_enable;
2083 pipeline->view_mask = state->rp->view_mask;
2084
2085 result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
2086 if (result != VK_SUCCESS) {
2087 anv_pipeline_finish(&pipeline->base, device, alloc);
2088 return result;
2089 }
2090
2091 anv_pipeline_setup_l3_config(&pipeline->base, false);
2092
2093 if (anv_pipeline_is_primitive(pipeline)) {
2094 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2095
2096 u_foreach_bit(a, state->vi->attributes_valid) {
2097 if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
2098 pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
2099 }
2100
2101 u_foreach_bit(b, state->vi->bindings_valid) {
2102 pipeline->vb[b].stride = state->vi->bindings[b].stride;
2103 pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
2104 VK_VERTEX_INPUT_RATE_INSTANCE;
2105 pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
2106 }
2107
2108 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2109 * different views. If the client asks for instancing, we need to multiply
2110 * the instance divisor by the number of views to ensure that we repeat the
2111 * client's per-instance data once for each view.
2112 */
2113 pipeline->instance_multiplier = 1;
2114 if (pipeline->view_mask && !pipeline->use_primitive_replication)
2115 pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
2116 } else {
2117 assert(anv_pipeline_is_mesh(pipeline));
2118 /* TODO(mesh): Mesh vs. Multiview with Instancing. */
2119 }
2120
2121 pipeline->negative_one_to_one =
2122 state->vp != NULL && state->vp->negative_one_to_one;
2123
2124 /* Store the line mode, polygon mode and rasterization samples; these are
2125 * used for dynamic primitive topology.
2126 */
2127 pipeline->polygon_mode = state->rs->polygon_mode;
2128 pipeline->rasterization_samples =
2129 state->ms != NULL ? state->ms->rasterization_samples : 1;
2130 pipeline->line_mode = state->rs->line.mode;
2131 if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2132 if (pipeline->rasterization_samples > 1) {
2133 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2134 } else {
2135 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2136 }
2137 }
2138 pipeline->patch_control_points =
2139 state->ts != NULL ? state->ts->patch_control_points : 0;
2140
2141 /* Store the color write masks, to be merged with color write enable if
2142 * dynamic.
2143 */
2144 if (state->cb != NULL) {
2145 for (unsigned i = 0; i < state->cb->attachment_count; i++)
2146 pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
2147 }
2148
2149 return VK_SUCCESS;
2150 }
2151
2152 static VkResult
2153 anv_graphics_pipeline_create(struct anv_device *device,
2154 struct vk_pipeline_cache *cache,
2155 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2156 const VkAllocationCallbacks *pAllocator,
2157 VkPipeline *pPipeline)
2158 {
2159 struct anv_graphics_pipeline *pipeline;
2160 VkResult result;
2161
2162 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2163
2164 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2165 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2166 if (pipeline == NULL)
2167 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2168
2169 struct vk_graphics_pipeline_all_state all;
2170 struct vk_graphics_pipeline_state state = { };
2171 result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
2172 NULL /* sp_info */,
2173 &all, NULL, 0, NULL);
2174 if (result != VK_SUCCESS) {
2175 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2176 return result;
2177 }
2178
2179 result = anv_graphics_pipeline_init(pipeline, device, cache,
2180 pCreateInfo, &state, pAllocator);
2181 if (result != VK_SUCCESS) {
2182 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2183 return result;
2184 }
2185
2186 anv_genX(&device->info, graphics_pipeline_emit)(pipeline, &state);
2187
2188 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2189
2190 return pipeline->base.batch.status;
2191 }
2192
2193 VkResult anv_CreateGraphicsPipelines(
2194 VkDevice _device,
2195 VkPipelineCache pipelineCache,
2196 uint32_t count,
2197 const VkGraphicsPipelineCreateInfo* pCreateInfos,
2198 const VkAllocationCallbacks* pAllocator,
2199 VkPipeline* pPipelines)
2200 {
2201 ANV_FROM_HANDLE(anv_device, device, _device);
2202 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2203
2204 VkResult result = VK_SUCCESS;
2205
2206 unsigned i;
2207 for (i = 0; i < count; i++) {
2208 VkResult res = anv_graphics_pipeline_create(device,
2209 pipeline_cache,
2210 &pCreateInfos[i],
2211 pAllocator, &pPipelines[i]);
2212
2213 if (res == VK_SUCCESS)
2214 continue;
2215
2216 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2217 * is not obvious what error should be reported upon two different failures.
2218 */
2219 result = res;
2220 if (res != VK_PIPELINE_COMPILE_REQUIRED)
2221 break;
2222
2223 pPipelines[i] = VK_NULL_HANDLE;
2224
2225 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2226 break;
2227 }
2228
2229 for (; i < count; i++)
2230 pPipelines[i] = VK_NULL_HANDLE;
2231
2232 return result;
2233 }
2234
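/* Compiles a single ray-tracing shader: split it into resume shaders with
* nir_lower_shader_calls, lower the ray-tracing intrinsics, compile with
* brw_compile_bs() and upload the result, appending it to the pipeline's
* shader list.
*/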
2235 static VkResult
2236 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2237 struct vk_pipeline_cache *cache,
2238 nir_shader *nir,
2239 struct anv_pipeline_stage *stage,
2240 struct anv_shader_bin **shader_out,
2241 void *mem_ctx)
2242 {
2243 const struct brw_compiler *compiler =
2244 pipeline->base.device->physical->compiler;
2245 const struct intel_device_info *devinfo = compiler->devinfo;
2246
2247 nir_shader **resume_shaders = NULL;
2248 uint32_t num_resume_shaders = 0;
2249 if (nir->info.stage != MESA_SHADER_COMPUTE) {
2250 NIR_PASS(_, nir, nir_lower_shader_calls,
2251 nir_address_format_64bit_global,
2252 BRW_BTD_STACK_ALIGN,
2253 &resume_shaders, &num_resume_shaders, mem_ctx);
2254 NIR_PASS(_, nir, brw_nir_lower_shader_calls);
2255 NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2256 }
2257
2258 for (unsigned i = 0; i < num_resume_shaders; i++) {
2259 NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls);
2260 NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2261 }
2262
2263 struct brw_compile_bs_params params = {
2264 .nir = nir,
2265 .key = &stage->key.bs,
2266 .prog_data = &stage->prog_data.bs,
2267 .num_resume_shaders = num_resume_shaders,
2268 .resume_shaders = resume_shaders,
2269
2270 .stats = stage->stats,
2271 .log_data = pipeline->base.device,
2272 };
2273
2274 stage->code = brw_compile_bs(compiler, mem_ctx, &params);
2275 if (stage->code == NULL)
2276 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2277
2278 /* Ray-tracing shaders don't have a "real" bind map */
2279 struct anv_pipeline_bind_map empty_bind_map = {};
2280
2281 const unsigned code_size = stage->prog_data.base.program_size;
2282 struct anv_shader_bin *bin =
2283 anv_device_upload_kernel(pipeline->base.device,
2284 cache,
2285 stage->stage,
2286 &stage->cache_key, sizeof(stage->cache_key),
2287 stage->code, code_size,
2288 &stage->prog_data.base,
2289 sizeof(stage->prog_data.bs),
2290 stage->stats, 1,
2291 NULL, &empty_bind_map);
2292 if (bin == NULL)
2293 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2294
2295 /* TODO: Figure out executables for resume shaders */
2296 anv_pipeline_add_executables(&pipeline->base, stage, bin);
2297 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2298
2299 *shader_out = bin;
2300
2301 return VK_SUCCESS;
2302 }
2303
2304 static bool
2305 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2306 {
2307 if (info->pDynamicState == NULL)
2308 return false;
2309
2310 for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2311 if (info->pDynamicState->pDynamicStates[i] ==
2312 VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2313 return true;
2314 }
2315
2316 return false;
2317 }
2318
2319 static void
2320 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2321 const VkRayTracingPipelineCreateInfoKHR *info,
2322 uint32_t *stack_max)
2323 {
2324 if (is_rt_stack_size_dynamic(info)) {
2325 pipeline->stack_size = 0; /* 0 means dynamic */
2326 } else {
2327 /* From the Vulkan spec:
2328 *
2329 * "If the stack size is not set explicitly, the stack size for a
2330 * pipeline is:
2331 *
2332 * rayGenStackMax +
2333 * min(1, maxPipelineRayRecursionDepth) ×
2334 * max(closestHitStackMax, missStackMax,
2335 * intersectionStackMax + anyHitStackMax) +
2336 * max(0, maxPipelineRayRecursionDepth-1) ×
2337 * max(closestHitStackMax, missStackMax) +
2338 * 2 × callableStackMax"
2339 */
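/* Purely illustrative example (numbers are not from the spec): with
* maxPipelineRayRecursionDepth = 2, rayGenStackMax = 1024,
* closestHitStackMax = 512, missStackMax = 256, intersectionStackMax = 128,
* anyHitStackMax = 64 and callableStackMax = 0, this evaluates to
* 1024 + 1 * 512 + 1 * 512 + 2 * 0 = 2048 bytes.
*/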
2340 pipeline->stack_size =
2341 stack_max[MESA_SHADER_RAYGEN] +
2342 MIN2(1, info->maxPipelineRayRecursionDepth) *
2343 MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2344 stack_max[MESA_SHADER_MISS],
2345 stack_max[MESA_SHADER_INTERSECTION],
2346 stack_max[MESA_SHADER_ANY_HIT]) +
2347 MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2348 MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2349 stack_max[MESA_SHADER_MISS]) +
2350 2 * stack_max[MESA_SHADER_CALLABLE];
2351
2352 /* This is an extremely unlikely case but we need to set it to some
2353 * non-zero value so that we don't accidentally think it's dynamic.
2354 * Our minimum stack size is 2KB anyway so we could set it to any small
2355 * value we like.
2356 */
2357 if (pipeline->stack_size == 0)
2358 pipeline->stack_size = 1;
2359 }
2360 }
2361
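/* Builds the per-stage array used by the ray-tracing compile path: one entry
* per VkPipelineShaderStageCreateInfo with its key, SHA1 and feedback. For
* procedural hit groups the intersection stage is hashed together with its
* any-hit shader, since the two get compiled as a single kernel later on.
*/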
2362 static struct anv_pipeline_stage *
2363 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2364 const VkRayTracingPipelineCreateInfoKHR *info,
2365 void *pipeline_ctx)
2366 {
2367 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2368
2369 /* Create enough stage entries for all shader modules plus potential
2370 * combinations in the groups.
2371 */
2372 struct anv_pipeline_stage *stages =
2373 rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2374
2375 for (uint32_t i = 0; i < info->stageCount; i++) {
2376 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2377 if (vk_pipeline_shader_stage_is_null(sinfo))
2378 continue;
2379
2380 int64_t stage_start = os_time_get_nano();
2381
2382 stages[i] = (struct anv_pipeline_stage) {
2383 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2384 .info = sinfo,
2385 .cache_key = {
2386 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2387 },
2388 .feedback = {
2389 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2390 },
2391 };
2392
2393 populate_bs_prog_key(pipeline->base.device,
2394 pipeline->base.device->robust_buffer_access,
2395 &stages[i].key.bs);
2396
2397 vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1);
2398
2399 if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2400 anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2401 stages[i].cache_key.sha1);
2402 }
2403
2404 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2405 }
2406
2407 for (uint32_t i = 0; i < info->groupCount; i++) {
2408 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2409
2410 if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2411 continue;
2412
2413 int64_t stage_start = os_time_get_nano();
2414
2415 uint32_t intersection_idx = ginfo->intersectionShader;
2416 assert(intersection_idx < info->stageCount);
2417
2418 uint32_t any_hit_idx = ginfo->anyHitShader;
2419 if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2420 assert(any_hit_idx < info->stageCount);
2421 anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2422 layout,
2423 &stages[intersection_idx],
2424 &stages[any_hit_idx],
2425 stages[intersection_idx].cache_key.sha1);
2426 } else {
2427 anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2428 &stages[intersection_idx],
2429 stages[intersection_idx].cache_key.sha1);
2430 }
2431
2432 stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2433 }
2434
2435 return stages;
2436 }
2437
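/* Looks up every ray-tracing stage in the pipeline cache, records cache-hit
* feedback and accumulates the per-stage ray-tracing stack maxima. Returns
* true only if every stage was found in the cache.
*/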
2438 static bool
2439 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2440 struct vk_pipeline_cache *cache,
2441 const VkRayTracingPipelineCreateInfoKHR *info,
2442 struct anv_pipeline_stage *stages,
2443 uint32_t *stack_max)
2444 {
2445 uint32_t shaders = 0, cache_hits = 0;
2446 for (uint32_t i = 0; i < info->stageCount; i++) {
2447 if (stages[i].info == NULL)
2448 continue;
2449
2450 shaders++;
2451
2452 int64_t stage_start = os_time_get_nano();
2453
2454 bool cache_hit;
2455 stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2456 &stages[i].cache_key,
2457 sizeof(stages[i].cache_key),
2458 &cache_hit);
2459 if (cache_hit) {
2460 cache_hits++;
2461 stages[i].feedback.flags |=
2462 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2463 }
2464
2465 if (stages[i].bin != NULL) {
2466 anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2467 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2468
2469 uint32_t stack_size =
2470 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2471 stack_max[stages[i].stage] =
2472 MAX2(stack_max[stages[i].stage], stack_size);
2473 }
2474
2475 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2476 }
2477
2478 return cache_hits == shaders;
2479 }
2480
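/* Top-level ray-tracing compile: set up the stage array, try the cache,
* otherwise lower each shader for its ray-tracing stage, compile and upload
* it, resolve the shader groups, then compute the pipeline stack size and
* fill out creation feedback.
*/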
2481 static VkResult
2482 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2483 struct vk_pipeline_cache *cache,
2484 const VkRayTracingPipelineCreateInfoKHR *info)
2485 {
2486 const struct intel_device_info *devinfo = &pipeline->base.device->info;
2487 VkResult result;
2488
2489 VkPipelineCreationFeedback pipeline_feedback = {
2490 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2491 };
2492 int64_t pipeline_start = os_time_get_nano();
2493
2494 void *pipeline_ctx = ralloc_context(NULL);
2495
2496 struct anv_pipeline_stage *stages =
2497 anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2498
2499 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2500
2501 const bool skip_cache_lookup =
2502 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2503
2504 uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2505
2506 if (!skip_cache_lookup &&
2507 anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2508 pipeline_feedback.flags |=
2509 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2510 goto done;
2511 }
2512
2513 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
2514 ralloc_free(pipeline_ctx);
2515 return VK_PIPELINE_COMPILE_REQUIRED;
2516 }
2517
2518 for (uint32_t i = 0; i < info->stageCount; i++) {
2519 if (stages[i].info == NULL)
2520 continue;
2521
2522 int64_t stage_start = os_time_get_nano();
2523
2524 stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2525 pipeline_ctx, &stages[i]);
2526 if (stages[i].nir == NULL) {
2527 ralloc_free(pipeline_ctx);
2528 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2529 }
2530
2531 anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2532
2533 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2534 }
2535
2536 for (uint32_t i = 0; i < info->stageCount; i++) {
2537 if (stages[i].info == NULL)
2538 continue;
2539
2540 /* Shader found in cache already. */
2541 if (stages[i].bin != NULL)
2542 continue;
2543
2544 /* We handle intersection shaders as part of the group */
2545 if (stages[i].stage == MESA_SHADER_INTERSECTION)
2546 continue;
2547
2548 int64_t stage_start = os_time_get_nano();
2549
2550 void *stage_ctx = ralloc_context(pipeline_ctx);
2551
2552 nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2553 switch (stages[i].stage) {
2554 case MESA_SHADER_RAYGEN:
2555 brw_nir_lower_raygen(nir);
2556 break;
2557
2558 case MESA_SHADER_ANY_HIT:
2559 brw_nir_lower_any_hit(nir, devinfo);
2560 break;
2561
2562 case MESA_SHADER_CLOSEST_HIT:
2563 brw_nir_lower_closest_hit(nir);
2564 break;
2565
2566 case MESA_SHADER_MISS:
2567 brw_nir_lower_miss(nir);
2568 break;
2569
2570 case MESA_SHADER_INTERSECTION:
2571 unreachable("These are handled later");
2572
2573 case MESA_SHADER_CALLABLE:
2574 brw_nir_lower_callable(nir);
2575 break;
2576
2577 default:
2578 unreachable("Invalid ray-tracing shader stage");
2579 }
2580
2581 result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2582 &stages[i].bin, stage_ctx);
2583 if (result != VK_SUCCESS) {
2584 ralloc_free(pipeline_ctx);
2585 return result;
2586 }
2587
2588 uint32_t stack_size =
2589 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2590 stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2591
2592 ralloc_free(stage_ctx);
2593
2594 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2595 }
2596
2597 for (uint32_t i = 0; i < info->groupCount; i++) {
2598 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2599 struct anv_rt_shader_group *group = &pipeline->groups[i];
2600 group->type = ginfo->type;
2601 switch (ginfo->type) {
2602 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2603 assert(ginfo->generalShader < info->stageCount);
2604 group->general = stages[ginfo->generalShader].bin;
2605 break;
2606
2607 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2608 if (ginfo->anyHitShader < info->stageCount)
2609 group->any_hit = stages[ginfo->anyHitShader].bin;
2610
2611 if (ginfo->closestHitShader < info->stageCount)
2612 group->closest_hit = stages[ginfo->closestHitShader].bin;
2613 break;
2614
2615 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2616 if (ginfo->closestHitShader < info->stageCount)
2617 group->closest_hit = stages[ginfo->closestHitShader].bin;
2618
2619 uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2620 assert(intersection_idx < info->stageCount);
2621
2622 /* Only compile this stage if not already found in the cache. */
2623 if (stages[intersection_idx].bin == NULL) {
2624 /* The any-hit and intersection shaders have to be combined */
2625 uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2626 const nir_shader *any_hit = NULL;
2627 if (any_hit_idx < info->stageCount)
2628 any_hit = stages[any_hit_idx].nir;
2629
2630 void *group_ctx = ralloc_context(pipeline_ctx);
2631 nir_shader *intersection =
2632 nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2633
2634 brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2635 devinfo);
2636
2637 result = compile_upload_rt_shader(pipeline, cache,
2638 intersection,
2639 &stages[intersection_idx],
2640 &group->intersection,
2641 group_ctx);
2642 ralloc_free(group_ctx);
2643 if (result != VK_SUCCESS)
2644 return result;
2645 } else {
2646 group->intersection = stages[intersection_idx].bin;
2647 }
2648
2649 uint32_t stack_size =
2650 brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2651 stack_max[MESA_SHADER_INTERSECTION] =
2652 MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2653
2654 break;
2655 }
2656
2657 default:
2658 unreachable("Invalid ray tracing shader group type");
2659 }
2660 }
2661
2662 done:
2663 ralloc_free(pipeline_ctx);
2664
2665 anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2666
2667 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2668
2669 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2670 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2671 if (create_feedback) {
2672 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2673
2674 uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
2675 assert(stage_count == 0 || info->stageCount == stage_count);
2676 for (uint32_t i = 0; i < stage_count; i++) {
2677 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
2678 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
2679 }
2680 }
2681
2682 return VK_SUCCESS;
2683 }
2684
2685 VkResult
2686 anv_device_init_rt_shaders(struct anv_device *device)
2687 {
2688 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2689 return VK_SUCCESS;
2690
2691 bool cache_hit;
2692
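/* The ray-tracing trampoline is an internal compute shader used to launch
* raygen shaders (see brw_nir_create_raygen_trampoline). It is built once
* per device and kept in the internal cache under a fixed key.
*/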
2693 struct brw_rt_trampoline {
2694 char name[16];
2695 struct brw_cs_prog_key key;
2696 } trampoline_key = {
2697 .name = "rt-trampoline",
2698 };
2699 device->rt_trampoline =
2700 anv_device_search_for_kernel(device, device->internal_cache,
2701 &trampoline_key, sizeof(trampoline_key),
2702 &cache_hit);
2703 if (device->rt_trampoline == NULL) {
2704
2705 void *tmp_ctx = ralloc_context(NULL);
2706 nir_shader *trampoline_nir =
2707 brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
2708
2709 trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
2710
2711 struct anv_pipeline_bind_map bind_map = {
2712 .surface_count = 0,
2713 .sampler_count = 0,
2714 };
2715 uint32_t dummy_params[4] = { 0, };
2716 struct brw_cs_prog_data trampoline_prog_data = {
2717 .base.nr_params = 4,
2718 .base.param = dummy_params,
2719 .uses_inline_data = true,
2720 .uses_btd_stack_ids = true,
2721 };
2722 struct brw_compile_cs_params params = {
2723 .nir = trampoline_nir,
2724 .key = &trampoline_key.key,
2725 .prog_data = &trampoline_prog_data,
2726 .log_data = device,
2727 };
2728 const unsigned *tramp_data =
2729 brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
2730
2731 device->rt_trampoline =
2732 anv_device_upload_kernel(device, device->internal_cache,
2733 MESA_SHADER_COMPUTE,
2734 &trampoline_key, sizeof(trampoline_key),
2735 tramp_data,
2736 trampoline_prog_data.base.program_size,
2737 &trampoline_prog_data.base,
2738 sizeof(trampoline_prog_data),
2739 NULL, 0, NULL, &bind_map);
2740
2741 ralloc_free(tmp_ctx);
2742
2743 if (device->rt_trampoline == NULL)
2744 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2745 }
2746
2747 /* The cache already has a reference and it's not going anywhere so there
2748 * is no need to hold a second reference.
2749 */
2750 anv_shader_bin_unref(device, device->rt_trampoline);
2751
2752 struct brw_rt_trivial_return {
2753 char name[16];
2754 struct brw_bs_prog_key key;
2755 } return_key = {
2756 .name = "rt-trivial-ret",
2757 };
2758 device->rt_trivial_return =
2759 anv_device_search_for_kernel(device, device->internal_cache,
2760 &return_key, sizeof(return_key),
2761 &cache_hit);
2762 if (device->rt_trivial_return == NULL) {
2763 void *tmp_ctx = ralloc_context(NULL);
2764 nir_shader *trivial_return_nir =
2765 brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
2766
2767 NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
2768
2769 struct anv_pipeline_bind_map bind_map = {
2770 .surface_count = 0,
2771 .sampler_count = 0,
2772 };
2773 struct brw_bs_prog_data return_prog_data = { 0, };
2774 struct brw_compile_bs_params params = {
2775 .nir = trivial_return_nir,
2776 .key = &return_key.key,
2777 .prog_data = &return_prog_data,
2778
2779 .log_data = device,
2780 };
2781 const unsigned *return_data =
2782 brw_compile_bs(device->physical->compiler, tmp_ctx, &params);
2783
2784 device->rt_trivial_return =
2785 anv_device_upload_kernel(device, device->internal_cache,
2786 MESA_SHADER_CALLABLE,
2787 &return_key, sizeof(return_key),
2788 return_data, return_prog_data.base.program_size,
2789 &return_prog_data.base, sizeof(return_prog_data),
2790 NULL, 0, NULL, &bind_map);
2791
2792 ralloc_free(tmp_ctx);
2793
2794 if (device->rt_trivial_return == NULL)
2795 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2796 }
2797
2798 /* The cache already has a reference and it's not going anywhere so there
2799 * is no need to hold a second reference.
2800 */
2801 anv_shader_bin_unref(device, device->rt_trivial_return);
2802
2803 return VK_SUCCESS;
2804 }
2805
2806 void
2807 anv_device_finish_rt_shaders(struct anv_device *device)
2808 {
2809 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2810 return;
2811 }
2812
2813 static VkResult
2814 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
2815 struct anv_device *device,
2816 struct vk_pipeline_cache *cache,
2817 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
2818 const VkAllocationCallbacks *alloc)
2819 {
2820 VkResult result;
2821
2822 util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
2823
2824 result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
2825 if (result != VK_SUCCESS)
2826 goto fail;
2827
2828 anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
2829
2830 return VK_SUCCESS;
2831
2832 fail:
2833 util_dynarray_foreach(&pipeline->shaders,
2834 struct anv_shader_bin *, shader) {
2835 anv_shader_bin_unref(device, *shader);
2836 }
2837 return result;
2838 }
2839
2840 static void
2841 assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
2842 uint32_t stage_idx,
2843 VkShaderStageFlags valid_stages)
2844 {
2845 if (stage_idx == VK_SHADER_UNUSED_KHR)
2846 return;
2847
2848 assert(stage_idx < pCreateInfo->stageCount);
2849 assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
2850 assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
2851 }
2852
2853 static VkResult
2854 anv_ray_tracing_pipeline_create(
2855 VkDevice _device,
2856 struct vk_pipeline_cache * cache,
2857 const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
2858 const VkAllocationCallbacks* pAllocator,
2859 VkPipeline* pPipeline)
2860 {
2861 ANV_FROM_HANDLE(anv_device, device, _device);
2862 VkResult result;
2863
2864 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
2865
2866 VK_MULTIALLOC(ma);
2867 VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
2868 VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount);
2869 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
2870 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
2871 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2872
2873 result = anv_pipeline_init(&pipeline->base, device,
2874 ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags,
2875 pAllocator);
2876 if (result != VK_SUCCESS) {
2877 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2878 return result;
2879 }
2880
2881 pipeline->group_count = pCreateInfo->groupCount;
2882 pipeline->groups = groups;
2883
2884 ASSERTED const VkShaderStageFlags ray_tracing_stages =
2885 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2886 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
2887 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
2888 VK_SHADER_STAGE_MISS_BIT_KHR |
2889 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
2890 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2891
2892 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2893 assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);
2894
2895 for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
2896 const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
2897 &pCreateInfo->pGroups[i];
2898 assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
2899 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2900 VK_SHADER_STAGE_MISS_BIT_KHR |
2901 VK_SHADER_STAGE_CALLABLE_BIT_KHR);
2902 assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
2903 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
2904 assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
2905 VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
2906 assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
2907 VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
2908 switch (ginfo->type) {
2909 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2910 assert(ginfo->generalShader < pCreateInfo->stageCount);
2911 assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
2912 assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
2913 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
2914 break;
2915
2916 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2917 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
2918 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
2919 break;
2920
2921 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
2922 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
2923 break;
2924
2925 default:
2926 unreachable("Invalid ray-tracing shader group type");
2927 }
2928 }
2929
2930 result = anv_ray_tracing_pipeline_init(pipeline, device, cache,
2931 pCreateInfo, pAllocator);
2932 if (result != VK_SUCCESS) {
2933 anv_pipeline_finish(&pipeline->base, device, pAllocator);
2934 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2935 return result;
2936 }
2937
2938 anv_genX(&device->info, ray_tracing_pipeline_emit)(pipeline);
2939
2940 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2941
2942 return pipeline->base.batch.status;
2943 }
2944
2945 VkResult
2946 anv_CreateRayTracingPipelinesKHR(
2947 VkDevice _device,
2948 VkDeferredOperationKHR deferredOperation,
2949 VkPipelineCache pipelineCache,
2950 uint32_t createInfoCount,
2951 const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
2952 const VkAllocationCallbacks* pAllocator,
2953 VkPipeline* pPipelines)
2954 {
2955 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2956
2957 VkResult result = VK_SUCCESS;
2958
2959 unsigned i;
2960 for (i = 0; i < createInfoCount; i++) {
2961 VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
2962 &pCreateInfos[i],
2963 pAllocator, &pPipelines[i]);
2964
2965 if (res == VK_SUCCESS)
2966 continue;
2967
2968 /* Bail out on the first error as it is not obvious what error should be
2969 * reported upon two different failures. */
2970 result = res;
2971 if (result != VK_PIPELINE_COMPILE_REQUIRED)
2972 break;
2973
2974 pPipelines[i] = VK_NULL_HANDLE;
2975
2976 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2977 break;
2978 }
2979
2980 for (; i < createInfoCount; i++)
2981 pPipelines[i] = VK_NULL_HANDLE;
2982
2983 return result;
2984 }
2985
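/* Writes a formatted string into a fixed-size char array field, asserting
* that the result fits. Illustrative use: WRITE_STR(props->name, "SIMD%d", 16).
*/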
2986 #define WRITE_STR(field, ...) ({ \
2987 memset(field, 0, sizeof(field)); \
2988 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
2989 assert(i > 0 && i < sizeof(field)); \
2990 })
2991
2992 VkResult anv_GetPipelineExecutablePropertiesKHR(
2993 VkDevice device,
2994 const VkPipelineInfoKHR* pPipelineInfo,
2995 uint32_t* pExecutableCount,
2996 VkPipelineExecutablePropertiesKHR* pProperties)
2997 {
2998 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
2999 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3000 pProperties, pExecutableCount);
3001
3002 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
3003 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3004 gl_shader_stage stage = exe->stage;
3005 props->stages = mesa_to_vk_shader_stage(stage);
3006
3007 unsigned simd_width = exe->stats.dispatch_width;
3008 if (stage == MESA_SHADER_FRAGMENT) {
3009 WRITE_STR(props->name, "%s%d %s",
3010 simd_width ? "SIMD" : "vec",
3011 simd_width ? simd_width : 4,
3012 _mesa_shader_stage_to_string(stage));
3013 } else {
3014 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
3015 }
3016 WRITE_STR(props->description, "%s%d %s shader",
3017 simd_width ? "SIMD" : "vec",
3018 simd_width ? simd_width : 4,
3019 _mesa_shader_stage_to_string(stage));
3020
3021 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
3022 * wants a subgroup size of 1.
3023 */
3024 props->subgroupSize = MAX2(simd_width, 1);
3025 }
3026 }
3027
3028 return vk_outarray_status(&out);
3029 }
3030
3031 static const struct anv_pipeline_executable *
3032 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
3033 {
3034 assert(index < util_dynarray_num_elements(&pipeline->executables,
3035 struct anv_pipeline_executable));
3036 return util_dynarray_element(
3037 &pipeline->executables, struct anv_pipeline_executable, index);
3038 }
3039
3040 VkResult anv_GetPipelineExecutableStatisticsKHR(
3041 VkDevice device,
3042 const VkPipelineExecutableInfoKHR* pExecutableInfo,
3043 uint32_t* pStatisticCount,
3044 VkPipelineExecutableStatisticKHR* pStatistics)
3045 {
3046 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3047 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3048 pStatistics, pStatisticCount);
3049
3050 const struct anv_pipeline_executable *exe =
3051 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3052
3053 const struct brw_stage_prog_data *prog_data;
3054 switch (pipeline->type) {
3055 case ANV_PIPELINE_GRAPHICS: {
3056 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
3057 break;
3058 }
3059 case ANV_PIPELINE_COMPUTE: {
3060 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
3061 break;
3062 }
3063 case ANV_PIPELINE_RAY_TRACING: {
3064 struct anv_shader_bin **shader =
3065 util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
3066 struct anv_shader_bin *,
3067 pExecutableInfo->executableIndex);
3068 prog_data = (*shader)->prog_data;
3069 break;
3070 }
3071 default:
3072 unreachable("invalid pipeline type");
3073 }
3074
3075 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3076 WRITE_STR(stat->name, "Instruction Count");
3077 WRITE_STR(stat->description,
3078 "Number of GEN instructions in the final generated "
3079 "shader executable.");
3080 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3081 stat->value.u64 = exe->stats.instructions;
3082 }
3083
3084 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3085 WRITE_STR(stat->name, "SEND Count");
3086 WRITE_STR(stat->description,
3087 "Number of instructions in the final generated shader "
3088 "executable which access external units such as the "
3089 "constant cache or the sampler.");
3090 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3091 stat->value.u64 = exe->stats.sends;
3092 }
3093
3094 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3095 WRITE_STR(stat->name, "Loop Count");
3096 WRITE_STR(stat->description,
3097 "Number of loops (not unrolled) in the final generated "
3098 "shader executable.");
3099 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3100 stat->value.u64 = exe->stats.loops;
3101 }
3102
3103 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3104 WRITE_STR(stat->name, "Cycle Count");
3105 WRITE_STR(stat->description,
3106 "Estimate of the number of EU cycles required to execute "
3107 "the final generated executable. This is an estimate only "
3108 "and may vary greatly from actual run-time performance.");
3109 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3110 stat->value.u64 = exe->stats.cycles;
3111 }
3112
3113 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3114 WRITE_STR(stat->name, "Spill Count");
3115 WRITE_STR(stat->description,
3116 "Number of scratch spill operations. This gives a rough "
3117 "estimate of the cost incurred due to spilling temporary "
3118 "values to memory. If this is non-zero, you may want to "
3119 "adjust your shader to reduce register pressure.");
3120 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3121 stat->value.u64 = exe->stats.spills;
3122 }
3123
3124 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3125 WRITE_STR(stat->name, "Fill Count");
3126 WRITE_STR(stat->description,
3127 "Number of scratch fill operations. This gives a rough "
3128 "estimate of the cost incurred due to spilling temporary "
3129 "values to memory. If this is non-zero, you may want to "
3130 "adjust your shader to reduce register pressure.");
3131 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3132 stat->value.u64 = exe->stats.fills;
3133 }
3134
3135 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3136 WRITE_STR(stat->name, "Scratch Memory Size");
3137 WRITE_STR(stat->description,
3138 "Number of bytes of scratch memory required by the "
3139 "generated shader executable. If this is non-zero, you "
3140 "may want to adjust your shader to reduce register "
3141 "pressure.");
3142 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3143 stat->value.u64 = prog_data->total_scratch;
3144 }
3145
3146 if (gl_shader_stage_uses_workgroup(exe->stage)) {
3147 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3148 WRITE_STR(stat->name, "Workgroup Memory Size");
3149 WRITE_STR(stat->description,
3150 "Number of bytes of workgroup shared memory used by this "
3151 "shader including any padding.");
3152 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3153 stat->value.u64 = prog_data->total_shared;
3154 }
3155 }
3156
3157 return vk_outarray_status(&out);
3158 }
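
/* Application-side usage, a minimal sketch (not driver code): assuming the
 * same `dev`/`pipe` as above and that the pipeline was created with
 * VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR, the statistics for one
 * executable are fetched with the two-call idiom:
 *
 *    VkPipelineExecutableInfoKHR exe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
 *       .pipeline = pipe,
 *       .executableIndex = 0,
 *    };
 *
 *    uint32_t stat_count = 0;
 *    vkGetPipelineExecutableStatisticsKHR(dev, &exe_info, &stat_count, NULL);
 *
 *    VkPipelineExecutableStatisticKHR *stats =
 *       calloc(stat_count, sizeof(*stats));
 *    for (uint32_t i = 0; i < stat_count; i++)
 *       stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
 *    vkGetPipelineExecutableStatisticsKHR(dev, &exe_info, &stat_count, stats);
 *
 * Every statistic emitted above uses the UINT64 format, so the values are
 * read from stats[i].value.u64.
 */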
3159
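/* Copy a NUL-terminated IR string into the client-provided buffer, following
 * the Vulkan two-call idiom: when pData is NULL only the required size is
 * reported; when the provided buffer is too small a truncated prefix is
 * written and false is returned so the caller can report VK_INCOMPLETE.
 */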
3160 static bool
3161 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3162 const char *data)
3163 {
3164 ir->isText = VK_TRUE;
3165
3166 size_t data_len = strlen(data) + 1;
3167
3168 if (ir->pData == NULL) {
3169 ir->dataSize = data_len;
3170 return true;
3171 }
3172
3173 strncpy(ir->pData, data, ir->dataSize);
3174 if (ir->dataSize < data_len)
3175 return false;
3176
3177 ir->dataSize = data_len;
3178 return true;
3179 }
3180
3181 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
3182 VkDevice device,
3183 const VkPipelineExecutableInfoKHR* pExecutableInfo,
3184 uint32_t* pInternalRepresentationCount,
3185 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
3186 {
3187 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3188 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3189 pInternalRepresentations, pInternalRepresentationCount);
3190 bool incomplete_text = false;
3191
3192 const struct anv_pipeline_executable *exe =
3193 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3194
3195 if (exe->nir) {
3196 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
3197 WRITE_STR(ir->name, "Final NIR");
3198 WRITE_STR(ir->description,
3199 "Final NIR before going into the back-end compiler");
3200
3201 if (!write_ir_text(ir, exe->nir))
3202 incomplete_text = true;
3203 }
3204 }
3205
3206 if (exe->disasm) {
3207 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
3208 WRITE_STR(ir->name, "GEN Assembly");
3209 WRITE_STR(ir->description,
3210 "Final GEN assembly for the generated shader binary");
3211
3212 if (!write_ir_text(ir, exe->disasm))
3213 incomplete_text = true;
3214 }
3215 }
3216
3217 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3218 }
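
/* Application-side usage, a minimal sketch (not driver code): assuming the
 * same `exe_info` as above and a pipeline created with
 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR. Because
 * write_ir_text() only reports dataSize when pData is NULL, fetching the
 * actual text takes one extra round trip:
 *
 *    uint32_t ir_count = 0;
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &exe_info,
 *                                                      &ir_count, NULL);
 *
 *    VkPipelineExecutableInternalRepresentationKHR *irs =
 *       calloc(ir_count, sizeof(*irs));
 *    for (uint32_t i = 0; i < ir_count; i++)
 *       irs[i].sType =
 *          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR;
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &exe_info,
 *                                                      &ir_count, irs);
 *
 *    for (uint32_t i = 0; i < ir_count; i++)
 *       irs[i].pData = malloc(irs[i].dataSize);
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &exe_info,
 *                                                      &ir_count, irs);
 */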
3219
3220 VkResult
3221 anv_GetRayTracingShaderGroupHandlesKHR(
3222 VkDevice _device,
3223 VkPipeline _pipeline,
3224 uint32_t firstGroup,
3225 uint32_t groupCount,
3226 size_t dataSize,
3227 void* pData)
3228 {
3229 ANV_FROM_HANDLE(anv_device, device, _device);
3230 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3231
3232 if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
3233 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3234
3235 struct anv_ray_tracing_pipeline *rt_pipeline =
3236 anv_pipeline_to_ray_tracing(pipeline);
3237
3238 for (uint32_t i = 0; i < groupCount; i++) {
3239 struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
3240 memcpy(pData, group->handle, sizeof(group->handle));
3241 pData += sizeof(group->handle);
3242 }
3243
3244 return VK_SUCCESS;
3245 }
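
/* Application-side usage, a minimal sketch (not driver code): the handle
 * buffer is sized from the physical-device ray-tracing properties. `rt_props`
 * is assumed to be a previously queried
 * VkPhysicalDeviceRayTracingPipelinePropertiesKHR and `group_count` the
 * number of shader groups the pipeline was created with:
 *
 *    size_t handles_size =
 *       (size_t)group_count * rt_props.shaderGroupHandleSize;
 *    uint8_t *handles = malloc(handles_size);
 *    vkGetRayTracingShaderGroupHandlesKHR(dev, pipe, 0, group_count,
 *                                         handles_size, handles);
 *
 * The handles are then laid out into a shader binding table, with each record
 * aligned to shaderGroupHandleAlignment and each table region to
 * shaderGroupBaseAlignment.
 */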
3246
3247 VkResult
3248 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
3249 VkDevice _device,
3250 VkPipeline pipeline,
3251 uint32_t firstGroup,
3252 uint32_t groupCount,
3253 size_t dataSize,
3254 void* pData)
3255 {
3256 ANV_FROM_HANDLE(anv_device, device, _device);
3257 unreachable("Unimplemented");
3258 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3259 }
3260
3261 VkDeviceSize
3262 anv_GetRayTracingShaderGroupStackSizeKHR(
3263 VkDevice device,
3264 VkPipeline _pipeline,
3265 uint32_t group,
3266 VkShaderGroupShaderKHR groupShader)
3267 {
3268 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3269 assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
3270
3271 struct anv_ray_tracing_pipeline *rt_pipeline =
3272 anv_pipeline_to_ray_tracing(pipeline);
3273
3274 assert(group < rt_pipeline->group_count);
3275
3276 struct anv_shader_bin *bin;
3277 switch (groupShader) {
3278 case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
3279 bin = rt_pipeline->groups[group].general;
3280 break;
3281
3282 case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
3283 bin = rt_pipeline->groups[group].closest_hit;
3284 break;
3285
3286 case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
3287 bin = rt_pipeline->groups[group].any_hit;
3288 break;
3289
3290 case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
3291 bin = rt_pipeline->groups[group].intersection;
3292 break;
3293
3294 default:
3295 unreachable("Invalid VkShaderGroupShader enum");
3296 }
3297
3298 if (bin == NULL)
3299 return 0;
3300
3301 return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
3302 }
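
/* Application-side usage, a minimal sketch (not driver code): applications
 * that opt out of the driver-computed default stack size can combine the
 * per-group values into a conservative bound and set it on the command
 * buffer. Here group 0 is assumed to be the ray-generation group, `cmd` a
 * command buffer that will record the trace, and `max_depth` the deepest
 * trace recursion actually used:
 *
 *    VkDeviceSize rgen = vkGetRayTracingShaderGroupStackSizeKHR(
 *       dev, pipe, 0, VK_SHADER_GROUP_SHADER_GENERAL_KHR);
 *
 *    VkDeviceSize hit = 0;
 *    for (uint32_t g = 1; g < group_count; g++) {
 *       VkDeviceSize s = vkGetRayTracingShaderGroupStackSizeKHR(
 *          dev, pipe, g, VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR);
 *       if (s > hit)
 *          hit = s;
 *    }
 *
 *    vkCmdSetRayTracingPipelineStackSizeKHR(
 *       cmd, (uint32_t)(rgen + max_depth * hit));
 *
 * This is deliberately simpler than the full recommendation in the Vulkan
 * spec, which also folds in miss, any-hit, intersection and callable stack
 * sizes.
 */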
3303