1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Google
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "radv_acceleration_structure.h"
25bf215546Sopenharmony_ci#include "radv_debug.h"
26bf215546Sopenharmony_ci#include "radv_meta.h"
27bf215546Sopenharmony_ci#include "radv_private.h"
28bf215546Sopenharmony_ci#include "radv_rt_common.h"
29bf215546Sopenharmony_ci#include "radv_shader.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "nir/nir.h"
32bf215546Sopenharmony_ci#include "nir/nir_builder.h"
33bf215546Sopenharmony_ci#include "nir/nir_builtin_builder.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cistatic VkRayTracingPipelineCreateInfoKHR
36bf215546Sopenharmony_ciradv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
37bf215546Sopenharmony_ci{
38bf215546Sopenharmony_ci   VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo;
39bf215546Sopenharmony_ci   uint32_t total_stages = pCreateInfo->stageCount;
40bf215546Sopenharmony_ci   uint32_t total_groups = pCreateInfo->groupCount;
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci   if (pCreateInfo->pLibraryInfo) {
43bf215546Sopenharmony_ci      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
44bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
45bf215546Sopenharmony_ci         struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci         total_stages += library_pipeline->stage_count;
48bf215546Sopenharmony_ci         total_groups += library_pipeline->group_count;
49bf215546Sopenharmony_ci      }
50bf215546Sopenharmony_ci   }
51bf215546Sopenharmony_ci   VkPipelineShaderStageCreateInfo *stages = NULL;
52bf215546Sopenharmony_ci   VkRayTracingShaderGroupCreateInfoKHR *groups = NULL;
53bf215546Sopenharmony_ci   local_create_info.stageCount = total_stages;
54bf215546Sopenharmony_ci   local_create_info.groupCount = total_groups;
55bf215546Sopenharmony_ci   local_create_info.pStages = stages =
56bf215546Sopenharmony_ci      malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages);
57bf215546Sopenharmony_ci   local_create_info.pGroups = groups =
58bf215546Sopenharmony_ci      malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups);
59bf215546Sopenharmony_ci   if (!local_create_info.pStages || !local_create_info.pGroups)
60bf215546Sopenharmony_ci      return local_create_info;
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   total_stages = pCreateInfo->stageCount;
63bf215546Sopenharmony_ci   total_groups = pCreateInfo->groupCount;
64bf215546Sopenharmony_ci   for (unsigned j = 0; j < pCreateInfo->stageCount; ++j)
65bf215546Sopenharmony_ci      stages[j] = pCreateInfo->pStages[j];
66bf215546Sopenharmony_ci   for (unsigned j = 0; j < pCreateInfo->groupCount; ++j)
67bf215546Sopenharmony_ci      groups[j] = pCreateInfo->pGroups[j];
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   if (pCreateInfo->pLibraryInfo) {
70bf215546Sopenharmony_ci      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
71bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
72bf215546Sopenharmony_ci         struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci         for (unsigned j = 0; j < library_pipeline->stage_count; ++j)
75bf215546Sopenharmony_ci            stages[total_stages + j] = library_pipeline->stages[j];
76bf215546Sopenharmony_ci         for (unsigned j = 0; j < library_pipeline->group_count; ++j) {
77bf215546Sopenharmony_ci            VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j];
78bf215546Sopenharmony_ci            *dst = library_pipeline->groups[j];
79bf215546Sopenharmony_ci            if (dst->generalShader != VK_SHADER_UNUSED_KHR)
80bf215546Sopenharmony_ci               dst->generalShader += total_stages;
81bf215546Sopenharmony_ci            if (dst->closestHitShader != VK_SHADER_UNUSED_KHR)
82bf215546Sopenharmony_ci               dst->closestHitShader += total_stages;
83bf215546Sopenharmony_ci            if (dst->anyHitShader != VK_SHADER_UNUSED_KHR)
84bf215546Sopenharmony_ci               dst->anyHitShader += total_stages;
85bf215546Sopenharmony_ci            if (dst->intersectionShader != VK_SHADER_UNUSED_KHR)
86bf215546Sopenharmony_ci               dst->intersectionShader += total_stages;
87bf215546Sopenharmony_ci         }
88bf215546Sopenharmony_ci         total_stages += library_pipeline->stage_count;
89bf215546Sopenharmony_ci         total_groups += library_pipeline->group_count;
90bf215546Sopenharmony_ci      }
91bf215546Sopenharmony_ci   }
92bf215546Sopenharmony_ci   return local_create_info;
93bf215546Sopenharmony_ci}
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_cistatic VkResult
96bf215546Sopenharmony_ciradv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache,
97bf215546Sopenharmony_ci                                const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
98bf215546Sopenharmony_ci                                const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
99bf215546Sopenharmony_ci{
100bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
101bf215546Sopenharmony_ci   struct radv_library_pipeline *pipeline;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
104bf215546Sopenharmony_ci                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
105bf215546Sopenharmony_ci   if (pipeline == NULL)
106bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_LIBRARY);
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   VkRayTracingPipelineCreateInfoKHR local_create_info =
111bf215546Sopenharmony_ci      radv_create_merged_rt_create_info(pCreateInfo);
112bf215546Sopenharmony_ci   if (!local_create_info.pStages || !local_create_info.pGroups)
113bf215546Sopenharmony_ci      goto fail;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   if (local_create_info.stageCount) {
116bf215546Sopenharmony_ci      pipeline->stage_count = local_create_info.stageCount;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci      size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount;
119bf215546Sopenharmony_ci      pipeline->stages = malloc(size);
120bf215546Sopenharmony_ci      if (!pipeline->stages)
121bf215546Sopenharmony_ci         goto fail;
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci      memcpy(pipeline->stages, local_create_info.pStages, size);
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci      pipeline->hashes = malloc(sizeof(*pipeline->hashes) * local_create_info.stageCount);
126bf215546Sopenharmony_ci      if (!pipeline->hashes)
127bf215546Sopenharmony_ci         goto fail;
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci      pipeline->identifiers = malloc(sizeof(*pipeline->identifiers) * local_create_info.stageCount);
130bf215546Sopenharmony_ci      if (!pipeline->identifiers)
131bf215546Sopenharmony_ci         goto fail;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci      for (uint32_t i = 0; i < local_create_info.stageCount; i++) {
134bf215546Sopenharmony_ci         RADV_FROM_HANDLE(vk_shader_module, module, pipeline->stages[i].module);
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci         const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
137bf215546Sopenharmony_ci            vk_find_struct_const(local_create_info.pStages[i].pNext,
138bf215546Sopenharmony_ci                                 PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci         if (module) {
141bf215546Sopenharmony_ci            struct vk_shader_module *new_module = vk_shader_module_clone(NULL, module);
142bf215546Sopenharmony_ci            pipeline->stages[i].module = vk_shader_module_to_handle(new_module);
143bf215546Sopenharmony_ci            pipeline->stages[i].pNext = NULL;
144bf215546Sopenharmony_ci         } else {
145bf215546Sopenharmony_ci            assert(iinfo);
146bf215546Sopenharmony_ci            pipeline->identifiers[i].identifierSize =
147bf215546Sopenharmony_ci               MIN2(iinfo->identifierSize, sizeof(pipeline->hashes[i].sha1));
148bf215546Sopenharmony_ci            memcpy(pipeline->hashes[i].sha1, iinfo->pIdentifier,
149bf215546Sopenharmony_ci                   pipeline->identifiers[i].identifierSize);
150bf215546Sopenharmony_ci            pipeline->stages[i].module = VK_NULL_HANDLE;
151bf215546Sopenharmony_ci            pipeline->stages[i].pNext = &pipeline->identifiers[i];
152bf215546Sopenharmony_ci            pipeline->identifiers[i].sType =
153bf215546Sopenharmony_ci               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT;
154bf215546Sopenharmony_ci            pipeline->identifiers[i].pNext = NULL;
155bf215546Sopenharmony_ci            pipeline->identifiers[i].pIdentifier = pipeline->hashes[i].sha1;
156bf215546Sopenharmony_ci         }
157bf215546Sopenharmony_ci      }
158bf215546Sopenharmony_ci   }
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   if (local_create_info.groupCount) {
161bf215546Sopenharmony_ci      size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount;
162bf215546Sopenharmony_ci      pipeline->group_count = local_create_info.groupCount;
163bf215546Sopenharmony_ci      pipeline->groups = malloc(size);
164bf215546Sopenharmony_ci      if (!pipeline->groups)
165bf215546Sopenharmony_ci         goto fail;
166bf215546Sopenharmony_ci      memcpy(pipeline->groups, local_create_info.pGroups, size);
167bf215546Sopenharmony_ci   }
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   *pPipeline = radv_pipeline_to_handle(&pipeline->base);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   free((void *)local_create_info.pGroups);
172bf215546Sopenharmony_ci   free((void *)local_create_info.pStages);
173bf215546Sopenharmony_ci   return VK_SUCCESS;
174bf215546Sopenharmony_cifail:
175bf215546Sopenharmony_ci   free(pipeline->groups);
176bf215546Sopenharmony_ci   free(pipeline->stages);
177bf215546Sopenharmony_ci   free(pipeline->hashes);
178bf215546Sopenharmony_ci   free(pipeline->identifiers);
179bf215546Sopenharmony_ci   free((void *)local_create_info.pGroups);
180bf215546Sopenharmony_ci   free((void *)local_create_info.pStages);
181bf215546Sopenharmony_ci   return VK_ERROR_OUT_OF_HOST_MEMORY;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci/*
185bf215546Sopenharmony_ci * Global variables for an RT pipeline
186bf215546Sopenharmony_ci */
187bf215546Sopenharmony_cistruct rt_variables {
188bf215546Sopenharmony_ci   const VkRayTracingPipelineCreateInfoKHR *create_info;
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* idx of the next shader to run in the next iteration of the main loop.
191bf215546Sopenharmony_ci    * During traversal, idx is used to store the SBT index and will contain
192bf215546Sopenharmony_ci    * the correct resume index upon returning.
193bf215546Sopenharmony_ci    */
194bf215546Sopenharmony_ci   nir_variable *idx;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* scratch offset of the argument area relative to stack_ptr */
197bf215546Sopenharmony_ci   nir_variable *arg;
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   nir_variable *stack_ptr;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   /* global address of the SBT entry used for the shader */
202bf215546Sopenharmony_ci   nir_variable *shader_record_ptr;
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci   /* trace_ray arguments */
205bf215546Sopenharmony_ci   nir_variable *accel_struct;
206bf215546Sopenharmony_ci   nir_variable *flags;
207bf215546Sopenharmony_ci   nir_variable *cull_mask;
208bf215546Sopenharmony_ci   nir_variable *sbt_offset;
209bf215546Sopenharmony_ci   nir_variable *sbt_stride;
210bf215546Sopenharmony_ci   nir_variable *miss_index;
211bf215546Sopenharmony_ci   nir_variable *origin;
212bf215546Sopenharmony_ci   nir_variable *tmin;
213bf215546Sopenharmony_ci   nir_variable *direction;
214bf215546Sopenharmony_ci   nir_variable *tmax;
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   /* from the BTAS instance currently being visited */
217bf215546Sopenharmony_ci   nir_variable *custom_instance_and_mask;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   /* Properties of the primitive currently being visited. */
220bf215546Sopenharmony_ci   nir_variable *primitive_id;
221bf215546Sopenharmony_ci   nir_variable *geometry_id_and_flags;
222bf215546Sopenharmony_ci   nir_variable *instance_id;
223bf215546Sopenharmony_ci   nir_variable *instance_addr;
224bf215546Sopenharmony_ci   nir_variable *hit_kind;
225bf215546Sopenharmony_ci   nir_variable *opaque;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   /* Safeguard to ensure we don't end up in an infinite loop of non-existing case. Should not be
228bf215546Sopenharmony_ci    * needed but is extra anti-hang safety during bring-up. */
229bf215546Sopenharmony_ci   nir_variable *main_loop_case_visited;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   /* Output variables for intersection & anyhit shaders. */
232bf215546Sopenharmony_ci   nir_variable *ahit_accept;
233bf215546Sopenharmony_ci   nir_variable *ahit_terminate;
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci   /* Array of stack size struct for recording the max stack size for each group. */
236bf215546Sopenharmony_ci   struct radv_pipeline_shader_stack_size *stack_sizes;
237bf215546Sopenharmony_ci   unsigned stage_idx;
238bf215546Sopenharmony_ci};
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_cistatic void
241bf215546Sopenharmony_cireserve_stack_size(struct rt_variables *vars, uint32_t size)
242bf215546Sopenharmony_ci{
243bf215546Sopenharmony_ci   for (uint32_t group_idx = 0; group_idx < vars->create_info->groupCount; group_idx++) {
244bf215546Sopenharmony_ci      const VkRayTracingShaderGroupCreateInfoKHR *group = vars->create_info->pGroups + group_idx;
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci      if (vars->stage_idx == group->generalShader || vars->stage_idx == group->closestHitShader)
247bf215546Sopenharmony_ci         vars->stack_sizes[group_idx].recursive_size =
248bf215546Sopenharmony_ci            MAX2(vars->stack_sizes[group_idx].recursive_size, size);
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci      if (vars->stage_idx == group->anyHitShader || vars->stage_idx == group->intersectionShader)
251bf215546Sopenharmony_ci         vars->stack_sizes[group_idx].non_recursive_size =
252bf215546Sopenharmony_ci            MAX2(vars->stack_sizes[group_idx].non_recursive_size, size);
253bf215546Sopenharmony_ci   }
254bf215546Sopenharmony_ci}
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_cistatic struct rt_variables
257bf215546Sopenharmony_cicreate_rt_variables(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *create_info,
258bf215546Sopenharmony_ci                    struct radv_pipeline_shader_stack_size *stack_sizes)
259bf215546Sopenharmony_ci{
260bf215546Sopenharmony_ci   struct rt_variables vars = {
261bf215546Sopenharmony_ci      .create_info = create_info,
262bf215546Sopenharmony_ci   };
263bf215546Sopenharmony_ci   vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx");
264bf215546Sopenharmony_ci   vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg");
265bf215546Sopenharmony_ci   vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
266bf215546Sopenharmony_ci   vars.shader_record_ptr =
267bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
270bf215546Sopenharmony_ci   vars.accel_struct =
271bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct");
272bf215546Sopenharmony_ci   vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags");
273bf215546Sopenharmony_ci   vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask");
274bf215546Sopenharmony_ci   vars.sbt_offset =
275bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset");
276bf215546Sopenharmony_ci   vars.sbt_stride =
277bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride");
278bf215546Sopenharmony_ci   vars.miss_index =
279bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index");
280bf215546Sopenharmony_ci   vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin");
281bf215546Sopenharmony_ci   vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin");
282bf215546Sopenharmony_ci   vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction");
283bf215546Sopenharmony_ci   vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax");
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   vars.custom_instance_and_mask = nir_variable_create(
286bf215546Sopenharmony_ci      shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask");
287bf215546Sopenharmony_ci   vars.primitive_id =
288bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
289bf215546Sopenharmony_ci   vars.geometry_id_and_flags =
290bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags");
291bf215546Sopenharmony_ci   vars.instance_id =
292bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id");
293bf215546Sopenharmony_ci   vars.instance_addr =
294bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
295bf215546Sopenharmony_ci   vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind");
296bf215546Sopenharmony_ci   vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque");
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci   vars.main_loop_case_visited =
299bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "main_loop_case_visited");
300bf215546Sopenharmony_ci   vars.ahit_accept =
301bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept");
302bf215546Sopenharmony_ci   vars.ahit_terminate =
303bf215546Sopenharmony_ci      nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate");
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   vars.stack_sizes = stack_sizes;
306bf215546Sopenharmony_ci   return vars;
307bf215546Sopenharmony_ci}
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci/*
310bf215546Sopenharmony_ci * Remap all the variables between the two rt_variables struct for inlining.
311bf215546Sopenharmony_ci */
312bf215546Sopenharmony_cistatic void
313bf215546Sopenharmony_cimap_rt_variables(struct hash_table *var_remap, struct rt_variables *src,
314bf215546Sopenharmony_ci                 const struct rt_variables *dst)
315bf215546Sopenharmony_ci{
316bf215546Sopenharmony_ci   src->create_info = dst->create_info;
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->idx, dst->idx);
319bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->arg, dst->arg);
320bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr);
321bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr);
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct);
324bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->flags, dst->flags);
325bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask);
326bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset);
327bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride);
328bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index);
329bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->origin, dst->origin);
330bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin);
331bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->direction, dst->direction);
332bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax);
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask);
335bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id);
336bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags);
337bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id);
338bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr);
339bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind);
340bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque);
341bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->ahit_accept, dst->ahit_accept);
342bf215546Sopenharmony_ci   _mesa_hash_table_insert(var_remap, src->ahit_terminate, dst->ahit_terminate);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   src->stack_sizes = dst->stack_sizes;
345bf215546Sopenharmony_ci   src->stage_idx = dst->stage_idx;
346bf215546Sopenharmony_ci}
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci/*
349bf215546Sopenharmony_ci * Create a copy of the global rt variables where the primitive/instance related variables are
350bf215546Sopenharmony_ci * independent.This is needed as we need to keep the old values of the global variables around
351bf215546Sopenharmony_ci * in case e.g. an anyhit shader reject the collision. So there are inner variables that get copied
352bf215546Sopenharmony_ci * to the outer variables once we commit to a better hit.
353bf215546Sopenharmony_ci */
354bf215546Sopenharmony_cistatic struct rt_variables
355bf215546Sopenharmony_cicreate_inner_vars(nir_builder *b, const struct rt_variables *vars)
356bf215546Sopenharmony_ci{
357bf215546Sopenharmony_ci   struct rt_variables inner_vars = *vars;
358bf215546Sopenharmony_ci   inner_vars.idx =
359bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
360bf215546Sopenharmony_ci   inner_vars.shader_record_ptr = nir_variable_create(
361bf215546Sopenharmony_ci      b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
362bf215546Sopenharmony_ci   inner_vars.primitive_id =
363bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id");
364bf215546Sopenharmony_ci   inner_vars.geometry_id_and_flags = nir_variable_create(
365bf215546Sopenharmony_ci      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags");
366bf215546Sopenharmony_ci   inner_vars.tmax =
367bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax");
368bf215546Sopenharmony_ci   inner_vars.instance_id =
369bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id");
370bf215546Sopenharmony_ci   inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp,
371bf215546Sopenharmony_ci                                                  glsl_uint64_t_type(), "inner_instance_addr");
372bf215546Sopenharmony_ci   inner_vars.hit_kind =
373bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind");
374bf215546Sopenharmony_ci   inner_vars.custom_instance_and_mask = nir_variable_create(
375bf215546Sopenharmony_ci      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask");
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci   return inner_vars;
378bf215546Sopenharmony_ci}
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci/* The hit attributes are stored on the stack. This is the offset compared to the current stack
381bf215546Sopenharmony_ci * pointer of where the hit attrib is stored. */
382bf215546Sopenharmony_ciconst uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE);
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_cistatic void
385bf215546Sopenharmony_ciinsert_rt_return(nir_builder *b, const struct rt_variables *vars)
386bf215546Sopenharmony_ci{
387bf215546Sopenharmony_ci   nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), -16), 1);
388bf215546Sopenharmony_ci   nir_store_var(b, vars->idx,
389bf215546Sopenharmony_ci                 nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1);
390bf215546Sopenharmony_ci}
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_cienum sbt_type {
393bf215546Sopenharmony_ci   SBT_RAYGEN = offsetof(VkTraceRaysIndirectCommand2KHR, raygenShaderRecordAddress),
394bf215546Sopenharmony_ci   SBT_MISS = offsetof(VkTraceRaysIndirectCommand2KHR, missShaderBindingTableAddress),
395bf215546Sopenharmony_ci   SBT_HIT = offsetof(VkTraceRaysIndirectCommand2KHR, hitShaderBindingTableAddress),
396bf215546Sopenharmony_ci   SBT_CALLABLE = offsetof(VkTraceRaysIndirectCommand2KHR, callableShaderBindingTableAddress),
397bf215546Sopenharmony_ci};
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistatic nir_ssa_def *
400bf215546Sopenharmony_ciget_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding)
401bf215546Sopenharmony_ci{
402bf215546Sopenharmony_ci   nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci   nir_ssa_def *desc =
405bf215546Sopenharmony_ci      nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
408bf215546Sopenharmony_ci   nir_ssa_def *stride =
409bf215546Sopenharmony_ci      nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset));
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride));
412bf215546Sopenharmony_ci}
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_cistatic void
415bf215546Sopenharmony_ciload_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx,
416bf215546Sopenharmony_ci               enum sbt_type binding, unsigned offset)
417bf215546Sopenharmony_ci{
418bf215546Sopenharmony_ci   nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset);
421bf215546Sopenharmony_ci   nir_ssa_def *v_idx = nir_build_load_global(b, 1, 32, load_addr);
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   nir_store_var(b, vars->idx, v_idx, 1);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   nir_ssa_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE);
426bf215546Sopenharmony_ci   nir_store_var(b, vars->shader_record_ptr, record_addr, 1);
427bf215546Sopenharmony_ci}
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci/* This lowers all the RT instructions that we do not want to pass on to the combined shader and
430bf215546Sopenharmony_ci * that we can implement using the variables from the shader we are going to inline into. */
431bf215546Sopenharmony_cistatic void
432bf215546Sopenharmony_cilower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base)
433bf215546Sopenharmony_ci{
434bf215546Sopenharmony_ci   nir_builder b_shader;
435bf215546Sopenharmony_ci   nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader));
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
438bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
439bf215546Sopenharmony_ci         switch (instr->type) {
440bf215546Sopenharmony_ci         case nir_instr_type_intrinsic: {
441bf215546Sopenharmony_ci            b_shader.cursor = nir_before_instr(instr);
442bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
443bf215546Sopenharmony_ci            nir_ssa_def *ret = NULL;
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_ci            switch (intr->intrinsic) {
446bf215546Sopenharmony_ci            case nir_intrinsic_rt_execute_callable: {
447bf215546Sopenharmony_ci               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
448bf215546Sopenharmony_ci               uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci               nir_store_var(
451bf215546Sopenharmony_ci                  &b_shader, vars->stack_ptr,
452bf215546Sopenharmony_ci                  nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
453bf215546Sopenharmony_ci               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx),
454bf215546Sopenharmony_ci                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16);
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->stack_ptr,
457bf215546Sopenharmony_ci                             nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16),
458bf215546Sopenharmony_ci                             1);
459bf215546Sopenharmony_ci               load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->arg,
462bf215546Sopenharmony_ci                             nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1);
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci               reserve_stack_size(vars, size + 16);
465bf215546Sopenharmony_ci               break;
466bf215546Sopenharmony_ci            }
467bf215546Sopenharmony_ci            case nir_intrinsic_rt_trace_ray: {
468bf215546Sopenharmony_ci               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
469bf215546Sopenharmony_ci               uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci               nir_store_var(
472bf215546Sopenharmony_ci                  &b_shader, vars->stack_ptr,
473bf215546Sopenharmony_ci                  nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1);
474bf215546Sopenharmony_ci               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx),
475bf215546Sopenharmony_ci                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16);
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->stack_ptr,
478bf215546Sopenharmony_ci                             nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16),
479bf215546Sopenharmony_ci                             1);
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1);
482bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->arg,
483bf215546Sopenharmony_ci                             nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1);
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci               reserve_stack_size(vars, size + 16);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci               /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
488bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1);
489bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1);
490bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->cull_mask,
491bf215546Sopenharmony_ci                             nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), 0x1);
492bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->sbt_offset,
493bf215546Sopenharmony_ci                             nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1);
494bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->sbt_stride,
495bf215546Sopenharmony_ci                             nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1);
496bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->miss_index,
497bf215546Sopenharmony_ci                             nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1);
498bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7);
499bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1);
500bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7);
501bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1);
502bf215546Sopenharmony_ci               break;
503bf215546Sopenharmony_ci            }
504bf215546Sopenharmony_ci            case nir_intrinsic_rt_resume: {
505bf215546Sopenharmony_ci               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci               nir_store_var(
508bf215546Sopenharmony_ci                  &b_shader, vars->stack_ptr,
509bf215546Sopenharmony_ci                  nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1);
510bf215546Sopenharmony_ci               break;
511bf215546Sopenharmony_ci            }
512bf215546Sopenharmony_ci            case nir_intrinsic_rt_return_amd: {
513bf215546Sopenharmony_ci               if (shader->info.stage == MESA_SHADER_RAYGEN) {
514bf215546Sopenharmony_ci                  nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1);
515bf215546Sopenharmony_ci                  break;
516bf215546Sopenharmony_ci               }
517bf215546Sopenharmony_ci               insert_rt_return(&b_shader, vars);
518bf215546Sopenharmony_ci               break;
519bf215546Sopenharmony_ci            }
520bf215546Sopenharmony_ci            case nir_intrinsic_load_scratch: {
521bf215546Sopenharmony_ci               nir_instr_rewrite_src_ssa(
522bf215546Sopenharmony_ci                  instr, &intr->src[0],
523bf215546Sopenharmony_ci                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa));
524bf215546Sopenharmony_ci               continue;
525bf215546Sopenharmony_ci            }
526bf215546Sopenharmony_ci            case nir_intrinsic_store_scratch: {
527bf215546Sopenharmony_ci               nir_instr_rewrite_src_ssa(
528bf215546Sopenharmony_ci                  instr, &intr->src[1],
529bf215546Sopenharmony_ci                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa));
530bf215546Sopenharmony_ci               continue;
531bf215546Sopenharmony_ci            }
532bf215546Sopenharmony_ci            case nir_intrinsic_load_rt_arg_scratch_offset_amd: {
533bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->arg);
534bf215546Sopenharmony_ci               break;
535bf215546Sopenharmony_ci            }
536bf215546Sopenharmony_ci            case nir_intrinsic_load_shader_record_ptr: {
537bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->shader_record_ptr);
538bf215546Sopenharmony_ci               break;
539bf215546Sopenharmony_ci            }
540bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_launch_id: {
541bf215546Sopenharmony_ci               ret = nir_load_global_invocation_id(&b_shader, 32);
542bf215546Sopenharmony_ci               break;
543bf215546Sopenharmony_ci            }
544bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_launch_size: {
545bf215546Sopenharmony_ci               nir_ssa_def *launch_size_addr =
546bf215546Sopenharmony_ci                  nir_load_ray_launch_size_addr_amd(&b_shader);
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci               nir_ssa_def * xy = nir_build_load_smem_amd(
549bf215546Sopenharmony_ci                  &b_shader, 2, launch_size_addr, nir_imm_int(&b_shader, 0));
550bf215546Sopenharmony_ci               nir_ssa_def * z = nir_build_load_smem_amd(
551bf215546Sopenharmony_ci                  &b_shader, 1, launch_size_addr, nir_imm_int(&b_shader, 8));
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci               nir_ssa_def *xyz[3] = {
554bf215546Sopenharmony_ci                  nir_channel(&b_shader, xy, 0),
555bf215546Sopenharmony_ci                  nir_channel(&b_shader, xy, 1),
556bf215546Sopenharmony_ci                  z,
557bf215546Sopenharmony_ci               };
558bf215546Sopenharmony_ci               ret = nir_vec(&b_shader, xyz, 3);
559bf215546Sopenharmony_ci               break;
560bf215546Sopenharmony_ci            }
561bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_t_min: {
562bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->tmin);
563bf215546Sopenharmony_ci               break;
564bf215546Sopenharmony_ci            }
565bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_t_max: {
566bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->tmax);
567bf215546Sopenharmony_ci               break;
568bf215546Sopenharmony_ci            }
569bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_world_origin: {
570bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->origin);
571bf215546Sopenharmony_ci               break;
572bf215546Sopenharmony_ci            }
573bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_world_direction: {
574bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->direction);
575bf215546Sopenharmony_ci               break;
576bf215546Sopenharmony_ci            }
577bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_instance_custom_index: {
578bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->custom_instance_and_mask);
579bf215546Sopenharmony_ci               ret = nir_iand_imm(&b_shader, ret, 0xFFFFFF);
580bf215546Sopenharmony_ci               break;
581bf215546Sopenharmony_ci            }
582bf215546Sopenharmony_ci            case nir_intrinsic_load_primitive_id: {
583bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->primitive_id);
584bf215546Sopenharmony_ci               break;
585bf215546Sopenharmony_ci            }
586bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_geometry_index: {
587bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->geometry_id_and_flags);
588bf215546Sopenharmony_ci               ret = nir_iand_imm(&b_shader, ret, 0xFFFFFFF);
589bf215546Sopenharmony_ci               break;
590bf215546Sopenharmony_ci            }
591bf215546Sopenharmony_ci            case nir_intrinsic_load_instance_id: {
592bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->instance_id);
593bf215546Sopenharmony_ci               break;
594bf215546Sopenharmony_ci            }
595bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_flags: {
596bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->flags);
597bf215546Sopenharmony_ci               break;
598bf215546Sopenharmony_ci            }
599bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_hit_kind: {
600bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->hit_kind);
601bf215546Sopenharmony_ci               break;
602bf215546Sopenharmony_ci            }
603bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_world_to_object: {
604bf215546Sopenharmony_ci               unsigned c = nir_intrinsic_column(intr);
605bf215546Sopenharmony_ci               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
606bf215546Sopenharmony_ci               nir_ssa_def *wto_matrix[3];
607bf215546Sopenharmony_ci               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci               nir_ssa_def *vals[3];
610bf215546Sopenharmony_ci               for (unsigned i = 0; i < 3; ++i)
611bf215546Sopenharmony_ci                  vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci               ret = nir_vec(&b_shader, vals, 3);
614bf215546Sopenharmony_ci               if (c == 3)
615bf215546Sopenharmony_ci                  ret = nir_fneg(&b_shader,
616bf215546Sopenharmony_ci                                 nir_build_vec3_mat_mult(&b_shader, ret, wto_matrix, false));
617bf215546Sopenharmony_ci               break;
618bf215546Sopenharmony_ci            }
619bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_object_to_world: {
620bf215546Sopenharmony_ci               unsigned c = nir_intrinsic_column(intr);
621bf215546Sopenharmony_ci               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
622bf215546Sopenharmony_ci               if (c == 3) {
623bf215546Sopenharmony_ci                  nir_ssa_def *wto_matrix[3];
624bf215546Sopenharmony_ci                  nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci                  nir_ssa_def *vals[3];
627bf215546Sopenharmony_ci                  for (unsigned i = 0; i < 3; ++i)
628bf215546Sopenharmony_ci                     vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci                  ret = nir_vec(&b_shader, vals, 3);
631bf215546Sopenharmony_ci               } else {
632bf215546Sopenharmony_ci                  ret = nir_build_load_global(
633bf215546Sopenharmony_ci                     &b_shader, 3, 32, nir_iadd_imm(&b_shader, instance_node_addr, 92 + c * 12));
634bf215546Sopenharmony_ci               }
635bf215546Sopenharmony_ci               break;
636bf215546Sopenharmony_ci            }
637bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_object_origin: {
638bf215546Sopenharmony_ci               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
639bf215546Sopenharmony_ci               nir_ssa_def *wto_matrix[] = {
640bf215546Sopenharmony_ci                  nir_build_load_global(&b_shader, 4, 32,
641bf215546Sopenharmony_ci                                        nir_iadd_imm(&b_shader, instance_node_addr, 16),
642bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 16),
643bf215546Sopenharmony_ci                  nir_build_load_global(&b_shader, 4, 32,
644bf215546Sopenharmony_ci                                        nir_iadd_imm(&b_shader, instance_node_addr, 32),
645bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 32),
646bf215546Sopenharmony_ci                  nir_build_load_global(&b_shader, 4, 32,
647bf215546Sopenharmony_ci                                        nir_iadd_imm(&b_shader, instance_node_addr, 48),
648bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 48)};
649bf215546Sopenharmony_ci               ret = nir_build_vec3_mat_mult_pre(
650bf215546Sopenharmony_ci                  &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix);
651bf215546Sopenharmony_ci               break;
652bf215546Sopenharmony_ci            }
653bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_object_direction: {
654bf215546Sopenharmony_ci               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
655bf215546Sopenharmony_ci               nir_ssa_def *wto_matrix[3];
656bf215546Sopenharmony_ci               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
657bf215546Sopenharmony_ci               ret = nir_build_vec3_mat_mult(
658bf215546Sopenharmony_ci                  &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
659bf215546Sopenharmony_ci               break;
660bf215546Sopenharmony_ci            }
661bf215546Sopenharmony_ci            case nir_intrinsic_load_intersection_opaque_amd: {
662bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->opaque);
663bf215546Sopenharmony_ci               break;
664bf215546Sopenharmony_ci            }
665bf215546Sopenharmony_ci            case nir_intrinsic_load_cull_mask: {
666bf215546Sopenharmony_ci               ret = nir_load_var(&b_shader, vars->cull_mask);
667bf215546Sopenharmony_ci               break;
668bf215546Sopenharmony_ci            }
669bf215546Sopenharmony_ci            case nir_intrinsic_ignore_ray_intersection: {
670bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->ahit_accept, nir_imm_false(&b_shader), 0x1);
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci               /* The if is a workaround to avoid having to fix up control flow manually */
673bf215546Sopenharmony_ci               nir_push_if(&b_shader, nir_imm_true(&b_shader));
674bf215546Sopenharmony_ci               nir_jump(&b_shader, nir_jump_return);
675bf215546Sopenharmony_ci               nir_pop_if(&b_shader, NULL);
676bf215546Sopenharmony_ci               break;
677bf215546Sopenharmony_ci            }
678bf215546Sopenharmony_ci            case nir_intrinsic_terminate_ray: {
679bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1);
680bf215546Sopenharmony_ci               nir_store_var(&b_shader, vars->ahit_terminate, nir_imm_true(&b_shader), 0x1);
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci               /* The if is a workaround to avoid having to fix up control flow manually */
683bf215546Sopenharmony_ci               nir_push_if(&b_shader, nir_imm_true(&b_shader));
684bf215546Sopenharmony_ci               nir_jump(&b_shader, nir_jump_return);
685bf215546Sopenharmony_ci               nir_pop_if(&b_shader, NULL);
686bf215546Sopenharmony_ci               break;
687bf215546Sopenharmony_ci            }
688bf215546Sopenharmony_ci            case nir_intrinsic_report_ray_intersection: {
689bf215546Sopenharmony_ci               nir_push_if(
690bf215546Sopenharmony_ci                  &b_shader,
691bf215546Sopenharmony_ci                  nir_iand(
692bf215546Sopenharmony_ci                     &b_shader,
693bf215546Sopenharmony_ci                     nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa),
694bf215546Sopenharmony_ci                     nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin))));
695bf215546Sopenharmony_ci               {
696bf215546Sopenharmony_ci                  nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1);
697bf215546Sopenharmony_ci                  nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1);
698bf215546Sopenharmony_ci                  nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1);
699bf215546Sopenharmony_ci               }
700bf215546Sopenharmony_ci               nir_pop_if(&b_shader, NULL);
701bf215546Sopenharmony_ci               break;
702bf215546Sopenharmony_ci            }
703bf215546Sopenharmony_ci            default:
704bf215546Sopenharmony_ci               continue;
705bf215546Sopenharmony_ci            }
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci            if (ret)
708bf215546Sopenharmony_ci               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
709bf215546Sopenharmony_ci            nir_instr_remove(instr);
710bf215546Sopenharmony_ci            break;
711bf215546Sopenharmony_ci         }
712bf215546Sopenharmony_ci         case nir_instr_type_jump: {
713bf215546Sopenharmony_ci            nir_jump_instr *jump = nir_instr_as_jump(instr);
714bf215546Sopenharmony_ci            if (jump->type == nir_jump_halt) {
715bf215546Sopenharmony_ci               b_shader.cursor = nir_instr_remove(instr);
716bf215546Sopenharmony_ci               nir_jump(&b_shader, nir_jump_return);
717bf215546Sopenharmony_ci            }
718bf215546Sopenharmony_ci            break;
719bf215546Sopenharmony_ci         }
720bf215546Sopenharmony_ci         default:
721bf215546Sopenharmony_ci            break;
722bf215546Sopenharmony_ci         }
723bf215546Sopenharmony_ci      }
724bf215546Sopenharmony_ci   }
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
727bf215546Sopenharmony_ci}
728bf215546Sopenharmony_ci
729bf215546Sopenharmony_cistatic void
730bf215546Sopenharmony_ciinsert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx,
731bf215546Sopenharmony_ci               uint32_t call_idx_base, uint32_t call_idx)
732bf215546Sopenharmony_ci{
733bf215546Sopenharmony_ci   struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL);
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   nir_opt_dead_cf(shader);
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   struct rt_variables src_vars = create_rt_variables(shader, vars->create_info, vars->stack_sizes);
738bf215546Sopenharmony_ci   map_rt_variables(var_remap, &src_vars, vars);
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci   NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci   NIR_PASS(_, shader, nir_opt_remove_phis);
743bf215546Sopenharmony_ci   NIR_PASS(_, shader, nir_lower_returns);
744bf215546Sopenharmony_ci   NIR_PASS(_, shader, nir_opt_dce);
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci   reserve_stack_size(vars, shader->scratch_size);
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci   nir_push_if(b, nir_ieq_imm(b, idx, call_idx));
749bf215546Sopenharmony_ci   nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1);
750bf215546Sopenharmony_ci   nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap);
751bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */
754bf215546Sopenharmony_ci   ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader));
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci   ralloc_free(var_remap);
757bf215546Sopenharmony_ci}
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_cistatic bool
760bf215546Sopenharmony_cilower_rt_derefs(nir_shader *shader)
761bf215546Sopenharmony_ci{
762bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   bool progress = false;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   nir_builder b;
767bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   b.cursor = nir_before_cf_list(&impl->body);
770bf215546Sopenharmony_ci   nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
773bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
774bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_deref)
775bf215546Sopenharmony_ci            continue;
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci         nir_deref_instr *deref = nir_instr_as_deref(instr);
778bf215546Sopenharmony_ci         b.cursor = nir_before_instr(&deref->instr);
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci         nir_deref_instr *replacement = NULL;
781bf215546Sopenharmony_ci         if (nir_deref_mode_is(deref, nir_var_shader_call_data)) {
782bf215546Sopenharmony_ci            deref->modes = nir_var_function_temp;
783bf215546Sopenharmony_ci            progress = true;
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci            if (deref->deref_type == nir_deref_type_var)
786bf215546Sopenharmony_ci               replacement =
787bf215546Sopenharmony_ci                  nir_build_deref_cast(&b, arg_offset, nir_var_function_temp, deref->var->type, 0);
788bf215546Sopenharmony_ci         } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) {
789bf215546Sopenharmony_ci            deref->modes = nir_var_function_temp;
790bf215546Sopenharmony_ci            progress = true;
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci            if (deref->deref_type == nir_deref_type_var)
793bf215546Sopenharmony_ci               replacement = nir_build_deref_cast(&b, nir_imm_int(&b, RADV_HIT_ATTRIB_OFFSET),
794bf215546Sopenharmony_ci                                                  nir_var_function_temp, deref->type, 0);
795bf215546Sopenharmony_ci         }
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci         if (replacement != NULL) {
798bf215546Sopenharmony_ci            nir_ssa_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa);
799bf215546Sopenharmony_ci            nir_instr_remove(&deref->instr);
800bf215546Sopenharmony_ci         }
801bf215546Sopenharmony_ci      }
802bf215546Sopenharmony_ci   }
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci   if (progress)
805bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
806bf215546Sopenharmony_ci   else
807bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci   return progress;
810bf215546Sopenharmony_ci}
811bf215546Sopenharmony_ci
812bf215546Sopenharmony_cistatic nir_shader *
813bf215546Sopenharmony_ciparse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo)
814bf215546Sopenharmony_ci{
815bf215546Sopenharmony_ci   struct radv_pipeline_key key;
816bf215546Sopenharmony_ci   memset(&key, 0, sizeof(key));
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci   struct radv_pipeline_stage rt_stage;
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci   radv_pipeline_stage_init(sinfo, &rt_stage, vk_to_mesa_shader_stage(sinfo->stage));
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci   nir_shader *shader = radv_shader_spirv_to_nir(device, &rt_stage, &key);
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci   if (shader->info.stage == MESA_SHADER_RAYGEN || shader->info.stage == MESA_SHADER_CLOSEST_HIT ||
825bf215546Sopenharmony_ci       shader->info.stage == MESA_SHADER_CALLABLE || shader->info.stage == MESA_SHADER_MISS) {
826bf215546Sopenharmony_ci      nir_block *last_block = nir_impl_last_block(nir_shader_get_entrypoint(shader));
827bf215546Sopenharmony_ci      nir_builder b_inner;
828bf215546Sopenharmony_ci      nir_builder_init(&b_inner, nir_shader_get_entrypoint(shader));
829bf215546Sopenharmony_ci      b_inner.cursor = nir_after_block(last_block);
830bf215546Sopenharmony_ci      nir_rt_return_amd(&b_inner);
831bf215546Sopenharmony_ci   }
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   NIR_PASS(_, shader, nir_lower_vars_to_explicit_types,
834bf215546Sopenharmony_ci            nir_var_function_temp | nir_var_shader_call_data | nir_var_ray_hit_attrib,
835bf215546Sopenharmony_ci            glsl_get_natural_size_align_bytes);
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci   NIR_PASS(_, shader, lower_rt_derefs);
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ci   NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp,
840bf215546Sopenharmony_ci            nir_address_format_32bit_offset);
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci   return shader;
843bf215546Sopenharmony_ci}
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_cistatic nir_function_impl *
846bf215546Sopenharmony_cilower_any_hit_for_intersection(nir_shader *any_hit)
847bf215546Sopenharmony_ci{
848bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(any_hit);
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   /* Any-hit shaders need three parameters */
851bf215546Sopenharmony_ci   assert(impl->function->num_params == 0);
852bf215546Sopenharmony_ci   nir_parameter params[] = {
853bf215546Sopenharmony_ci      {
854bf215546Sopenharmony_ci         /* A pointer to a boolean value for whether or not the hit was
855bf215546Sopenharmony_ci          * accepted.
856bf215546Sopenharmony_ci          */
857bf215546Sopenharmony_ci         .num_components = 1,
858bf215546Sopenharmony_ci         .bit_size = 32,
859bf215546Sopenharmony_ci      },
860bf215546Sopenharmony_ci      {
861bf215546Sopenharmony_ci         /* The hit T value */
862bf215546Sopenharmony_ci         .num_components = 1,
863bf215546Sopenharmony_ci         .bit_size = 32,
864bf215546Sopenharmony_ci      },
865bf215546Sopenharmony_ci      {
866bf215546Sopenharmony_ci         /* The hit kind */
867bf215546Sopenharmony_ci         .num_components = 1,
868bf215546Sopenharmony_ci         .bit_size = 32,
869bf215546Sopenharmony_ci      },
870bf215546Sopenharmony_ci   };
871bf215546Sopenharmony_ci   impl->function->num_params = ARRAY_SIZE(params);
872bf215546Sopenharmony_ci   impl->function->params = ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params));
873bf215546Sopenharmony_ci   memcpy(impl->function->params, params, sizeof(params));
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci   nir_builder build;
876bf215546Sopenharmony_ci   nir_builder_init(&build, impl);
877bf215546Sopenharmony_ci   nir_builder *b = &build;
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci   b->cursor = nir_before_cf_list(&impl->body);
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci   nir_ssa_def *commit_ptr = nir_load_param(b, 0);
882bf215546Sopenharmony_ci   nir_ssa_def *hit_t = nir_load_param(b, 1);
883bf215546Sopenharmony_ci   nir_ssa_def *hit_kind = nir_load_param(b, 2);
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci   nir_deref_instr *commit =
886bf215546Sopenharmony_ci      nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_ci   nir_foreach_block_safe (block, impl) {
889bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
890bf215546Sopenharmony_ci         switch (instr->type) {
891bf215546Sopenharmony_ci         case nir_instr_type_intrinsic: {
892bf215546Sopenharmony_ci            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
893bf215546Sopenharmony_ci            switch (intrin->intrinsic) {
894bf215546Sopenharmony_ci            case nir_intrinsic_ignore_ray_intersection:
895bf215546Sopenharmony_ci               b->cursor = nir_instr_remove(&intrin->instr);
896bf215546Sopenharmony_ci               /* We put the newly emitted code inside a dummy if because it's
897bf215546Sopenharmony_ci                * going to contain a jump instruction and we don't want to
898bf215546Sopenharmony_ci                * deal with that mess here.  It'll get dealt with by our
899bf215546Sopenharmony_ci                * control-flow optimization passes.
900bf215546Sopenharmony_ci                */
901bf215546Sopenharmony_ci               nir_store_deref(b, commit, nir_imm_false(b), 0x1);
902bf215546Sopenharmony_ci               nir_push_if(b, nir_imm_true(b));
903bf215546Sopenharmony_ci               nir_jump(b, nir_jump_return);
904bf215546Sopenharmony_ci               nir_pop_if(b, NULL);
905bf215546Sopenharmony_ci               break;
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci            case nir_intrinsic_terminate_ray:
908bf215546Sopenharmony_ci               /* The "normal" handling of terminateRay works fine in
909bf215546Sopenharmony_ci                * intersection shaders.
910bf215546Sopenharmony_ci                */
911bf215546Sopenharmony_ci               break;
912bf215546Sopenharmony_ci
913bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_t_max:
914bf215546Sopenharmony_ci               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t);
915bf215546Sopenharmony_ci               nir_instr_remove(&intrin->instr);
916bf215546Sopenharmony_ci               break;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci            case nir_intrinsic_load_ray_hit_kind:
919bf215546Sopenharmony_ci               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
920bf215546Sopenharmony_ci               nir_instr_remove(&intrin->instr);
921bf215546Sopenharmony_ci               break;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci            default:
924bf215546Sopenharmony_ci               break;
925bf215546Sopenharmony_ci            }
926bf215546Sopenharmony_ci            break;
927bf215546Sopenharmony_ci         }
928bf215546Sopenharmony_ci         case nir_instr_type_jump: {
929bf215546Sopenharmony_ci            nir_jump_instr *jump = nir_instr_as_jump(instr);
930bf215546Sopenharmony_ci            if (jump->type == nir_jump_halt) {
931bf215546Sopenharmony_ci               b->cursor = nir_instr_remove(instr);
932bf215546Sopenharmony_ci               nir_jump(b, nir_jump_return);
933bf215546Sopenharmony_ci            }
934bf215546Sopenharmony_ci            break;
935bf215546Sopenharmony_ci         }
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci         default:
938bf215546Sopenharmony_ci            break;
939bf215546Sopenharmony_ci         }
940bf215546Sopenharmony_ci      }
941bf215546Sopenharmony_ci   }
942bf215546Sopenharmony_ci
943bf215546Sopenharmony_ci   nir_validate_shader(any_hit, "after initial any-hit lowering");
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci   nir_lower_returns_impl(impl);
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci   nir_validate_shader(any_hit, "after lowering returns");
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci   return impl;
950bf215546Sopenharmony_ci}
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci/* Inline the any_hit shader into the intersection shader so we don't have
953bf215546Sopenharmony_ci * to implement yet another shader call interface here. Neither do any recursion.
954bf215546Sopenharmony_ci */
955bf215546Sopenharmony_cistatic void
956bf215546Sopenharmony_cinir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
957bf215546Sopenharmony_ci{
958bf215546Sopenharmony_ci   void *dead_ctx = ralloc_context(intersection);
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci   nir_function_impl *any_hit_impl = NULL;
961bf215546Sopenharmony_ci   struct hash_table *any_hit_var_remap = NULL;
962bf215546Sopenharmony_ci   if (any_hit) {
963bf215546Sopenharmony_ci      any_hit = nir_shader_clone(dead_ctx, any_hit);
964bf215546Sopenharmony_ci      NIR_PASS(_, any_hit, nir_opt_dce);
965bf215546Sopenharmony_ci      any_hit_impl = lower_any_hit_for_intersection(any_hit);
966bf215546Sopenharmony_ci      any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx);
967bf215546Sopenharmony_ci   }
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(intersection);
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci   nir_builder build;
972bf215546Sopenharmony_ci   nir_builder_init(&build, impl);
973bf215546Sopenharmony_ci   nir_builder *b = &build;
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci   b->cursor = nir_before_cf_list(&impl->body);
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci   nir_variable *commit = nir_local_variable_create(impl, glsl_bool_type(), "ray_commit");
978bf215546Sopenharmony_ci   nir_store_var(b, commit, nir_imm_false(b), 0x1);
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci   nir_foreach_block_safe (block, impl) {
981bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
982bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
983bf215546Sopenharmony_ci            continue;
984bf215546Sopenharmony_ci
985bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
986bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_report_ray_intersection)
987bf215546Sopenharmony_ci            continue;
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci         b->cursor = nir_instr_remove(&intrin->instr);
990bf215546Sopenharmony_ci         nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
991bf215546Sopenharmony_ci         nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
992bf215546Sopenharmony_ci         nir_ssa_def *min_t = nir_load_ray_t_min(b);
993bf215546Sopenharmony_ci         nir_ssa_def *max_t = nir_load_ray_t_max(b);
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci         /* bool commit_tmp = false; */
996bf215546Sopenharmony_ci         nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp");
997bf215546Sopenharmony_ci         nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1);
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_ci         nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t), nir_fge(b, max_t, hit_t)));
1000bf215546Sopenharmony_ci         {
1001bf215546Sopenharmony_ci            /* Any-hit defaults to commit */
1002bf215546Sopenharmony_ci            nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1);
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci            if (any_hit_impl != NULL) {
1005bf215546Sopenharmony_ci               nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b)));
1006bf215546Sopenharmony_ci               {
1007bf215546Sopenharmony_ci                  nir_ssa_def *params[] = {
1008bf215546Sopenharmony_ci                     &nir_build_deref_var(b, commit_tmp)->dest.ssa,
1009bf215546Sopenharmony_ci                     hit_t,
1010bf215546Sopenharmony_ci                     hit_kind,
1011bf215546Sopenharmony_ci                  };
1012bf215546Sopenharmony_ci                  nir_inline_function_impl(b, any_hit_impl, params, any_hit_var_remap);
1013bf215546Sopenharmony_ci               }
1014bf215546Sopenharmony_ci               nir_pop_if(b, NULL);
1015bf215546Sopenharmony_ci            }
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci            nir_push_if(b, nir_load_var(b, commit_tmp));
1018bf215546Sopenharmony_ci            {
1019bf215546Sopenharmony_ci               nir_report_ray_intersection(b, 1, hit_t, hit_kind);
1020bf215546Sopenharmony_ci            }
1021bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
1022bf215546Sopenharmony_ci         }
1023bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci         nir_ssa_def *accepted = nir_load_var(b, commit_tmp);
1026bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted);
1027bf215546Sopenharmony_ci      }
1028bf215546Sopenharmony_ci   }
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   /* We did some inlining; have to re-index SSA defs */
1031bf215546Sopenharmony_ci   nir_index_ssa_defs(impl);
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   /* Eliminate the casts introduced for the commit return of the any-hit shader. */
1034bf215546Sopenharmony_ci   NIR_PASS(_, intersection, nir_opt_deref);
1035bf215546Sopenharmony_ci
1036bf215546Sopenharmony_ci   ralloc_free(dead_ctx);
1037bf215546Sopenharmony_ci}
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci/* Variables only used internally to ray traversal. This is data that describes
1040bf215546Sopenharmony_ci * the current state of the traversal vs. what we'd give to a shader.  e.g. what
1041bf215546Sopenharmony_ci * is the instance we're currently visiting vs. what is the instance of the
1042bf215546Sopenharmony_ci * closest hit. */
1043bf215546Sopenharmony_cistruct rt_traversal_vars {
1044bf215546Sopenharmony_ci   nir_variable *origin;
1045bf215546Sopenharmony_ci   nir_variable *dir;
1046bf215546Sopenharmony_ci   nir_variable *inv_dir;
1047bf215546Sopenharmony_ci   nir_variable *sbt_offset_and_flags;
1048bf215546Sopenharmony_ci   nir_variable *instance_id;
1049bf215546Sopenharmony_ci   nir_variable *custom_instance_and_mask;
1050bf215546Sopenharmony_ci   nir_variable *instance_addr;
1051bf215546Sopenharmony_ci   nir_variable *hit;
1052bf215546Sopenharmony_ci   nir_variable *bvh_base;
1053bf215546Sopenharmony_ci   nir_variable *stack;
1054bf215546Sopenharmony_ci   nir_variable *top_stack;
1055bf215546Sopenharmony_ci};
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_cistatic struct rt_traversal_vars
1058bf215546Sopenharmony_ciinit_traversal_vars(nir_builder *b)
1059bf215546Sopenharmony_ci{
1060bf215546Sopenharmony_ci   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
1061bf215546Sopenharmony_ci   struct rt_traversal_vars ret;
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci   ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin");
1064bf215546Sopenharmony_ci   ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir");
1065bf215546Sopenharmony_ci   ret.inv_dir =
1066bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir");
1067bf215546Sopenharmony_ci   ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
1068bf215546Sopenharmony_ci                                                  "traversal_sbt_offset_and_flags");
1069bf215546Sopenharmony_ci   ret.instance_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
1070bf215546Sopenharmony_ci                                         "traversal_instance_id");
1071bf215546Sopenharmony_ci   ret.custom_instance_and_mask = nir_variable_create(
1072bf215546Sopenharmony_ci      b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_custom_instance_and_mask");
1073bf215546Sopenharmony_ci   ret.instance_addr =
1074bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
1075bf215546Sopenharmony_ci   ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit");
1076bf215546Sopenharmony_ci   ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(),
1077bf215546Sopenharmony_ci                                      "traversal_bvh_base");
1078bf215546Sopenharmony_ci   ret.stack =
1079bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr");
1080bf215546Sopenharmony_ci   ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
1081bf215546Sopenharmony_ci                                       "traversal_top_stack_ptr");
1082bf215546Sopenharmony_ci   return ret;
1083bf215546Sopenharmony_ci}
1084bf215546Sopenharmony_ci
1085bf215546Sopenharmony_cistatic void
1086bf215546Sopenharmony_civisit_any_hit_shaders(struct radv_device *device,
1087bf215546Sopenharmony_ci                      const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
1088bf215546Sopenharmony_ci                      struct rt_variables *vars)
1089bf215546Sopenharmony_ci{
1090bf215546Sopenharmony_ci   nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx);
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci   nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
1093bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
1094bf215546Sopenharmony_ci      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
1095bf215546Sopenharmony_ci      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci      switch (group_info->type) {
1098bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
1099bf215546Sopenharmony_ci         shader_id = group_info->anyHitShader;
1100bf215546Sopenharmony_ci         break;
1101bf215546Sopenharmony_ci      default:
1102bf215546Sopenharmony_ci         break;
1103bf215546Sopenharmony_ci      }
1104bf215546Sopenharmony_ci      if (shader_id == VK_SHADER_UNUSED_KHR)
1105bf215546Sopenharmony_ci         continue;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
1108bf215546Sopenharmony_ci      nir_shader *nir_stage = parse_rt_stage(device, stage);
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci      vars->stage_idx = shader_id;
1111bf215546Sopenharmony_ci      insert_rt_case(b, nir_stage, vars, sbt_idx, 0, i + 2);
1112bf215546Sopenharmony_ci   }
1113bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
1114bf215546Sopenharmony_ci}
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_cistatic void
1117bf215546Sopenharmony_ciinsert_traversal_triangle_case(struct radv_device *device,
1118bf215546Sopenharmony_ci                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
1119bf215546Sopenharmony_ci                               nir_ssa_def *result, const struct rt_variables *vars,
1120bf215546Sopenharmony_ci                               const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node)
1121bf215546Sopenharmony_ci{
1122bf215546Sopenharmony_ci   nir_ssa_def *dist = nir_channel(b, result, 0);
1123bf215546Sopenharmony_ci   nir_ssa_def *div = nir_channel(b, result, 1);
1124bf215546Sopenharmony_ci   dist = nir_fdiv(b, dist, div);
1125bf215546Sopenharmony_ci   nir_ssa_def *frontface = nir_flt(b, nir_imm_float(b, 0), div);
1126bf215546Sopenharmony_ci   nir_ssa_def *switch_ccw =
1127bf215546Sopenharmony_ci      nir_test_mask(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
1128bf215546Sopenharmony_ci                    VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24);
1129bf215546Sopenharmony_ci   frontface = nir_ixor(b, frontface, switch_ccw);
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci   nir_ssa_def *not_cull =
1132bf215546Sopenharmony_ci      nir_inot(b, nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsSkipTrianglesKHRMask));
1133bf215546Sopenharmony_ci   nir_ssa_def *not_facing_cull = nir_ieq_imm(
1134bf215546Sopenharmony_ci      b,
1135bf215546Sopenharmony_ci      nir_iand(b, nir_load_var(b, vars->flags),
1136bf215546Sopenharmony_ci               nir_bcsel(b, frontface, nir_imm_int(b, SpvRayFlagsCullFrontFacingTrianglesKHRMask),
1137bf215546Sopenharmony_ci                         nir_imm_int(b, SpvRayFlagsCullBackFacingTrianglesKHRMask))),
1138bf215546Sopenharmony_ci      0);
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci   not_cull = nir_iand(
1141bf215546Sopenharmony_ci      b, not_cull,
1142bf215546Sopenharmony_ci      nir_ior(b, not_facing_cull,
1143bf215546Sopenharmony_ci              nir_test_mask(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
1144bf215546Sopenharmony_ci                            VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR << 24)));
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci   nir_push_if(b, nir_iand(b,
1147bf215546Sopenharmony_ci                           nir_iand(b, nir_flt(b, dist, nir_load_var(b, vars->tmax)),
1148bf215546Sopenharmony_ci                                    nir_flt(b, nir_load_var(b, vars->tmin), dist)),
1149bf215546Sopenharmony_ci                           not_cull));
1150bf215546Sopenharmony_ci   {
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci      nir_ssa_def *triangle_info =
1153bf215546Sopenharmony_ci         nir_build_load_global(b, 2, 32,
1154bf215546Sopenharmony_ci                               nir_iadd_imm(b, build_node_to_addr(device, b, bvh_node),
1155bf215546Sopenharmony_ci                                            offsetof(struct radv_bvh_triangle_node, triangle_id)));
1156bf215546Sopenharmony_ci      nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0);
1157bf215546Sopenharmony_ci      nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1);
1158bf215546Sopenharmony_ci      nir_ssa_def *geometry_id = nir_iand_imm(b, geometry_id_and_flags, 0xfffffff);
1159bf215546Sopenharmony_ci      nir_ssa_def *is_opaque = hit_is_opaque(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
1160bf215546Sopenharmony_ci                                             nir_load_var(b, vars->flags), geometry_id_and_flags);
1161bf215546Sopenharmony_ci
1162bf215546Sopenharmony_ci      not_cull =
1163bf215546Sopenharmony_ci         nir_ieq_imm(b,
1164bf215546Sopenharmony_ci                     nir_iand(b, nir_load_var(b, vars->flags),
1165bf215546Sopenharmony_ci                              nir_bcsel(b, is_opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
1166bf215546Sopenharmony_ci                                        nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
1167bf215546Sopenharmony_ci                     0);
1168bf215546Sopenharmony_ci      nir_push_if(b, not_cull);
1169bf215546Sopenharmony_ci      {
1170bf215546Sopenharmony_ci         nir_ssa_def *sbt_idx = nir_iadd(
1171bf215546Sopenharmony_ci            b,
1172bf215546Sopenharmony_ci            nir_iadd(b, nir_load_var(b, vars->sbt_offset),
1173bf215546Sopenharmony_ci                     nir_iand_imm(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 0xffffff)),
1174bf215546Sopenharmony_ci            nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id));
1175bf215546Sopenharmony_ci         nir_ssa_def *divs[2] = {div, div};
1176bf215546Sopenharmony_ci         nir_ssa_def *ij = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
1177bf215546Sopenharmony_ci         nir_ssa_def *hit_kind =
1178bf215546Sopenharmony_ci            nir_bcsel(b, frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_ci         nir_store_scratch(
1181bf215546Sopenharmony_ci            b, ij, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), RADV_HIT_ATTRIB_OFFSET),
1182bf215546Sopenharmony_ci            .align_mul = 16);
1183bf215546Sopenharmony_ci
1184bf215546Sopenharmony_ci         nir_store_var(b, vars->ahit_accept, nir_imm_true(b), 0x1);
1185bf215546Sopenharmony_ci         nir_store_var(b, vars->ahit_terminate, nir_imm_false(b), 0x1);
1186bf215546Sopenharmony_ci
1187bf215546Sopenharmony_ci         nir_push_if(b, nir_inot(b, is_opaque));
1188bf215546Sopenharmony_ci         {
1189bf215546Sopenharmony_ci            struct rt_variables inner_vars = create_inner_vars(b, vars);
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.primitive_id, primitive_id, 1);
1192bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1);
1193bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.tmax, dist, 0x1);
1194bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
1195bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr),
1196bf215546Sopenharmony_ci                          0x1);
1197bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1);
1198bf215546Sopenharmony_ci            nir_store_var(b, inner_vars.custom_instance_and_mask,
1199bf215546Sopenharmony_ci                          nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);
1200bf215546Sopenharmony_ci
1201bf215546Sopenharmony_ci            load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4);
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_ci            visit_any_hit_shaders(device, pCreateInfo, b, &inner_vars);
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci            nir_push_if(b, nir_inot(b, nir_load_var(b, vars->ahit_accept)));
1206bf215546Sopenharmony_ci            {
1207bf215546Sopenharmony_ci               nir_jump(b, nir_jump_continue);
1208bf215546Sopenharmony_ci            }
1209bf215546Sopenharmony_ci            nir_pop_if(b, NULL);
1210bf215546Sopenharmony_ci         }
1211bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_ci         nir_store_var(b, vars->primitive_id, primitive_id, 1);
1214bf215546Sopenharmony_ci         nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1);
1215bf215546Sopenharmony_ci         nir_store_var(b, vars->tmax, dist, 0x1);
1216bf215546Sopenharmony_ci         nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
1217bf215546Sopenharmony_ci         nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
1218bf215546Sopenharmony_ci         nir_store_var(b, vars->hit_kind, hit_kind, 0x1);
1219bf215546Sopenharmony_ci         nir_store_var(b, vars->custom_instance_and_mask,
1220bf215546Sopenharmony_ci                       nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_ci         nir_store_var(b, vars->idx, sbt_idx, 1);
1223bf215546Sopenharmony_ci         nir_store_var(b, trav_vars->hit, nir_imm_true(b), 1);
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci         nir_ssa_def *terminate_on_first_hit =
1226bf215546Sopenharmony_ci            nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask);
1227bf215546Sopenharmony_ci         nir_ssa_def *ray_terminated = nir_load_var(b, vars->ahit_terminate);
1228bf215546Sopenharmony_ci         nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
1229bf215546Sopenharmony_ci         {
1230bf215546Sopenharmony_ci            nir_jump(b, nir_jump_break);
1231bf215546Sopenharmony_ci         }
1232bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1233bf215546Sopenharmony_ci      }
1234bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1235bf215546Sopenharmony_ci   }
1236bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
1237bf215546Sopenharmony_ci}
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_cistatic void
1240bf215546Sopenharmony_ciinsert_traversal_aabb_case(struct radv_device *device,
1241bf215546Sopenharmony_ci                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
1242bf215546Sopenharmony_ci                           const struct rt_variables *vars,
1243bf215546Sopenharmony_ci                           const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node)
1244bf215546Sopenharmony_ci{
1245bf215546Sopenharmony_ci   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);
1246bf215546Sopenharmony_ci   nir_ssa_def *triangle_info = nir_build_load_global(b, 2, 32, nir_iadd_imm(b, node_addr, 24));
1247bf215546Sopenharmony_ci   nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0);
1248bf215546Sopenharmony_ci   nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1);
1249bf215546Sopenharmony_ci   nir_ssa_def *geometry_id = nir_iand_imm(b, geometry_id_and_flags, 0xfffffff);
1250bf215546Sopenharmony_ci   nir_ssa_def *is_opaque = hit_is_opaque(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
1251bf215546Sopenharmony_ci                                          nir_load_var(b, vars->flags), geometry_id_and_flags);
1252bf215546Sopenharmony_ci
1253bf215546Sopenharmony_ci   nir_ssa_def *not_skip_aabb =
1254bf215546Sopenharmony_ci      nir_inot(b, nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsSkipAABBsKHRMask));
1255bf215546Sopenharmony_ci   nir_ssa_def *not_cull = nir_iand(
1256bf215546Sopenharmony_ci      b, not_skip_aabb,
1257bf215546Sopenharmony_ci      nir_ieq_imm(b,
1258bf215546Sopenharmony_ci                  nir_iand(b, nir_load_var(b, vars->flags),
1259bf215546Sopenharmony_ci                           nir_bcsel(b, is_opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask),
1260bf215546Sopenharmony_ci                                     nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))),
1261bf215546Sopenharmony_ci                  0));
1262bf215546Sopenharmony_ci   nir_push_if(b, not_cull);
1263bf215546Sopenharmony_ci   {
1264bf215546Sopenharmony_ci      nir_ssa_def *sbt_idx = nir_iadd(
1265bf215546Sopenharmony_ci         b,
1266bf215546Sopenharmony_ci         nir_iadd(b, nir_load_var(b, vars->sbt_offset),
1267bf215546Sopenharmony_ci                  nir_iand_imm(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 0xffffff)),
1268bf215546Sopenharmony_ci         nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id));
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_ci      struct rt_variables inner_vars = create_inner_vars(b, vars);
1271bf215546Sopenharmony_ci
1272bf215546Sopenharmony_ci      /* For AABBs the intersection shader writes the hit kind, and only does it if it is the
1273bf215546Sopenharmony_ci       * next closest hit candidate. */
1274bf215546Sopenharmony_ci      inner_vars.hit_kind = vars->hit_kind;
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.primitive_id, primitive_id, 1);
1277bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1);
1278bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.tmax, nir_load_var(b, vars->tmax), 0x1);
1279bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
1280bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
1281bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.custom_instance_and_mask,
1282bf215546Sopenharmony_ci                    nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);
1283bf215546Sopenharmony_ci      nir_store_var(b, inner_vars.opaque, is_opaque, 1);
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci      load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4);
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_ci      nir_store_var(b, vars->ahit_accept, nir_imm_false(b), 0x1);
1288bf215546Sopenharmony_ci      nir_store_var(b, vars->ahit_terminate, nir_imm_false(b), 0x1);
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci      nir_push_if(b, nir_ine_imm(b, nir_load_var(b, inner_vars.idx), 0));
1291bf215546Sopenharmony_ci      for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
1292bf215546Sopenharmony_ci         const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
1293bf215546Sopenharmony_ci         uint32_t shader_id = VK_SHADER_UNUSED_KHR;
1294bf215546Sopenharmony_ci         uint32_t any_hit_shader_id = VK_SHADER_UNUSED_KHR;
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci         switch (group_info->type) {
1297bf215546Sopenharmony_ci         case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
1298bf215546Sopenharmony_ci            shader_id = group_info->intersectionShader;
1299bf215546Sopenharmony_ci            any_hit_shader_id = group_info->anyHitShader;
1300bf215546Sopenharmony_ci            break;
1301bf215546Sopenharmony_ci         default:
1302bf215546Sopenharmony_ci            break;
1303bf215546Sopenharmony_ci         }
1304bf215546Sopenharmony_ci         if (shader_id == VK_SHADER_UNUSED_KHR)
1305bf215546Sopenharmony_ci            continue;
1306bf215546Sopenharmony_ci
1307bf215546Sopenharmony_ci         const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
1308bf215546Sopenharmony_ci         nir_shader *nir_stage = parse_rt_stage(device, stage);
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci         nir_shader *any_hit_stage = NULL;
1311bf215546Sopenharmony_ci         if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
1312bf215546Sopenharmony_ci            stage = &pCreateInfo->pStages[any_hit_shader_id];
1313bf215546Sopenharmony_ci            any_hit_stage = parse_rt_stage(device, stage);
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_ci            nir_lower_intersection_shader(nir_stage, any_hit_stage);
1316bf215546Sopenharmony_ci            ralloc_free(any_hit_stage);
1317bf215546Sopenharmony_ci         }
1318bf215546Sopenharmony_ci
1319bf215546Sopenharmony_ci         inner_vars.stage_idx = shader_id;
1320bf215546Sopenharmony_ci         insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, i + 2);
1321bf215546Sopenharmony_ci      }
1322bf215546Sopenharmony_ci      nir_push_else(b, NULL);
1323bf215546Sopenharmony_ci      {
1324bf215546Sopenharmony_ci         nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7);
1325bf215546Sopenharmony_ci         nir_ssa_def *vec3_inf =
1326bf215546Sopenharmony_ci            nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7);
1327bf215546Sopenharmony_ci
1328bf215546Sopenharmony_ci         nir_ssa_def *bvh_lo = nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, 0));
1329bf215546Sopenharmony_ci         nir_ssa_def *bvh_hi = nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, 12));
1330bf215546Sopenharmony_ci
1331bf215546Sopenharmony_ci         bvh_lo = nir_fsub(b, bvh_lo, nir_load_var(b, trav_vars->origin));
1332bf215546Sopenharmony_ci         bvh_hi = nir_fsub(b, bvh_hi, nir_load_var(b, trav_vars->origin));
1333bf215546Sopenharmony_ci         nir_ssa_def *t_vec = nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)),
1334bf215546Sopenharmony_ci                                       nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir)));
1335bf215546Sopenharmony_ci         nir_ssa_def *t2_vec = nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)),
1336bf215546Sopenharmony_ci                                        nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir)));
1337bf215546Sopenharmony_ci         /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */
1338bf215546Sopenharmony_ci         t2_vec =
1339bf215546Sopenharmony_ci            nir_bcsel(b, nir_feq(b, nir_load_var(b, trav_vars->dir), vec3_zero), vec3_inf, t2_vec);
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_ci         nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1));
1342bf215546Sopenharmony_ci         t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2));
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci         nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1));
1345bf215546Sopenharmony_ci         t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2));
1346bf215546Sopenharmony_ci
1347bf215546Sopenharmony_ci         nir_push_if(b, nir_iand(b, nir_fge(b, nir_load_var(b, vars->tmax), t_min),
1348bf215546Sopenharmony_ci                                 nir_fge(b, t_max, nir_load_var(b, vars->tmin))));
1349bf215546Sopenharmony_ci         {
1350bf215546Sopenharmony_ci            nir_store_var(b, vars->ahit_accept, nir_imm_true(b), 0x1);
1351bf215546Sopenharmony_ci            nir_store_var(b, vars->tmax, nir_fmax(b, t_min, nir_load_var(b, vars->tmin)), 1);
1352bf215546Sopenharmony_ci         }
1353bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1354bf215546Sopenharmony_ci      }
1355bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1356bf215546Sopenharmony_ci
1357bf215546Sopenharmony_ci      nir_push_if(b, nir_load_var(b, vars->ahit_accept));
1358bf215546Sopenharmony_ci      {
1359bf215546Sopenharmony_ci         nir_store_var(b, vars->primitive_id, primitive_id, 1);
1360bf215546Sopenharmony_ci         nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1);
1361bf215546Sopenharmony_ci         nir_store_var(b, vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1);
1362bf215546Sopenharmony_ci         nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
1363bf215546Sopenharmony_ci         nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
1364bf215546Sopenharmony_ci         nir_store_var(b, vars->custom_instance_and_mask,
1365bf215546Sopenharmony_ci                       nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);
1366bf215546Sopenharmony_ci
1367bf215546Sopenharmony_ci         nir_store_var(b, vars->idx, sbt_idx, 1);
1368bf215546Sopenharmony_ci         nir_store_var(b, trav_vars->hit, nir_imm_true(b), 1);
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci         nir_ssa_def *terminate_on_first_hit =
1371bf215546Sopenharmony_ci            nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask);
1372bf215546Sopenharmony_ci         nir_ssa_def *ray_terminated = nir_load_var(b, vars->ahit_terminate);
1373bf215546Sopenharmony_ci         nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
1374bf215546Sopenharmony_ci         {
1375bf215546Sopenharmony_ci            nir_jump(b, nir_jump_break);
1376bf215546Sopenharmony_ci         }
1377bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
1378bf215546Sopenharmony_ci      }
1379bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
1380bf215546Sopenharmony_ci   }
1381bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
1382bf215546Sopenharmony_ci}
1383bf215546Sopenharmony_ci
1384bf215546Sopenharmony_cistatic nir_shader *
1385bf215546Sopenharmony_cibuild_traversal_shader(struct radv_device *device,
1386bf215546Sopenharmony_ci                       const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1387bf215546Sopenharmony_ci                       const struct rt_variables *dst_vars,
1388bf215546Sopenharmony_ci                       struct hash_table *var_remap)
1389bf215546Sopenharmony_ci{
1390bf215546Sopenharmony_ci   nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_traversal");
1391bf215546Sopenharmony_ci   b.shader->info.internal = false;
1392bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = 8;
1393bf215546Sopenharmony_ci   b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
1394bf215546Sopenharmony_ci   struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, dst_vars->stack_sizes);
1395bf215546Sopenharmony_ci   map_rt_variables(var_remap, &vars, dst_vars);
1396bf215546Sopenharmony_ci
1397bf215546Sopenharmony_ci   unsigned lanes = device->physical_device->rt_wave_size;
1398bf215546Sopenharmony_ci   unsigned elements = lanes * MAX_STACK_ENTRY_COUNT;
1399bf215546Sopenharmony_ci   nir_variable *stack_var = nir_variable_create(b.shader, nir_var_mem_shared,
1400bf215546Sopenharmony_ci                                                 glsl_array_type(glsl_uint_type(), elements, 0),
1401bf215546Sopenharmony_ci                                                 "trav_stack");
1402bf215546Sopenharmony_ci   nir_deref_instr *stack_deref = nir_build_deref_var(&b, stack_var);
1403bf215546Sopenharmony_ci   nir_deref_instr *stack;
1404bf215546Sopenharmony_ci   nir_ssa_def *stack_idx_stride = nir_imm_int(&b, lanes);
1405bf215546Sopenharmony_ci   nir_ssa_def *stack_idx_base = nir_load_local_invocation_index(&b);
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci   nir_ssa_def *accel_struct = nir_load_var(&b, vars.accel_struct);
1408bf215546Sopenharmony_ci
1409bf215546Sopenharmony_ci   struct rt_traversal_vars trav_vars = init_traversal_vars(&b);
1410bf215546Sopenharmony_ci
1411bf215546Sopenharmony_ci   nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1);
1412bf215546Sopenharmony_ci
1413bf215546Sopenharmony_ci   nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0));
1414bf215546Sopenharmony_ci   {
1415bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1);
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_ci      nir_ssa_def *bvh_root = nir_build_load_global(
1418bf215546Sopenharmony_ci         &b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE, .align_mul = 64);
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci      nir_ssa_def *desc = create_bvh_descriptor(&b);
1421bf215546Sopenharmony_ci      nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7);
1422bf215546Sopenharmony_ci
1423bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7);
1424bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7);
1425bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7);
1426bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1);
1427bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1);
1428bf215546Sopenharmony_ci
1429bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.stack, nir_iadd(&b, stack_idx_base, stack_idx_stride), 1);
1430bf215546Sopenharmony_ci      stack = nir_build_deref_array(&b, stack_deref, stack_idx_base);
1431bf215546Sopenharmony_ci      nir_store_deref(&b, stack, bvh_root, 0x1);
1432bf215546Sopenharmony_ci
1433bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1);
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci      nir_push_loop(&b);
1436bf215546Sopenharmony_ci
1437bf215546Sopenharmony_ci      nir_push_if(&b, nir_ieq(&b, nir_load_var(&b, trav_vars.stack), stack_idx_base));
1438bf215546Sopenharmony_ci      nir_jump(&b, nir_jump_break);
1439bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
1440bf215546Sopenharmony_ci
1441bf215546Sopenharmony_ci      nir_push_if(
1442bf215546Sopenharmony_ci         &b, nir_uge(&b, nir_load_var(&b, trav_vars.top_stack), nir_load_var(&b, trav_vars.stack)));
1443bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1);
1444bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.bvh_base,
1445bf215546Sopenharmony_ci                    build_addr_to_node(&b, nir_load_var(&b, vars.accel_struct)), 1);
1446bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7);
1447bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7);
1448bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7);
1449bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1);
1450bf215546Sopenharmony_ci
1451bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_ci      nir_store_var(&b, trav_vars.stack,
1454bf215546Sopenharmony_ci                    nir_isub(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1);
1455bf215546Sopenharmony_ci
1456bf215546Sopenharmony_ci      stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack));
1457bf215546Sopenharmony_ci      nir_ssa_def *bvh_node = nir_load_deref(&b, stack);
1458bf215546Sopenharmony_ci      nir_ssa_def *bvh_node_type = nir_iand_imm(&b, bvh_node, 7);
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci      bvh_node = nir_iadd(&b, nir_load_var(&b, trav_vars.bvh_base), nir_u2u(&b, bvh_node, 64));
1461bf215546Sopenharmony_ci      nir_ssa_def *intrinsic_result = NULL;
1462bf215546Sopenharmony_ci      if (!radv_emulate_rt(device->physical_device)) {
1463bf215546Sopenharmony_ci         intrinsic_result = nir_bvh64_intersect_ray_amd(
1464bf215546Sopenharmony_ci            &b, 32, desc, nir_unpack_64_2x32(&b, bvh_node), nir_load_var(&b, vars.tmax),
1465bf215546Sopenharmony_ci            nir_load_var(&b, trav_vars.origin), nir_load_var(&b, trav_vars.dir),
1466bf215546Sopenharmony_ci            nir_load_var(&b, trav_vars.inv_dir));
1467bf215546Sopenharmony_ci      }
1468bf215546Sopenharmony_ci
1469bf215546Sopenharmony_ci      nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 4), 0));
1470bf215546Sopenharmony_ci      {
1471bf215546Sopenharmony_ci         nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 2), 0));
1472bf215546Sopenharmony_ci         {
1473bf215546Sopenharmony_ci            /* custom */
1474bf215546Sopenharmony_ci            nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 1), 0));
1475bf215546Sopenharmony_ci            if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR)) {
1476bf215546Sopenharmony_ci               insert_traversal_aabb_case(device, pCreateInfo, &b, &vars, &trav_vars, bvh_node);
1477bf215546Sopenharmony_ci            }
1478bf215546Sopenharmony_ci            nir_push_else(&b, NULL);
1479bf215546Sopenharmony_ci            {
1480bf215546Sopenharmony_ci               /* instance */
1481bf215546Sopenharmony_ci               nir_ssa_def *instance_node_addr = build_node_to_addr(device, &b, bvh_node);
1482bf215546Sopenharmony_ci               nir_ssa_def *instance_data =
1483bf215546Sopenharmony_ci                  nir_build_load_global(&b, 4, 32, instance_node_addr, .align_mul = 64);
1484bf215546Sopenharmony_ci               nir_ssa_def *wto_matrix[] = {
1485bf215546Sopenharmony_ci                  nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 16),
1486bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 16),
1487bf215546Sopenharmony_ci                  nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 32),
1488bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 32),
1489bf215546Sopenharmony_ci                  nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 48),
1490bf215546Sopenharmony_ci                                        .align_mul = 64, .align_offset = 48)};
1491bf215546Sopenharmony_ci               nir_ssa_def *instance_id =
1492bf215546Sopenharmony_ci                  nir_build_load_global(&b, 1, 32, nir_iadd_imm(&b, instance_node_addr, 88));
1493bf215546Sopenharmony_ci               nir_ssa_def *instance_and_mask = nir_channel(&b, instance_data, 2);
1494bf215546Sopenharmony_ci               nir_ssa_def *instance_mask = nir_ushr_imm(&b, instance_and_mask, 24);
1495bf215546Sopenharmony_ci
1496bf215546Sopenharmony_ci               nir_push_if(
1497bf215546Sopenharmony_ci                  &b,
1498bf215546Sopenharmony_ci                  nir_ieq_imm(&b, nir_iand(&b, instance_mask, nir_load_var(&b, vars.cull_mask)), 0));
1499bf215546Sopenharmony_ci               nir_jump(&b, nir_jump_continue);
1500bf215546Sopenharmony_ci               nir_pop_if(&b, NULL);
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.top_stack, nir_load_var(&b, trav_vars.stack), 1);
1503bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.bvh_base,
1504bf215546Sopenharmony_ci                             build_addr_to_node(
1505bf215546Sopenharmony_ci                                &b, nir_pack_64_2x32(&b, nir_channels(&b, instance_data, 0x3))),
1506bf215546Sopenharmony_ci                             1);
1507bf215546Sopenharmony_ci               stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack));
1508bf215546Sopenharmony_ci               nir_store_deref(&b, stack, nir_iand_imm(&b, nir_channel(&b, instance_data, 0), 63), 0x1);
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.stack,
1511bf215546Sopenharmony_ci                             nir_iadd(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1);
1512bf215546Sopenharmony_ci
1513bf215546Sopenharmony_ci               nir_store_var(
1514bf215546Sopenharmony_ci                  &b, trav_vars.origin,
1515bf215546Sopenharmony_ci                  nir_build_vec3_mat_mult_pre(&b, nir_load_var(&b, vars.origin), wto_matrix), 7);
1516bf215546Sopenharmony_ci               nir_store_var(
1517bf215546Sopenharmony_ci                  &b, trav_vars.dir,
1518bf215546Sopenharmony_ci                  nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.direction), wto_matrix, false),
1519bf215546Sopenharmony_ci                  7);
1520bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.inv_dir,
1521bf215546Sopenharmony_ci                             nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7);
1522bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.custom_instance_and_mask, instance_and_mask, 1);
1523bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_channel(&b, instance_data, 3),
1524bf215546Sopenharmony_ci                             1);
1525bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.instance_id, instance_id, 1);
1526bf215546Sopenharmony_ci               nir_store_var(&b, trav_vars.instance_addr, instance_node_addr, 1);
1527bf215546Sopenharmony_ci            }
1528bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
1529bf215546Sopenharmony_ci         }
1530bf215546Sopenharmony_ci         nir_push_else(&b, NULL);
1531bf215546Sopenharmony_ci         {
1532bf215546Sopenharmony_ci            /* box */
1533bf215546Sopenharmony_ci            nir_ssa_def *result = intrinsic_result;
1534bf215546Sopenharmony_ci            if (!result) {
1535bf215546Sopenharmony_ci               /* If we didn't run the intrinsic cause the hardware didn't support it,
1536bf215546Sopenharmony_ci                * emulate ray/box intersection here */
1537bf215546Sopenharmony_ci               result = intersect_ray_amd_software_box(device,
1538bf215546Sopenharmony_ci                  &b, bvh_node, nir_load_var(&b, vars.tmax), nir_load_var(&b, trav_vars.origin),
1539bf215546Sopenharmony_ci                  nir_load_var(&b, trav_vars.dir), nir_load_var(&b, trav_vars.inv_dir));
1540bf215546Sopenharmony_ci            }
1541bf215546Sopenharmony_ci
1542bf215546Sopenharmony_ci            for (unsigned i = 4; i-- > 0; ) {
1543bf215546Sopenharmony_ci               nir_ssa_def *new_node = nir_channel(&b, result, i);
1544bf215546Sopenharmony_ci               nir_push_if(&b, nir_ine_imm(&b, new_node, 0xffffffff));
1545bf215546Sopenharmony_ci               {
1546bf215546Sopenharmony_ci                  stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack));
1547bf215546Sopenharmony_ci                  nir_store_deref(&b, stack, new_node, 0x1);
1548bf215546Sopenharmony_ci                  nir_store_var(
1549bf215546Sopenharmony_ci                     &b, trav_vars.stack,
1550bf215546Sopenharmony_ci                     nir_iadd(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1);
1551bf215546Sopenharmony_ci               }
1552bf215546Sopenharmony_ci               nir_pop_if(&b, NULL);
1553bf215546Sopenharmony_ci            }
1554bf215546Sopenharmony_ci         }
1555bf215546Sopenharmony_ci         nir_pop_if(&b, NULL);
1556bf215546Sopenharmony_ci      }
1557bf215546Sopenharmony_ci      nir_push_else(&b, NULL);
1558bf215546Sopenharmony_ci      if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)) {
1559bf215546Sopenharmony_ci         nir_ssa_def *result = intrinsic_result;
1560bf215546Sopenharmony_ci         if (!result) {
1561bf215546Sopenharmony_ci            /* If we didn't run the intrinsic cause the hardware didn't support it,
1562bf215546Sopenharmony_ci             * emulate ray/tri intersection here */
1563bf215546Sopenharmony_ci            result = intersect_ray_amd_software_tri(device,
1564bf215546Sopenharmony_ci               &b, bvh_node, nir_load_var(&b, vars.tmax), nir_load_var(&b, trav_vars.origin),
1565bf215546Sopenharmony_ci               nir_load_var(&b, trav_vars.dir), nir_load_var(&b, trav_vars.inv_dir));
1566bf215546Sopenharmony_ci         }
1567bf215546Sopenharmony_ci         insert_traversal_triangle_case(device, pCreateInfo, &b, result, &vars, &trav_vars, bvh_node);
1568bf215546Sopenharmony_ci      }
1569bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci      nir_pop_loop(&b, NULL);
1572bf215546Sopenharmony_ci   }
1573bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
1574bf215546Sopenharmony_ci
1575bf215546Sopenharmony_ci   /* Initialize follow-up shader. */
1576bf215546Sopenharmony_ci   nir_push_if(&b, nir_load_var(&b, trav_vars.hit));
1577bf215546Sopenharmony_ci   {
1578bf215546Sopenharmony_ci      /* vars.idx contains the SBT index at this point. */
1579bf215546Sopenharmony_ci      load_sbt_entry(&b, &vars, nir_load_var(&b, vars.idx), SBT_HIT, 0);
1580bf215546Sopenharmony_ci
1581bf215546Sopenharmony_ci      nir_ssa_def *should_return = nir_ior(&b,
1582bf215546Sopenharmony_ci                                           nir_test_mask(&b, nir_load_var(&b, vars.flags),
1583bf215546Sopenharmony_ci                                                         SpvRayFlagsSkipClosestHitShaderKHRMask),
1584bf215546Sopenharmony_ci                                           nir_ieq_imm(&b, nir_load_var(&b, vars.idx), 0));
1585bf215546Sopenharmony_ci
1586bf215546Sopenharmony_ci      /* should_return is set if we had a hit but we won't be calling the closest hit shader and hence
1587bf215546Sopenharmony_ci       * need to return immediately to the calling shader. */
1588bf215546Sopenharmony_ci      nir_push_if(&b, should_return);
1589bf215546Sopenharmony_ci      {
1590bf215546Sopenharmony_ci         insert_rt_return(&b, &vars);
1591bf215546Sopenharmony_ci      }
1592bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
1593bf215546Sopenharmony_ci   }
1594bf215546Sopenharmony_ci   nir_push_else(&b, NULL);
1595bf215546Sopenharmony_ci   {
1596bf215546Sopenharmony_ci      /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
1597bf215546Sopenharmony_ci       * for miss shaders if none of the rays miss. */
1598bf215546Sopenharmony_ci      load_sbt_entry(&b, &vars, nir_load_var(&b, vars.miss_index), SBT_MISS, 0);
1599bf215546Sopenharmony_ci   }
1600bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
1601bf215546Sopenharmony_ci
1602bf215546Sopenharmony_ci   return b.shader;
1603bf215546Sopenharmony_ci}
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci
1606bf215546Sopenharmony_cistatic void
1607bf215546Sopenharmony_ciinsert_traversal(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1608bf215546Sopenharmony_ci                 nir_builder *b, const struct rt_variables *vars)
1609bf215546Sopenharmony_ci{
1610bf215546Sopenharmony_ci   struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL);
1611bf215546Sopenharmony_ci   nir_shader *shader = build_traversal_shader(device, pCreateInfo, vars, var_remap);
1612bf215546Sopenharmony_ci
1613bf215546Sopenharmony_ci   /* For now, just inline the traversal shader */
1614bf215546Sopenharmony_ci   nir_push_if(b, nir_ieq_imm(b, nir_load_var(b, vars->idx), 1));
1615bf215546Sopenharmony_ci   nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1);
1616bf215546Sopenharmony_ci   nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap);
1617bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_ci   /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */
1620bf215546Sopenharmony_ci   ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader));
1621bf215546Sopenharmony_ci
1622bf215546Sopenharmony_ci   ralloc_free(var_remap);
1623bf215546Sopenharmony_ci}
1624bf215546Sopenharmony_ci
1625bf215546Sopenharmony_cistatic unsigned
1626bf215546Sopenharmony_cicompute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1627bf215546Sopenharmony_ci                      const struct radv_pipeline_shader_stack_size *stack_sizes)
1628bf215546Sopenharmony_ci{
1629bf215546Sopenharmony_ci   unsigned raygen_size = 0;
1630bf215546Sopenharmony_ci   unsigned callable_size = 0;
1631bf215546Sopenharmony_ci   unsigned chit_size = 0;
1632bf215546Sopenharmony_ci   unsigned miss_size = 0;
1633bf215546Sopenharmony_ci   unsigned non_recursive_size = 0;
1634bf215546Sopenharmony_ci
1635bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
1636bf215546Sopenharmony_ci      non_recursive_size = MAX2(stack_sizes[i].non_recursive_size, non_recursive_size);
1637bf215546Sopenharmony_ci
1638bf215546Sopenharmony_ci      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
1639bf215546Sopenharmony_ci      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
1640bf215546Sopenharmony_ci      unsigned size = stack_sizes[i].recursive_size;
1641bf215546Sopenharmony_ci
1642bf215546Sopenharmony_ci      switch (group_info->type) {
1643bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
1644bf215546Sopenharmony_ci         shader_id = group_info->generalShader;
1645bf215546Sopenharmony_ci         break;
1646bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
1647bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
1648bf215546Sopenharmony_ci         shader_id = group_info->closestHitShader;
1649bf215546Sopenharmony_ci         break;
1650bf215546Sopenharmony_ci      default:
1651bf215546Sopenharmony_ci         break;
1652bf215546Sopenharmony_ci      }
1653bf215546Sopenharmony_ci      if (shader_id == VK_SHADER_UNUSED_KHR)
1654bf215546Sopenharmony_ci         continue;
1655bf215546Sopenharmony_ci
1656bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
1657bf215546Sopenharmony_ci      switch (stage->stage) {
1658bf215546Sopenharmony_ci      case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
1659bf215546Sopenharmony_ci         raygen_size = MAX2(raygen_size, size);
1660bf215546Sopenharmony_ci         break;
1661bf215546Sopenharmony_ci      case VK_SHADER_STAGE_MISS_BIT_KHR:
1662bf215546Sopenharmony_ci         miss_size = MAX2(miss_size, size);
1663bf215546Sopenharmony_ci         break;
1664bf215546Sopenharmony_ci      case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
1665bf215546Sopenharmony_ci         chit_size = MAX2(chit_size, size);
1666bf215546Sopenharmony_ci         break;
1667bf215546Sopenharmony_ci      case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
1668bf215546Sopenharmony_ci         callable_size = MAX2(callable_size, size);
1669bf215546Sopenharmony_ci         break;
1670bf215546Sopenharmony_ci      default:
1671bf215546Sopenharmony_ci         unreachable("Invalid stage type in RT shader");
1672bf215546Sopenharmony_ci      }
1673bf215546Sopenharmony_ci   }
1674bf215546Sopenharmony_ci   return raygen_size +
1675bf215546Sopenharmony_ci          MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) *
1676bf215546Sopenharmony_ci             MAX2(MAX2(chit_size, miss_size), non_recursive_size) +
1677bf215546Sopenharmony_ci          MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) *
1678bf215546Sopenharmony_ci             MAX2(chit_size, miss_size) +
1679bf215546Sopenharmony_ci          2 * callable_size;
1680bf215546Sopenharmony_ci}
1681bf215546Sopenharmony_ci
1682bf215546Sopenharmony_cibool
1683bf215546Sopenharmony_ciradv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
1684bf215546Sopenharmony_ci{
1685bf215546Sopenharmony_ci   if (!pCreateInfo->pDynamicState)
1686bf215546Sopenharmony_ci      return false;
1687bf215546Sopenharmony_ci
1688bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
1689bf215546Sopenharmony_ci      if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
1690bf215546Sopenharmony_ci          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
1691bf215546Sopenharmony_ci         return true;
1692bf215546Sopenharmony_ci   }
1693bf215546Sopenharmony_ci
1694bf215546Sopenharmony_ci   return false;
1695bf215546Sopenharmony_ci}
1696bf215546Sopenharmony_ci
1697bf215546Sopenharmony_cistatic bool
1698bf215546Sopenharmony_cishould_move_rt_instruction(nir_intrinsic_op intrinsic)
1699bf215546Sopenharmony_ci{
1700bf215546Sopenharmony_ci   switch (intrinsic) {
1701bf215546Sopenharmony_ci   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
1702bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_flags:
1703bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_object_origin:
1704bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_world_origin:
1705bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_t_min:
1706bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_object_direction:
1707bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_world_direction:
1708bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_t_max:
1709bf215546Sopenharmony_ci      return true;
1710bf215546Sopenharmony_ci   default:
1711bf215546Sopenharmony_ci      return false;
1712bf215546Sopenharmony_ci   }
1713bf215546Sopenharmony_ci}
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_cistatic void
1716bf215546Sopenharmony_cimove_rt_instructions(nir_shader *shader)
1717bf215546Sopenharmony_ci{
1718bf215546Sopenharmony_ci   nir_cursor target = nir_before_cf_list(&nir_shader_get_entrypoint(shader)->body);
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci   nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
1721bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
1722bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
1723bf215546Sopenharmony_ci            continue;
1724bf215546Sopenharmony_ci
1725bf215546Sopenharmony_ci         nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
1726bf215546Sopenharmony_ci
1727bf215546Sopenharmony_ci         if (!should_move_rt_instruction(intrinsic->intrinsic))
1728bf215546Sopenharmony_ci            continue;
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci         nir_instr_move(target, instr);
1731bf215546Sopenharmony_ci      }
1732bf215546Sopenharmony_ci   }
1733bf215546Sopenharmony_ci
1734bf215546Sopenharmony_ci   nir_metadata_preserve(nir_shader_get_entrypoint(shader),
1735bf215546Sopenharmony_ci                         nir_metadata_all & (~nir_metadata_instr_index));
1736bf215546Sopenharmony_ci}
1737bf215546Sopenharmony_ci
1738bf215546Sopenharmony_cistatic nir_shader *
1739bf215546Sopenharmony_cicreate_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1740bf215546Sopenharmony_ci                 struct radv_pipeline_shader_stack_size *stack_sizes)
1741bf215546Sopenharmony_ci{
1742bf215546Sopenharmony_ci   struct radv_pipeline_key key;
1743bf215546Sopenharmony_ci   memset(&key, 0, sizeof(key));
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci   nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_combined");
1746bf215546Sopenharmony_ci   b.shader->info.internal = false;
1747bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = 8;
1748bf215546Sopenharmony_ci   b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
1749bf215546Sopenharmony_ci
1750bf215546Sopenharmony_ci   struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes);
1751bf215546Sopenharmony_ci   load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0);
1752bf215546Sopenharmony_ci   nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
1753bf215546Sopenharmony_ci
1754bf215546Sopenharmony_ci   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);
1755bf215546Sopenharmony_ci
1756bf215546Sopenharmony_ci   nir_loop *loop = nir_push_loop(&b);
1757bf215546Sopenharmony_ci
1758bf215546Sopenharmony_ci   nir_push_if(&b, nir_ior(&b, nir_ieq_imm(&b, nir_load_var(&b, vars.idx), 0),
1759bf215546Sopenharmony_ci                           nir_inot(&b, nir_load_var(&b, vars.main_loop_case_visited))));
1760bf215546Sopenharmony_ci   nir_jump(&b, nir_jump_break);
1761bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
1762bf215546Sopenharmony_ci
1763bf215546Sopenharmony_ci   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, false), 1);
1764bf215546Sopenharmony_ci
1765bf215546Sopenharmony_ci   insert_traversal(device, pCreateInfo, &b, &vars);
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci   nir_ssa_def *idx = nir_load_var(&b, vars.idx);
1768bf215546Sopenharmony_ci
1769bf215546Sopenharmony_ci   /* We do a trick with the indexing of the resume shaders so that the first
1770bf215546Sopenharmony_ci    * shader of stage x always gets id x and the resume shader ids then come after
1771bf215546Sopenharmony_ci    * stageCount. This makes the shadergroup handles independent of compilation. */
1772bf215546Sopenharmony_ci   unsigned call_idx_base = pCreateInfo->stageCount + 1;
1773bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->stageCount; ++i) {
1774bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
1775bf215546Sopenharmony_ci      gl_shader_stage type = vk_to_mesa_shader_stage(stage->stage);
1776bf215546Sopenharmony_ci      if (type != MESA_SHADER_RAYGEN && type != MESA_SHADER_CALLABLE &&
1777bf215546Sopenharmony_ci          type != MESA_SHADER_CLOSEST_HIT && type != MESA_SHADER_MISS)
1778bf215546Sopenharmony_ci         continue;
1779bf215546Sopenharmony_ci
1780bf215546Sopenharmony_ci      nir_shader *nir_stage = parse_rt_stage(device, stage);
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci      /* Move ray tracing system values to the top that are set by rt_trace_ray
1783bf215546Sopenharmony_ci       * to prevent them from being overwritten by other rt_trace_ray calls.
1784bf215546Sopenharmony_ci       */
1785bf215546Sopenharmony_ci      NIR_PASS_V(nir_stage, move_rt_instructions);
1786bf215546Sopenharmony_ci
1787bf215546Sopenharmony_ci      uint32_t num_resume_shaders = 0;
1788bf215546Sopenharmony_ci      nir_shader **resume_shaders = NULL;
1789bf215546Sopenharmony_ci      nir_lower_shader_calls(nir_stage, nir_address_format_32bit_offset, 16, &resume_shaders,
1790bf215546Sopenharmony_ci                             &num_resume_shaders, nir_stage);
1791bf215546Sopenharmony_ci
1792bf215546Sopenharmony_ci      vars.stage_idx = i;
1793bf215546Sopenharmony_ci      insert_rt_case(&b, nir_stage, &vars, idx, call_idx_base, i + 2);
1794bf215546Sopenharmony_ci      for (unsigned j = 0; j < num_resume_shaders; ++j) {
1795bf215546Sopenharmony_ci         insert_rt_case(&b, resume_shaders[j], &vars, idx, call_idx_base, call_idx_base + 1 + j);
1796bf215546Sopenharmony_ci      }
1797bf215546Sopenharmony_ci      call_idx_base += num_resume_shaders;
1798bf215546Sopenharmony_ci   }
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_ci   nir_pop_loop(&b, loop);
1801bf215546Sopenharmony_ci
1802bf215546Sopenharmony_ci   if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
1803bf215546Sopenharmony_ci      /* Put something so scratch gets enabled in the shader. */
1804bf215546Sopenharmony_ci      b.shader->scratch_size = 16;
1805bf215546Sopenharmony_ci   } else
1806bf215546Sopenharmony_ci      b.shader->scratch_size = compute_rt_stack_size(pCreateInfo, stack_sizes);
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ci   /* Deal with all the inline functions. */
1809bf215546Sopenharmony_ci   nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader));
1810bf215546Sopenharmony_ci   nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none);
1811bf215546Sopenharmony_ci
1812bf215546Sopenharmony_ci   return b.shader;
1813bf215546Sopenharmony_ci}
1814bf215546Sopenharmony_ci
1815bf215546Sopenharmony_cistatic VkResult
1816bf215546Sopenharmony_ciradv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
1817bf215546Sopenharmony_ci                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1818bf215546Sopenharmony_ci                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
1819bf215546Sopenharmony_ci{
1820bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
1821bf215546Sopenharmony_ci   VkResult result;
1822bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = NULL;
1823bf215546Sopenharmony_ci   struct radv_compute_pipeline *compute_pipeline = NULL;
1824bf215546Sopenharmony_ci   struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
1825bf215546Sopenharmony_ci   uint8_t hash[20];
1826bf215546Sopenharmony_ci   nir_shader *shader = NULL;
1827bf215546Sopenharmony_ci   bool keep_statistic_info =
1828bf215546Sopenharmony_ci      (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
1829bf215546Sopenharmony_ci      (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
1832bf215546Sopenharmony_ci      return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci   VkRayTracingPipelineCreateInfoKHR local_create_info =
1835bf215546Sopenharmony_ci      radv_create_merged_rt_create_info(pCreateInfo);
1836bf215546Sopenharmony_ci   if (!local_create_info.pStages || !local_create_info.pGroups) {
1837bf215546Sopenharmony_ci      result = VK_ERROR_OUT_OF_HOST_MEMORY;
1838bf215546Sopenharmony_ci      goto fail;
1839bf215546Sopenharmony_ci   }
1840bf215546Sopenharmony_ci
1841bf215546Sopenharmony_ci   radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
1842bf215546Sopenharmony_ci   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
1843bf215546Sopenharmony_ci
1844bf215546Sopenharmony_ci   VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_size = {
1845bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,
1846bf215546Sopenharmony_ci      .pNext = NULL,
1847bf215546Sopenharmony_ci      .requiredSubgroupSize = device->physical_device->rt_wave_size,
1848bf215546Sopenharmony_ci   };
1849bf215546Sopenharmony_ci
1850bf215546Sopenharmony_ci   VkComputePipelineCreateInfo compute_info = {
1851bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1852bf215546Sopenharmony_ci      .pNext = NULL,
1853bf215546Sopenharmony_ci      .flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT,
1854bf215546Sopenharmony_ci      .stage =
1855bf215546Sopenharmony_ci         {
1856bf215546Sopenharmony_ci            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1857bf215546Sopenharmony_ci            .pNext = &subgroup_size,
1858bf215546Sopenharmony_ci            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1859bf215546Sopenharmony_ci            .module = vk_shader_module_to_handle(&module),
1860bf215546Sopenharmony_ci            .pName = "main",
1861bf215546Sopenharmony_ci         },
1862bf215546Sopenharmony_ci      .layout = pCreateInfo->layout,
1863bf215546Sopenharmony_ci   };
1864bf215546Sopenharmony_ci
1865bf215546Sopenharmony_ci   /* First check if we can get things from the cache before we take the expensive step of
1866bf215546Sopenharmony_ci    * generating the nir. */
1867bf215546Sopenharmony_ci   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
1868bf215546Sopenharmony_ci                                         stack_sizes, local_create_info.groupCount, pPipeline);
1869bf215546Sopenharmony_ci
1870bf215546Sopenharmony_ci   if (result == VK_PIPELINE_COMPILE_REQUIRED) {
1871bf215546Sopenharmony_ci      if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1872bf215546Sopenharmony_ci         goto fail;
1873bf215546Sopenharmony_ci
1874bf215546Sopenharmony_ci      stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
1875bf215546Sopenharmony_ci      if (!stack_sizes) {
1876bf215546Sopenharmony_ci         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1877bf215546Sopenharmony_ci         goto fail;
1878bf215546Sopenharmony_ci      }
1879bf215546Sopenharmony_ci
1880bf215546Sopenharmony_ci      shader = create_rt_shader(device, &local_create_info, stack_sizes);
1881bf215546Sopenharmony_ci      module.nir = shader;
1882bf215546Sopenharmony_ci      compute_info.flags = pCreateInfo->flags;
1883bf215546Sopenharmony_ci      result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
1884bf215546Sopenharmony_ci                                            stack_sizes, local_create_info.groupCount, pPipeline);
1885bf215546Sopenharmony_ci      stack_sizes = NULL;
1886bf215546Sopenharmony_ci
1887bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
1888bf215546Sopenharmony_ci         goto shader_fail;
1889bf215546Sopenharmony_ci   }
1890bf215546Sopenharmony_ci   pipeline = radv_pipeline_from_handle(*pPipeline);
1891bf215546Sopenharmony_ci   compute_pipeline = radv_pipeline_to_compute(pipeline);
1892bf215546Sopenharmony_ci
1893bf215546Sopenharmony_ci   compute_pipeline->rt_group_handles =
1894bf215546Sopenharmony_ci      calloc(sizeof(*compute_pipeline->rt_group_handles), local_create_info.groupCount);
1895bf215546Sopenharmony_ci   if (!compute_pipeline->rt_group_handles) {
1896bf215546Sopenharmony_ci      result = VK_ERROR_OUT_OF_HOST_MEMORY;
1897bf215546Sopenharmony_ci      goto shader_fail;
1898bf215546Sopenharmony_ci   }
1899bf215546Sopenharmony_ci
1900bf215546Sopenharmony_ci   compute_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
1901bf215546Sopenharmony_ci
1902bf215546Sopenharmony_ci   /* For General and ClosestHit shaders, we can use the shader ID directly as handle.
1903bf215546Sopenharmony_ci    * As (potentially different) AnyHit shaders are inlined, for Intersection shaders
1904bf215546Sopenharmony_ci    * we use the Group ID.
1905bf215546Sopenharmony_ci    */
1906bf215546Sopenharmony_ci   for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
1907bf215546Sopenharmony_ci      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
1908bf215546Sopenharmony_ci      switch (group_info->type) {
1909bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
1910bf215546Sopenharmony_ci         if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
1911bf215546Sopenharmony_ci            compute_pipeline->rt_group_handles[i].handles[0] = group_info->generalShader + 2;
1912bf215546Sopenharmony_ci         break;
1913bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
1914bf215546Sopenharmony_ci         if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
1915bf215546Sopenharmony_ci            compute_pipeline->rt_group_handles[i].handles[1] = i + 2;
1916bf215546Sopenharmony_ci         FALLTHROUGH;
1917bf215546Sopenharmony_ci      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
1918bf215546Sopenharmony_ci         if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
1919bf215546Sopenharmony_ci            compute_pipeline->rt_group_handles[i].handles[0] = group_info->closestHitShader + 2;
1920bf215546Sopenharmony_ci         if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
1921bf215546Sopenharmony_ci            compute_pipeline->rt_group_handles[i].handles[1] = i + 2;
1922bf215546Sopenharmony_ci         break;
1923bf215546Sopenharmony_ci      case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
1924bf215546Sopenharmony_ci         unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
1925bf215546Sopenharmony_ci      }
1926bf215546Sopenharmony_ci   }
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_cishader_fail:
1929bf215546Sopenharmony_ci   if (result != VK_SUCCESS && pipeline)
1930bf215546Sopenharmony_ci      radv_pipeline_destroy(device, pipeline, pAllocator);
1931bf215546Sopenharmony_ci   ralloc_free(shader);
1932bf215546Sopenharmony_cifail:
1933bf215546Sopenharmony_ci   free((void *)local_create_info.pGroups);
1934bf215546Sopenharmony_ci   free((void *)local_create_info.pStages);
1935bf215546Sopenharmony_ci   free(stack_sizes);
1936bf215546Sopenharmony_ci   return result;
1937bf215546Sopenharmony_ci}
1938bf215546Sopenharmony_ci
1939bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
1940bf215546Sopenharmony_ciradv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1941bf215546Sopenharmony_ci                                  VkPipelineCache pipelineCache, uint32_t count,
1942bf215546Sopenharmony_ci                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1943bf215546Sopenharmony_ci                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
1944bf215546Sopenharmony_ci{
1945bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
1946bf215546Sopenharmony_ci
1947bf215546Sopenharmony_ci   unsigned i = 0;
1948bf215546Sopenharmony_ci   for (; i < count; i++) {
1949bf215546Sopenharmony_ci      VkResult r;
1950bf215546Sopenharmony_ci      r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
1951bf215546Sopenharmony_ci                                  &pPipelines[i]);
1952bf215546Sopenharmony_ci      if (r != VK_SUCCESS) {
1953bf215546Sopenharmony_ci         result = r;
1954bf215546Sopenharmony_ci         pPipelines[i] = VK_NULL_HANDLE;
1955bf215546Sopenharmony_ci
1956bf215546Sopenharmony_ci         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1957bf215546Sopenharmony_ci            break;
1958bf215546Sopenharmony_ci      }
1959bf215546Sopenharmony_ci   }
1960bf215546Sopenharmony_ci
1961bf215546Sopenharmony_ci   for (; i < count; ++i)
1962bf215546Sopenharmony_ci      pPipelines[i] = VK_NULL_HANDLE;
1963bf215546Sopenharmony_ci
1964bf215546Sopenharmony_ci   return result;
1965bf215546Sopenharmony_ci}
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
1968bf215546Sopenharmony_ciradv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
1969bf215546Sopenharmony_ci                                        uint32_t groupCount, size_t dataSize, void *pData)
1970bf215546Sopenharmony_ci{
1971bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1972bf215546Sopenharmony_ci   struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
1973bf215546Sopenharmony_ci   char *data = pData;
1974bf215546Sopenharmony_ci
1975bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(*compute_pipeline->rt_group_handles) <= RADV_RT_HANDLE_SIZE);
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_ci   memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
1978bf215546Sopenharmony_ci
1979bf215546Sopenharmony_ci   for (uint32_t i = 0; i < groupCount; ++i) {
1980bf215546Sopenharmony_ci      memcpy(data + i * RADV_RT_HANDLE_SIZE, &compute_pipeline->rt_group_handles[firstGroup + i],
1981bf215546Sopenharmony_ci             sizeof(*compute_pipeline->rt_group_handles));
1982bf215546Sopenharmony_ci   }
1983bf215546Sopenharmony_ci
1984bf215546Sopenharmony_ci   return VK_SUCCESS;
1985bf215546Sopenharmony_ci}
1986bf215546Sopenharmony_ci
1987bf215546Sopenharmony_ciVKAPI_ATTR VkDeviceSize VKAPI_CALL
1988bf215546Sopenharmony_ciradv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
1989bf215546Sopenharmony_ci                                          VkShaderGroupShaderKHR groupShader)
1990bf215546Sopenharmony_ci{
1991bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1992bf215546Sopenharmony_ci   struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
1993bf215546Sopenharmony_ci   const struct radv_pipeline_shader_stack_size *stack_size =
1994bf215546Sopenharmony_ci      &compute_pipeline->rt_stack_sizes[group];
1995bf215546Sopenharmony_ci
1996bf215546Sopenharmony_ci   if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR ||
1997bf215546Sopenharmony_ci       groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR)
1998bf215546Sopenharmony_ci      return stack_size->non_recursive_size;
1999bf215546Sopenharmony_ci   else
2000bf215546Sopenharmony_ci      return stack_size->recursive_size;
2001bf215546Sopenharmony_ci}
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
2004bf215546Sopenharmony_ciradv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice _device, VkPipeline pipeline,
2005bf215546Sopenharmony_ci                                                     uint32_t firstGroup, uint32_t groupCount,
2006bf215546Sopenharmony_ci                                                     size_t dataSize, void *pData)
2007bf215546Sopenharmony_ci{
2008bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
2009bf215546Sopenharmony_ci   unreachable("Unimplemented");
2010bf215546Sopenharmony_ci   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
2011bf215546Sopenharmony_ci}
2012