1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "radv_acceleration_structure.h" 25bf215546Sopenharmony_ci#include "radv_debug.h" 26bf215546Sopenharmony_ci#include "radv_meta.h" 27bf215546Sopenharmony_ci#include "radv_private.h" 28bf215546Sopenharmony_ci#include "radv_rt_common.h" 29bf215546Sopenharmony_ci#include "radv_shader.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "nir/nir.h" 32bf215546Sopenharmony_ci#include "nir/nir_builder.h" 33bf215546Sopenharmony_ci#include "nir/nir_builtin_builder.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic VkRayTracingPipelineCreateInfoKHR 36bf215546Sopenharmony_ciradv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo) 37bf215546Sopenharmony_ci{ 38bf215546Sopenharmony_ci VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo; 39bf215546Sopenharmony_ci uint32_t total_stages = pCreateInfo->stageCount; 40bf215546Sopenharmony_ci uint32_t total_groups = pCreateInfo->groupCount; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci if (pCreateInfo->pLibraryInfo) { 43bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { 44bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); 45bf215546Sopenharmony_ci struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline); 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci total_stages += library_pipeline->stage_count; 48bf215546Sopenharmony_ci total_groups += library_pipeline->group_count; 49bf215546Sopenharmony_ci } 50bf215546Sopenharmony_ci } 51bf215546Sopenharmony_ci VkPipelineShaderStageCreateInfo *stages = NULL; 52bf215546Sopenharmony_ci VkRayTracingShaderGroupCreateInfoKHR *groups = NULL; 53bf215546Sopenharmony_ci local_create_info.stageCount = total_stages; 54bf215546Sopenharmony_ci local_create_info.groupCount = total_groups; 55bf215546Sopenharmony_ci local_create_info.pStages = stages = 56bf215546Sopenharmony_ci malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages); 57bf215546Sopenharmony_ci local_create_info.pGroups = groups = 58bf215546Sopenharmony_ci malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups); 59bf215546Sopenharmony_ci if (!local_create_info.pStages || !local_create_info.pGroups) 60bf215546Sopenharmony_ci return local_create_info; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci total_stages = pCreateInfo->stageCount; 63bf215546Sopenharmony_ci total_groups = pCreateInfo->groupCount; 64bf215546Sopenharmony_ci for (unsigned j = 0; j < pCreateInfo->stageCount; ++j) 65bf215546Sopenharmony_ci stages[j] = pCreateInfo->pStages[j]; 66bf215546Sopenharmony_ci for (unsigned j = 0; j < pCreateInfo->groupCount; ++j) 67bf215546Sopenharmony_ci groups[j] = pCreateInfo->pGroups[j]; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci if (pCreateInfo->pLibraryInfo) { 70bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { 71bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); 72bf215546Sopenharmony_ci struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline); 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci for (unsigned j = 0; j < library_pipeline->stage_count; ++j) 75bf215546Sopenharmony_ci stages[total_stages + j] = library_pipeline->stages[j]; 76bf215546Sopenharmony_ci for (unsigned j = 0; j < library_pipeline->group_count; ++j) { 77bf215546Sopenharmony_ci VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j]; 78bf215546Sopenharmony_ci *dst = library_pipeline->groups[j]; 79bf215546Sopenharmony_ci if (dst->generalShader != VK_SHADER_UNUSED_KHR) 80bf215546Sopenharmony_ci dst->generalShader += total_stages; 81bf215546Sopenharmony_ci if (dst->closestHitShader != VK_SHADER_UNUSED_KHR) 82bf215546Sopenharmony_ci dst->closestHitShader += total_stages; 83bf215546Sopenharmony_ci if (dst->anyHitShader != VK_SHADER_UNUSED_KHR) 84bf215546Sopenharmony_ci dst->anyHitShader += total_stages; 85bf215546Sopenharmony_ci if (dst->intersectionShader != VK_SHADER_UNUSED_KHR) 86bf215546Sopenharmony_ci dst->intersectionShader += total_stages; 87bf215546Sopenharmony_ci } 88bf215546Sopenharmony_ci total_stages += library_pipeline->stage_count; 89bf215546Sopenharmony_ci total_groups += library_pipeline->group_count; 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci return local_create_info; 93bf215546Sopenharmony_ci} 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_cistatic VkResult 96bf215546Sopenharmony_ciradv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache, 97bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 98bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) 99bf215546Sopenharmony_ci{ 100bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 101bf215546Sopenharmony_ci struct radv_library_pipeline *pipeline; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 104bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 105bf215546Sopenharmony_ci if (pipeline == NULL) 106bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_LIBRARY); 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci VkRayTracingPipelineCreateInfoKHR local_create_info = 111bf215546Sopenharmony_ci radv_create_merged_rt_create_info(pCreateInfo); 112bf215546Sopenharmony_ci if (!local_create_info.pStages || !local_create_info.pGroups) 113bf215546Sopenharmony_ci goto fail; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (local_create_info.stageCount) { 116bf215546Sopenharmony_ci pipeline->stage_count = local_create_info.stageCount; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount; 119bf215546Sopenharmony_ci pipeline->stages = malloc(size); 120bf215546Sopenharmony_ci if (!pipeline->stages) 121bf215546Sopenharmony_ci goto fail; 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci memcpy(pipeline->stages, local_create_info.pStages, size); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci pipeline->hashes = malloc(sizeof(*pipeline->hashes) * local_create_info.stageCount); 126bf215546Sopenharmony_ci if (!pipeline->hashes) 127bf215546Sopenharmony_ci goto fail; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci pipeline->identifiers = malloc(sizeof(*pipeline->identifiers) * local_create_info.stageCount); 130bf215546Sopenharmony_ci if (!pipeline->identifiers) 131bf215546Sopenharmony_ci goto fail; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci for (uint32_t i = 0; i < local_create_info.stageCount; i++) { 134bf215546Sopenharmony_ci RADV_FROM_HANDLE(vk_shader_module, module, pipeline->stages[i].module); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = 137bf215546Sopenharmony_ci vk_find_struct_const(local_create_info.pStages[i].pNext, 138bf215546Sopenharmony_ci PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (module) { 141bf215546Sopenharmony_ci struct vk_shader_module *new_module = vk_shader_module_clone(NULL, module); 142bf215546Sopenharmony_ci pipeline->stages[i].module = vk_shader_module_to_handle(new_module); 143bf215546Sopenharmony_ci pipeline->stages[i].pNext = NULL; 144bf215546Sopenharmony_ci } else { 145bf215546Sopenharmony_ci assert(iinfo); 146bf215546Sopenharmony_ci pipeline->identifiers[i].identifierSize = 147bf215546Sopenharmony_ci MIN2(iinfo->identifierSize, sizeof(pipeline->hashes[i].sha1)); 148bf215546Sopenharmony_ci memcpy(pipeline->hashes[i].sha1, iinfo->pIdentifier, 149bf215546Sopenharmony_ci pipeline->identifiers[i].identifierSize); 150bf215546Sopenharmony_ci pipeline->stages[i].module = VK_NULL_HANDLE; 151bf215546Sopenharmony_ci pipeline->stages[i].pNext = &pipeline->identifiers[i]; 152bf215546Sopenharmony_ci pipeline->identifiers[i].sType = 153bf215546Sopenharmony_ci VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT; 154bf215546Sopenharmony_ci pipeline->identifiers[i].pNext = NULL; 155bf215546Sopenharmony_ci pipeline->identifiers[i].pIdentifier = pipeline->hashes[i].sha1; 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci if (local_create_info.groupCount) { 161bf215546Sopenharmony_ci size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount; 162bf215546Sopenharmony_ci pipeline->group_count = local_create_info.groupCount; 163bf215546Sopenharmony_ci pipeline->groups = malloc(size); 164bf215546Sopenharmony_ci if (!pipeline->groups) 165bf215546Sopenharmony_ci goto fail; 166bf215546Sopenharmony_ci memcpy(pipeline->groups, local_create_info.pGroups, size); 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci *pPipeline = radv_pipeline_to_handle(&pipeline->base); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci free((void *)local_create_info.pGroups); 172bf215546Sopenharmony_ci free((void *)local_create_info.pStages); 173bf215546Sopenharmony_ci return VK_SUCCESS; 174bf215546Sopenharmony_cifail: 175bf215546Sopenharmony_ci free(pipeline->groups); 176bf215546Sopenharmony_ci free(pipeline->stages); 177bf215546Sopenharmony_ci free(pipeline->hashes); 178bf215546Sopenharmony_ci free(pipeline->identifiers); 179bf215546Sopenharmony_ci free((void *)local_create_info.pGroups); 180bf215546Sopenharmony_ci free((void *)local_create_info.pStages); 181bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci/* 185bf215546Sopenharmony_ci * Global variables for an RT pipeline 186bf215546Sopenharmony_ci */ 187bf215546Sopenharmony_cistruct rt_variables { 188bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *create_info; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* idx of the next shader to run in the next iteration of the main loop. 191bf215546Sopenharmony_ci * During traversal, idx is used to store the SBT index and will contain 192bf215546Sopenharmony_ci * the correct resume index upon returning. 193bf215546Sopenharmony_ci */ 194bf215546Sopenharmony_ci nir_variable *idx; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* scratch offset of the argument area relative to stack_ptr */ 197bf215546Sopenharmony_ci nir_variable *arg; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci nir_variable *stack_ptr; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci /* global address of the SBT entry used for the shader */ 202bf215546Sopenharmony_ci nir_variable *shader_record_ptr; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci /* trace_ray arguments */ 205bf215546Sopenharmony_ci nir_variable *accel_struct; 206bf215546Sopenharmony_ci nir_variable *flags; 207bf215546Sopenharmony_ci nir_variable *cull_mask; 208bf215546Sopenharmony_ci nir_variable *sbt_offset; 209bf215546Sopenharmony_ci nir_variable *sbt_stride; 210bf215546Sopenharmony_ci nir_variable *miss_index; 211bf215546Sopenharmony_ci nir_variable *origin; 212bf215546Sopenharmony_ci nir_variable *tmin; 213bf215546Sopenharmony_ci nir_variable *direction; 214bf215546Sopenharmony_ci nir_variable *tmax; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci /* from the BTAS instance currently being visited */ 217bf215546Sopenharmony_ci nir_variable *custom_instance_and_mask; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci /* Properties of the primitive currently being visited. */ 220bf215546Sopenharmony_ci nir_variable *primitive_id; 221bf215546Sopenharmony_ci nir_variable *geometry_id_and_flags; 222bf215546Sopenharmony_ci nir_variable *instance_id; 223bf215546Sopenharmony_ci nir_variable *instance_addr; 224bf215546Sopenharmony_ci nir_variable *hit_kind; 225bf215546Sopenharmony_ci nir_variable *opaque; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* Safeguard to ensure we don't end up in an infinite loop of non-existing case. Should not be 228bf215546Sopenharmony_ci * needed but is extra anti-hang safety during bring-up. */ 229bf215546Sopenharmony_ci nir_variable *main_loop_case_visited; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci /* Output variables for intersection & anyhit shaders. */ 232bf215546Sopenharmony_ci nir_variable *ahit_accept; 233bf215546Sopenharmony_ci nir_variable *ahit_terminate; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci /* Array of stack size struct for recording the max stack size for each group. */ 236bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size *stack_sizes; 237bf215546Sopenharmony_ci unsigned stage_idx; 238bf215546Sopenharmony_ci}; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_cistatic void 241bf215546Sopenharmony_cireserve_stack_size(struct rt_variables *vars, uint32_t size) 242bf215546Sopenharmony_ci{ 243bf215546Sopenharmony_ci for (uint32_t group_idx = 0; group_idx < vars->create_info->groupCount; group_idx++) { 244bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *group = vars->create_info->pGroups + group_idx; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci if (vars->stage_idx == group->generalShader || vars->stage_idx == group->closestHitShader) 247bf215546Sopenharmony_ci vars->stack_sizes[group_idx].recursive_size = 248bf215546Sopenharmony_ci MAX2(vars->stack_sizes[group_idx].recursive_size, size); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (vars->stage_idx == group->anyHitShader || vars->stage_idx == group->intersectionShader) 251bf215546Sopenharmony_ci vars->stack_sizes[group_idx].non_recursive_size = 252bf215546Sopenharmony_ci MAX2(vars->stack_sizes[group_idx].non_recursive_size, size); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci} 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cistatic struct rt_variables 257bf215546Sopenharmony_cicreate_rt_variables(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *create_info, 258bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size *stack_sizes) 259bf215546Sopenharmony_ci{ 260bf215546Sopenharmony_ci struct rt_variables vars = { 261bf215546Sopenharmony_ci .create_info = create_info, 262bf215546Sopenharmony_ci }; 263bf215546Sopenharmony_ci vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx"); 264bf215546Sopenharmony_ci vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg"); 265bf215546Sopenharmony_ci vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr"); 266bf215546Sopenharmony_ci vars.shader_record_ptr = 267bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 270bf215546Sopenharmony_ci vars.accel_struct = 271bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); 272bf215546Sopenharmony_ci vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags"); 273bf215546Sopenharmony_ci vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask"); 274bf215546Sopenharmony_ci vars.sbt_offset = 275bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); 276bf215546Sopenharmony_ci vars.sbt_stride = 277bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride"); 278bf215546Sopenharmony_ci vars.miss_index = 279bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index"); 280bf215546Sopenharmony_ci vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin"); 281bf215546Sopenharmony_ci vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin"); 282bf215546Sopenharmony_ci vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction"); 283bf215546Sopenharmony_ci vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax"); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci vars.custom_instance_and_mask = nir_variable_create( 286bf215546Sopenharmony_ci shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask"); 287bf215546Sopenharmony_ci vars.primitive_id = 288bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); 289bf215546Sopenharmony_ci vars.geometry_id_and_flags = 290bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags"); 291bf215546Sopenharmony_ci vars.instance_id = 292bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id"); 293bf215546Sopenharmony_ci vars.instance_addr = 294bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); 295bf215546Sopenharmony_ci vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind"); 296bf215546Sopenharmony_ci vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque"); 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci vars.main_loop_case_visited = 299bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "main_loop_case_visited"); 300bf215546Sopenharmony_ci vars.ahit_accept = 301bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_accept"); 302bf215546Sopenharmony_ci vars.ahit_terminate = 303bf215546Sopenharmony_ci nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "ahit_terminate"); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci vars.stack_sizes = stack_sizes; 306bf215546Sopenharmony_ci return vars; 307bf215546Sopenharmony_ci} 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci/* 310bf215546Sopenharmony_ci * Remap all the variables between the two rt_variables struct for inlining. 311bf215546Sopenharmony_ci */ 312bf215546Sopenharmony_cistatic void 313bf215546Sopenharmony_cimap_rt_variables(struct hash_table *var_remap, struct rt_variables *src, 314bf215546Sopenharmony_ci const struct rt_variables *dst) 315bf215546Sopenharmony_ci{ 316bf215546Sopenharmony_ci src->create_info = dst->create_info; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->idx, dst->idx); 319bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->arg, dst->arg); 320bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr); 321bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr); 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct); 324bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->flags, dst->flags); 325bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask); 326bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset); 327bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride); 328bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index); 329bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->origin, dst->origin); 330bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin); 331bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->direction, dst->direction); 332bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax); 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask); 335bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id); 336bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags); 337bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id); 338bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr); 339bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind); 340bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque); 341bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->ahit_accept, dst->ahit_accept); 342bf215546Sopenharmony_ci _mesa_hash_table_insert(var_remap, src->ahit_terminate, dst->ahit_terminate); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci src->stack_sizes = dst->stack_sizes; 345bf215546Sopenharmony_ci src->stage_idx = dst->stage_idx; 346bf215546Sopenharmony_ci} 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci/* 349bf215546Sopenharmony_ci * Create a copy of the global rt variables where the primitive/instance related variables are 350bf215546Sopenharmony_ci * independent.This is needed as we need to keep the old values of the global variables around 351bf215546Sopenharmony_ci * in case e.g. an anyhit shader reject the collision. So there are inner variables that get copied 352bf215546Sopenharmony_ci * to the outer variables once we commit to a better hit. 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_cistatic struct rt_variables 355bf215546Sopenharmony_cicreate_inner_vars(nir_builder *b, const struct rt_variables *vars) 356bf215546Sopenharmony_ci{ 357bf215546Sopenharmony_ci struct rt_variables inner_vars = *vars; 358bf215546Sopenharmony_ci inner_vars.idx = 359bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx"); 360bf215546Sopenharmony_ci inner_vars.shader_record_ptr = nir_variable_create( 361bf215546Sopenharmony_ci b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr"); 362bf215546Sopenharmony_ci inner_vars.primitive_id = 363bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id"); 364bf215546Sopenharmony_ci inner_vars.geometry_id_and_flags = nir_variable_create( 365bf215546Sopenharmony_ci b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); 366bf215546Sopenharmony_ci inner_vars.tmax = 367bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); 368bf215546Sopenharmony_ci inner_vars.instance_id = 369bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id"); 370bf215546Sopenharmony_ci inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, 371bf215546Sopenharmony_ci glsl_uint64_t_type(), "inner_instance_addr"); 372bf215546Sopenharmony_ci inner_vars.hit_kind = 373bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); 374bf215546Sopenharmony_ci inner_vars.custom_instance_and_mask = nir_variable_create( 375bf215546Sopenharmony_ci b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask"); 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci return inner_vars; 378bf215546Sopenharmony_ci} 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci/* The hit attributes are stored on the stack. This is the offset compared to the current stack 381bf215546Sopenharmony_ci * pointer of where the hit attrib is stored. */ 382bf215546Sopenharmony_ciconst uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE); 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_cistatic void 385bf215546Sopenharmony_ciinsert_rt_return(nir_builder *b, const struct rt_variables *vars) 386bf215546Sopenharmony_ci{ 387bf215546Sopenharmony_ci nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), -16), 1); 388bf215546Sopenharmony_ci nir_store_var(b, vars->idx, 389bf215546Sopenharmony_ci nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1); 390bf215546Sopenharmony_ci} 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_cienum sbt_type { 393bf215546Sopenharmony_ci SBT_RAYGEN = offsetof(VkTraceRaysIndirectCommand2KHR, raygenShaderRecordAddress), 394bf215546Sopenharmony_ci SBT_MISS = offsetof(VkTraceRaysIndirectCommand2KHR, missShaderBindingTableAddress), 395bf215546Sopenharmony_ci SBT_HIT = offsetof(VkTraceRaysIndirectCommand2KHR, hitShaderBindingTableAddress), 396bf215546Sopenharmony_ci SBT_CALLABLE = offsetof(VkTraceRaysIndirectCommand2KHR, callableShaderBindingTableAddress), 397bf215546Sopenharmony_ci}; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_cistatic nir_ssa_def * 400bf215546Sopenharmony_ciget_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding) 401bf215546Sopenharmony_ci{ 402bf215546Sopenharmony_ci nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b); 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci nir_ssa_def *desc = 405bf215546Sopenharmony_ci nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding))); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16)); 408bf215546Sopenharmony_ci nir_ssa_def *stride = 409bf215546Sopenharmony_ci nir_pack_64_2x32(b, nir_build_load_smem_amd(b, 2, desc_base_addr, stride_offset)); 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride)); 412bf215546Sopenharmony_ci} 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_cistatic void 415bf215546Sopenharmony_ciload_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, 416bf215546Sopenharmony_ci enum sbt_type binding, unsigned offset) 417bf215546Sopenharmony_ci{ 418bf215546Sopenharmony_ci nir_ssa_def *addr = get_sbt_ptr(b, idx, binding); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset); 421bf215546Sopenharmony_ci nir_ssa_def *v_idx = nir_build_load_global(b, 1, 32, load_addr); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci nir_store_var(b, vars->idx, v_idx, 1); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci nir_ssa_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE); 426bf215546Sopenharmony_ci nir_store_var(b, vars->shader_record_ptr, record_addr, 1); 427bf215546Sopenharmony_ci} 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci/* This lowers all the RT instructions that we do not want to pass on to the combined shader and 430bf215546Sopenharmony_ci * that we can implement using the variables from the shader we are going to inline into. */ 431bf215546Sopenharmony_cistatic void 432bf215546Sopenharmony_cilower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base) 433bf215546Sopenharmony_ci{ 434bf215546Sopenharmony_ci nir_builder b_shader; 435bf215546Sopenharmony_ci nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader)); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci nir_foreach_block (block, nir_shader_get_entrypoint(shader)) { 438bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 439bf215546Sopenharmony_ci switch (instr->type) { 440bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 441bf215546Sopenharmony_ci b_shader.cursor = nir_before_instr(instr); 442bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 443bf215546Sopenharmony_ci nir_ssa_def *ret = NULL; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci switch (intr->intrinsic) { 446bf215546Sopenharmony_ci case nir_intrinsic_rt_execute_callable: { 447bf215546Sopenharmony_ci uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 448bf215546Sopenharmony_ci uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci nir_store_var( 451bf215546Sopenharmony_ci &b_shader, vars->stack_ptr, 452bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); 453bf215546Sopenharmony_ci nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx), 454bf215546Sopenharmony_ci nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->stack_ptr, 457bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 458bf215546Sopenharmony_ci 1); 459bf215546Sopenharmony_ci load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->arg, 462bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, intr->src[1].ssa, -size - 16), 1); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci reserve_stack_size(vars, size + 16); 465bf215546Sopenharmony_ci break; 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci case nir_intrinsic_rt_trace_ray: { 468bf215546Sopenharmony_ci uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 469bf215546Sopenharmony_ci uint32_t ret_idx = call_idx_base + nir_intrinsic_call_idx(intr) + 1; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci nir_store_var( 472bf215546Sopenharmony_ci &b_shader, vars->stack_ptr, 473bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), size), 1); 474bf215546Sopenharmony_ci nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret_idx), 475bf215546Sopenharmony_ci nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->stack_ptr, 478bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 16), 479bf215546Sopenharmony_ci 1); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1); 482bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->arg, 483bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, intr->src[10].ssa, -size - 16), 1); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci reserve_stack_size(vars, size + 16); 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */ 488bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1); 489bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1); 490bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->cull_mask, 491bf215546Sopenharmony_ci nir_iand_imm(&b_shader, intr->src[2].ssa, 0xff), 0x1); 492bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->sbt_offset, 493bf215546Sopenharmony_ci nir_iand_imm(&b_shader, intr->src[3].ssa, 0xf), 0x1); 494bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->sbt_stride, 495bf215546Sopenharmony_ci nir_iand_imm(&b_shader, intr->src[4].ssa, 0xf), 0x1); 496bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->miss_index, 497bf215546Sopenharmony_ci nir_iand_imm(&b_shader, intr->src[5].ssa, 0xffff), 0x1); 498bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7); 499bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1); 500bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7); 501bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1); 502bf215546Sopenharmony_ci break; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci case nir_intrinsic_rt_resume: { 505bf215546Sopenharmony_ci uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci nir_store_var( 508bf215546Sopenharmony_ci &b_shader, vars->stack_ptr, 509bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), -size), 1); 510bf215546Sopenharmony_ci break; 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci case nir_intrinsic_rt_return_amd: { 513bf215546Sopenharmony_ci if (shader->info.stage == MESA_SHADER_RAYGEN) { 514bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1); 515bf215546Sopenharmony_ci break; 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci insert_rt_return(&b_shader, vars); 518bf215546Sopenharmony_ci break; 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci case nir_intrinsic_load_scratch: { 521bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa( 522bf215546Sopenharmony_ci instr, &intr->src[0], 523bf215546Sopenharmony_ci nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa)); 524bf215546Sopenharmony_ci continue; 525bf215546Sopenharmony_ci } 526bf215546Sopenharmony_ci case nir_intrinsic_store_scratch: { 527bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa( 528bf215546Sopenharmony_ci instr, &intr->src[1], 529bf215546Sopenharmony_ci nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa)); 530bf215546Sopenharmony_ci continue; 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci case nir_intrinsic_load_rt_arg_scratch_offset_amd: { 533bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->arg); 534bf215546Sopenharmony_ci break; 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci case nir_intrinsic_load_shader_record_ptr: { 537bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->shader_record_ptr); 538bf215546Sopenharmony_ci break; 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_id: { 541bf215546Sopenharmony_ci ret = nir_load_global_invocation_id(&b_shader, 32); 542bf215546Sopenharmony_ci break; 543bf215546Sopenharmony_ci } 544bf215546Sopenharmony_ci case nir_intrinsic_load_ray_launch_size: { 545bf215546Sopenharmony_ci nir_ssa_def *launch_size_addr = 546bf215546Sopenharmony_ci nir_load_ray_launch_size_addr_amd(&b_shader); 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci nir_ssa_def * xy = nir_build_load_smem_amd( 549bf215546Sopenharmony_ci &b_shader, 2, launch_size_addr, nir_imm_int(&b_shader, 0)); 550bf215546Sopenharmony_ci nir_ssa_def * z = nir_build_load_smem_amd( 551bf215546Sopenharmony_ci &b_shader, 1, launch_size_addr, nir_imm_int(&b_shader, 8)); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci nir_ssa_def *xyz[3] = { 554bf215546Sopenharmony_ci nir_channel(&b_shader, xy, 0), 555bf215546Sopenharmony_ci nir_channel(&b_shader, xy, 1), 556bf215546Sopenharmony_ci z, 557bf215546Sopenharmony_ci }; 558bf215546Sopenharmony_ci ret = nir_vec(&b_shader, xyz, 3); 559bf215546Sopenharmony_ci break; 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci case nir_intrinsic_load_ray_t_min: { 562bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->tmin); 563bf215546Sopenharmony_ci break; 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci case nir_intrinsic_load_ray_t_max: { 566bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->tmax); 567bf215546Sopenharmony_ci break; 568bf215546Sopenharmony_ci } 569bf215546Sopenharmony_ci case nir_intrinsic_load_ray_world_origin: { 570bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->origin); 571bf215546Sopenharmony_ci break; 572bf215546Sopenharmony_ci } 573bf215546Sopenharmony_ci case nir_intrinsic_load_ray_world_direction: { 574bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->direction); 575bf215546Sopenharmony_ci break; 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci case nir_intrinsic_load_ray_instance_custom_index: { 578bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->custom_instance_and_mask); 579bf215546Sopenharmony_ci ret = nir_iand_imm(&b_shader, ret, 0xFFFFFF); 580bf215546Sopenharmony_ci break; 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci case nir_intrinsic_load_primitive_id: { 583bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->primitive_id); 584bf215546Sopenharmony_ci break; 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci case nir_intrinsic_load_ray_geometry_index: { 587bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->geometry_id_and_flags); 588bf215546Sopenharmony_ci ret = nir_iand_imm(&b_shader, ret, 0xFFFFFFF); 589bf215546Sopenharmony_ci break; 590bf215546Sopenharmony_ci } 591bf215546Sopenharmony_ci case nir_intrinsic_load_instance_id: { 592bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->instance_id); 593bf215546Sopenharmony_ci break; 594bf215546Sopenharmony_ci } 595bf215546Sopenharmony_ci case nir_intrinsic_load_ray_flags: { 596bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->flags); 597bf215546Sopenharmony_ci break; 598bf215546Sopenharmony_ci } 599bf215546Sopenharmony_ci case nir_intrinsic_load_ray_hit_kind: { 600bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->hit_kind); 601bf215546Sopenharmony_ci break; 602bf215546Sopenharmony_ci } 603bf215546Sopenharmony_ci case nir_intrinsic_load_ray_world_to_object: { 604bf215546Sopenharmony_ci unsigned c = nir_intrinsic_column(intr); 605bf215546Sopenharmony_ci nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 606bf215546Sopenharmony_ci nir_ssa_def *wto_matrix[3]; 607bf215546Sopenharmony_ci nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci nir_ssa_def *vals[3]; 610bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) 611bf215546Sopenharmony_ci vals[i] = nir_channel(&b_shader, wto_matrix[i], c); 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci ret = nir_vec(&b_shader, vals, 3); 614bf215546Sopenharmony_ci if (c == 3) 615bf215546Sopenharmony_ci ret = nir_fneg(&b_shader, 616bf215546Sopenharmony_ci nir_build_vec3_mat_mult(&b_shader, ret, wto_matrix, false)); 617bf215546Sopenharmony_ci break; 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci case nir_intrinsic_load_ray_object_to_world: { 620bf215546Sopenharmony_ci unsigned c = nir_intrinsic_column(intr); 621bf215546Sopenharmony_ci nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 622bf215546Sopenharmony_ci if (c == 3) { 623bf215546Sopenharmony_ci nir_ssa_def *wto_matrix[3]; 624bf215546Sopenharmony_ci nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci nir_ssa_def *vals[3]; 627bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) 628bf215546Sopenharmony_ci vals[i] = nir_channel(&b_shader, wto_matrix[i], c); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci ret = nir_vec(&b_shader, vals, 3); 631bf215546Sopenharmony_ci } else { 632bf215546Sopenharmony_ci ret = nir_build_load_global( 633bf215546Sopenharmony_ci &b_shader, 3, 32, nir_iadd_imm(&b_shader, instance_node_addr, 92 + c * 12)); 634bf215546Sopenharmony_ci } 635bf215546Sopenharmony_ci break; 636bf215546Sopenharmony_ci } 637bf215546Sopenharmony_ci case nir_intrinsic_load_ray_object_origin: { 638bf215546Sopenharmony_ci nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 639bf215546Sopenharmony_ci nir_ssa_def *wto_matrix[] = { 640bf215546Sopenharmony_ci nir_build_load_global(&b_shader, 4, 32, 641bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, instance_node_addr, 16), 642bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 16), 643bf215546Sopenharmony_ci nir_build_load_global(&b_shader, 4, 32, 644bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, instance_node_addr, 32), 645bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 32), 646bf215546Sopenharmony_ci nir_build_load_global(&b_shader, 4, 32, 647bf215546Sopenharmony_ci nir_iadd_imm(&b_shader, instance_node_addr, 48), 648bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 48)}; 649bf215546Sopenharmony_ci ret = nir_build_vec3_mat_mult_pre( 650bf215546Sopenharmony_ci &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix); 651bf215546Sopenharmony_ci break; 652bf215546Sopenharmony_ci } 653bf215546Sopenharmony_ci case nir_intrinsic_load_ray_object_direction: { 654bf215546Sopenharmony_ci nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 655bf215546Sopenharmony_ci nir_ssa_def *wto_matrix[3]; 656bf215546Sopenharmony_ci nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 657bf215546Sopenharmony_ci ret = nir_build_vec3_mat_mult( 658bf215546Sopenharmony_ci &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false); 659bf215546Sopenharmony_ci break; 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci case nir_intrinsic_load_intersection_opaque_amd: { 662bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->opaque); 663bf215546Sopenharmony_ci break; 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci case nir_intrinsic_load_cull_mask: { 666bf215546Sopenharmony_ci ret = nir_load_var(&b_shader, vars->cull_mask); 667bf215546Sopenharmony_ci break; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci case nir_intrinsic_ignore_ray_intersection: { 670bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->ahit_accept, nir_imm_false(&b_shader), 0x1); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci /* The if is a workaround to avoid having to fix up control flow manually */ 673bf215546Sopenharmony_ci nir_push_if(&b_shader, nir_imm_true(&b_shader)); 674bf215546Sopenharmony_ci nir_jump(&b_shader, nir_jump_return); 675bf215546Sopenharmony_ci nir_pop_if(&b_shader, NULL); 676bf215546Sopenharmony_ci break; 677bf215546Sopenharmony_ci } 678bf215546Sopenharmony_ci case nir_intrinsic_terminate_ray: { 679bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1); 680bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->ahit_terminate, nir_imm_true(&b_shader), 0x1); 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci /* The if is a workaround to avoid having to fix up control flow manually */ 683bf215546Sopenharmony_ci nir_push_if(&b_shader, nir_imm_true(&b_shader)); 684bf215546Sopenharmony_ci nir_jump(&b_shader, nir_jump_return); 685bf215546Sopenharmony_ci nir_pop_if(&b_shader, NULL); 686bf215546Sopenharmony_ci break; 687bf215546Sopenharmony_ci } 688bf215546Sopenharmony_ci case nir_intrinsic_report_ray_intersection: { 689bf215546Sopenharmony_ci nir_push_if( 690bf215546Sopenharmony_ci &b_shader, 691bf215546Sopenharmony_ci nir_iand( 692bf215546Sopenharmony_ci &b_shader, 693bf215546Sopenharmony_ci nir_fge(&b_shader, nir_load_var(&b_shader, vars->tmax), intr->src[0].ssa), 694bf215546Sopenharmony_ci nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin)))); 695bf215546Sopenharmony_ci { 696bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->ahit_accept, nir_imm_true(&b_shader), 0x1); 697bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1); 698bf215546Sopenharmony_ci nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1); 699bf215546Sopenharmony_ci } 700bf215546Sopenharmony_ci nir_pop_if(&b_shader, NULL); 701bf215546Sopenharmony_ci break; 702bf215546Sopenharmony_ci } 703bf215546Sopenharmony_ci default: 704bf215546Sopenharmony_ci continue; 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci if (ret) 708bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 709bf215546Sopenharmony_ci nir_instr_remove(instr); 710bf215546Sopenharmony_ci break; 711bf215546Sopenharmony_ci } 712bf215546Sopenharmony_ci case nir_instr_type_jump: { 713bf215546Sopenharmony_ci nir_jump_instr *jump = nir_instr_as_jump(instr); 714bf215546Sopenharmony_ci if (jump->type == nir_jump_halt) { 715bf215546Sopenharmony_ci b_shader.cursor = nir_instr_remove(instr); 716bf215546Sopenharmony_ci nir_jump(&b_shader, nir_jump_return); 717bf215546Sopenharmony_ci } 718bf215546Sopenharmony_ci break; 719bf215546Sopenharmony_ci } 720bf215546Sopenharmony_ci default: 721bf215546Sopenharmony_ci break; 722bf215546Sopenharmony_ci } 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none); 727bf215546Sopenharmony_ci} 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_cistatic void 730bf215546Sopenharmony_ciinsert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, 731bf215546Sopenharmony_ci uint32_t call_idx_base, uint32_t call_idx) 732bf215546Sopenharmony_ci{ 733bf215546Sopenharmony_ci struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL); 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci nir_opt_dead_cf(shader); 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci struct rt_variables src_vars = create_rt_variables(shader, vars->create_info, vars->stack_sizes); 738bf215546Sopenharmony_ci map_rt_variables(var_remap, &src_vars, vars); 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base); 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci NIR_PASS(_, shader, nir_opt_remove_phis); 743bf215546Sopenharmony_ci NIR_PASS(_, shader, nir_lower_returns); 744bf215546Sopenharmony_ci NIR_PASS(_, shader, nir_opt_dce); 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci reserve_stack_size(vars, shader->scratch_size); 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, idx, call_idx)); 749bf215546Sopenharmony_ci nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1); 750bf215546Sopenharmony_ci nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap); 751bf215546Sopenharmony_ci nir_pop_if(b, NULL); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */ 754bf215546Sopenharmony_ci ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader)); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci ralloc_free(var_remap); 757bf215546Sopenharmony_ci} 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_cistatic bool 760bf215546Sopenharmony_cilower_rt_derefs(nir_shader *shader) 761bf215546Sopenharmony_ci{ 762bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci bool progress = false; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci nir_builder b; 767bf215546Sopenharmony_ci nir_builder_init(&b, impl); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci b.cursor = nir_before_cf_list(&impl->body); 770bf215546Sopenharmony_ci nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 773bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 774bf215546Sopenharmony_ci if (instr->type != nir_instr_type_deref) 775bf215546Sopenharmony_ci continue; 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 778bf215546Sopenharmony_ci b.cursor = nir_before_instr(&deref->instr); 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci nir_deref_instr *replacement = NULL; 781bf215546Sopenharmony_ci if (nir_deref_mode_is(deref, nir_var_shader_call_data)) { 782bf215546Sopenharmony_ci deref->modes = nir_var_function_temp; 783bf215546Sopenharmony_ci progress = true; 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) 786bf215546Sopenharmony_ci replacement = 787bf215546Sopenharmony_ci nir_build_deref_cast(&b, arg_offset, nir_var_function_temp, deref->var->type, 0); 788bf215546Sopenharmony_ci } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) { 789bf215546Sopenharmony_ci deref->modes = nir_var_function_temp; 790bf215546Sopenharmony_ci progress = true; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) 793bf215546Sopenharmony_ci replacement = nir_build_deref_cast(&b, nir_imm_int(&b, RADV_HIT_ATTRIB_OFFSET), 794bf215546Sopenharmony_ci nir_var_function_temp, deref->type, 0); 795bf215546Sopenharmony_ci } 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci if (replacement != NULL) { 798bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa); 799bf215546Sopenharmony_ci nir_instr_remove(&deref->instr); 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci } 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci if (progress) 805bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 806bf215546Sopenharmony_ci else 807bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci return progress; 810bf215546Sopenharmony_ci} 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_cistatic nir_shader * 813bf215546Sopenharmony_ciparse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo) 814bf215546Sopenharmony_ci{ 815bf215546Sopenharmony_ci struct radv_pipeline_key key; 816bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci struct radv_pipeline_stage rt_stage; 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci radv_pipeline_stage_init(sinfo, &rt_stage, vk_to_mesa_shader_stage(sinfo->stage)); 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci nir_shader *shader = radv_shader_spirv_to_nir(device, &rt_stage, &key); 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci if (shader->info.stage == MESA_SHADER_RAYGEN || shader->info.stage == MESA_SHADER_CLOSEST_HIT || 825bf215546Sopenharmony_ci shader->info.stage == MESA_SHADER_CALLABLE || shader->info.stage == MESA_SHADER_MISS) { 826bf215546Sopenharmony_ci nir_block *last_block = nir_impl_last_block(nir_shader_get_entrypoint(shader)); 827bf215546Sopenharmony_ci nir_builder b_inner; 828bf215546Sopenharmony_ci nir_builder_init(&b_inner, nir_shader_get_entrypoint(shader)); 829bf215546Sopenharmony_ci b_inner.cursor = nir_after_block(last_block); 830bf215546Sopenharmony_ci nir_rt_return_amd(&b_inner); 831bf215546Sopenharmony_ci } 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci NIR_PASS(_, shader, nir_lower_vars_to_explicit_types, 834bf215546Sopenharmony_ci nir_var_function_temp | nir_var_shader_call_data | nir_var_ray_hit_attrib, 835bf215546Sopenharmony_ci glsl_get_natural_size_align_bytes); 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci NIR_PASS(_, shader, lower_rt_derefs); 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci NIR_PASS(_, shader, nir_lower_explicit_io, nir_var_function_temp, 840bf215546Sopenharmony_ci nir_address_format_32bit_offset); 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci return shader; 843bf215546Sopenharmony_ci} 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_cistatic nir_function_impl * 846bf215546Sopenharmony_cilower_any_hit_for_intersection(nir_shader *any_hit) 847bf215546Sopenharmony_ci{ 848bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(any_hit); 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci /* Any-hit shaders need three parameters */ 851bf215546Sopenharmony_ci assert(impl->function->num_params == 0); 852bf215546Sopenharmony_ci nir_parameter params[] = { 853bf215546Sopenharmony_ci { 854bf215546Sopenharmony_ci /* A pointer to a boolean value for whether or not the hit was 855bf215546Sopenharmony_ci * accepted. 856bf215546Sopenharmony_ci */ 857bf215546Sopenharmony_ci .num_components = 1, 858bf215546Sopenharmony_ci .bit_size = 32, 859bf215546Sopenharmony_ci }, 860bf215546Sopenharmony_ci { 861bf215546Sopenharmony_ci /* The hit T value */ 862bf215546Sopenharmony_ci .num_components = 1, 863bf215546Sopenharmony_ci .bit_size = 32, 864bf215546Sopenharmony_ci }, 865bf215546Sopenharmony_ci { 866bf215546Sopenharmony_ci /* The hit kind */ 867bf215546Sopenharmony_ci .num_components = 1, 868bf215546Sopenharmony_ci .bit_size = 32, 869bf215546Sopenharmony_ci }, 870bf215546Sopenharmony_ci }; 871bf215546Sopenharmony_ci impl->function->num_params = ARRAY_SIZE(params); 872bf215546Sopenharmony_ci impl->function->params = ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params)); 873bf215546Sopenharmony_ci memcpy(impl->function->params, params, sizeof(params)); 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci nir_builder build; 876bf215546Sopenharmony_ci nir_builder_init(&build, impl); 877bf215546Sopenharmony_ci nir_builder *b = &build; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci b->cursor = nir_before_cf_list(&impl->body); 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci nir_ssa_def *commit_ptr = nir_load_param(b, 0); 882bf215546Sopenharmony_ci nir_ssa_def *hit_t = nir_load_param(b, 1); 883bf215546Sopenharmony_ci nir_ssa_def *hit_kind = nir_load_param(b, 2); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci nir_deref_instr *commit = 886bf215546Sopenharmony_ci nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0); 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_ci nir_foreach_block_safe (block, impl) { 889bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 890bf215546Sopenharmony_ci switch (instr->type) { 891bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 892bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 893bf215546Sopenharmony_ci switch (intrin->intrinsic) { 894bf215546Sopenharmony_ci case nir_intrinsic_ignore_ray_intersection: 895bf215546Sopenharmony_ci b->cursor = nir_instr_remove(&intrin->instr); 896bf215546Sopenharmony_ci /* We put the newly emitted code inside a dummy if because it's 897bf215546Sopenharmony_ci * going to contain a jump instruction and we don't want to 898bf215546Sopenharmony_ci * deal with that mess here. It'll get dealt with by our 899bf215546Sopenharmony_ci * control-flow optimization passes. 900bf215546Sopenharmony_ci */ 901bf215546Sopenharmony_ci nir_store_deref(b, commit, nir_imm_false(b), 0x1); 902bf215546Sopenharmony_ci nir_push_if(b, nir_imm_true(b)); 903bf215546Sopenharmony_ci nir_jump(b, nir_jump_return); 904bf215546Sopenharmony_ci nir_pop_if(b, NULL); 905bf215546Sopenharmony_ci break; 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci case nir_intrinsic_terminate_ray: 908bf215546Sopenharmony_ci /* The "normal" handling of terminateRay works fine in 909bf215546Sopenharmony_ci * intersection shaders. 910bf215546Sopenharmony_ci */ 911bf215546Sopenharmony_ci break; 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ci case nir_intrinsic_load_ray_t_max: 914bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t); 915bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 916bf215546Sopenharmony_ci break; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci case nir_intrinsic_load_ray_hit_kind: 919bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind); 920bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 921bf215546Sopenharmony_ci break; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci default: 924bf215546Sopenharmony_ci break; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci break; 927bf215546Sopenharmony_ci } 928bf215546Sopenharmony_ci case nir_instr_type_jump: { 929bf215546Sopenharmony_ci nir_jump_instr *jump = nir_instr_as_jump(instr); 930bf215546Sopenharmony_ci if (jump->type == nir_jump_halt) { 931bf215546Sopenharmony_ci b->cursor = nir_instr_remove(instr); 932bf215546Sopenharmony_ci nir_jump(b, nir_jump_return); 933bf215546Sopenharmony_ci } 934bf215546Sopenharmony_ci break; 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci default: 938bf215546Sopenharmony_ci break; 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci } 941bf215546Sopenharmony_ci } 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci nir_validate_shader(any_hit, "after initial any-hit lowering"); 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci nir_lower_returns_impl(impl); 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci nir_validate_shader(any_hit, "after lowering returns"); 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci return impl; 950bf215546Sopenharmony_ci} 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci/* Inline the any_hit shader into the intersection shader so we don't have 953bf215546Sopenharmony_ci * to implement yet another shader call interface here. Neither do any recursion. 954bf215546Sopenharmony_ci */ 955bf215546Sopenharmony_cistatic void 956bf215546Sopenharmony_cinir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit) 957bf215546Sopenharmony_ci{ 958bf215546Sopenharmony_ci void *dead_ctx = ralloc_context(intersection); 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci nir_function_impl *any_hit_impl = NULL; 961bf215546Sopenharmony_ci struct hash_table *any_hit_var_remap = NULL; 962bf215546Sopenharmony_ci if (any_hit) { 963bf215546Sopenharmony_ci any_hit = nir_shader_clone(dead_ctx, any_hit); 964bf215546Sopenharmony_ci NIR_PASS(_, any_hit, nir_opt_dce); 965bf215546Sopenharmony_ci any_hit_impl = lower_any_hit_for_intersection(any_hit); 966bf215546Sopenharmony_ci any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx); 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(intersection); 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_ci nir_builder build; 972bf215546Sopenharmony_ci nir_builder_init(&build, impl); 973bf215546Sopenharmony_ci nir_builder *b = &build; 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci b->cursor = nir_before_cf_list(&impl->body); 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci nir_variable *commit = nir_local_variable_create(impl, glsl_bool_type(), "ray_commit"); 978bf215546Sopenharmony_ci nir_store_var(b, commit, nir_imm_false(b), 0x1); 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci nir_foreach_block_safe (block, impl) { 981bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 982bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 983bf215546Sopenharmony_ci continue; 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 986bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_report_ray_intersection) 987bf215546Sopenharmony_ci continue; 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci b->cursor = nir_instr_remove(&intrin->instr); 990bf215546Sopenharmony_ci nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1); 991bf215546Sopenharmony_ci nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1); 992bf215546Sopenharmony_ci nir_ssa_def *min_t = nir_load_ray_t_min(b); 993bf215546Sopenharmony_ci nir_ssa_def *max_t = nir_load_ray_t_max(b); 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci /* bool commit_tmp = false; */ 996bf215546Sopenharmony_ci nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp"); 997bf215546Sopenharmony_ci nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1); 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_ci nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t), nir_fge(b, max_t, hit_t))); 1000bf215546Sopenharmony_ci { 1001bf215546Sopenharmony_ci /* Any-hit defaults to commit */ 1002bf215546Sopenharmony_ci nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1); 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (any_hit_impl != NULL) { 1005bf215546Sopenharmony_ci nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b))); 1006bf215546Sopenharmony_ci { 1007bf215546Sopenharmony_ci nir_ssa_def *params[] = { 1008bf215546Sopenharmony_ci &nir_build_deref_var(b, commit_tmp)->dest.ssa, 1009bf215546Sopenharmony_ci hit_t, 1010bf215546Sopenharmony_ci hit_kind, 1011bf215546Sopenharmony_ci }; 1012bf215546Sopenharmony_ci nir_inline_function_impl(b, any_hit_impl, params, any_hit_var_remap); 1013bf215546Sopenharmony_ci } 1014bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1015bf215546Sopenharmony_ci } 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci nir_push_if(b, nir_load_var(b, commit_tmp)); 1018bf215546Sopenharmony_ci { 1019bf215546Sopenharmony_ci nir_report_ray_intersection(b, 1, hit_t, hit_kind); 1020bf215546Sopenharmony_ci } 1021bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1022bf215546Sopenharmony_ci } 1023bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci nir_ssa_def *accepted = nir_load_var(b, commit_tmp); 1026bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted); 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci } 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci /* We did some inlining; have to re-index SSA defs */ 1031bf215546Sopenharmony_ci nir_index_ssa_defs(impl); 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci /* Eliminate the casts introduced for the commit return of the any-hit shader. */ 1034bf215546Sopenharmony_ci NIR_PASS(_, intersection, nir_opt_deref); 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci ralloc_free(dead_ctx); 1037bf215546Sopenharmony_ci} 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci/* Variables only used internally to ray traversal. This is data that describes 1040bf215546Sopenharmony_ci * the current state of the traversal vs. what we'd give to a shader. e.g. what 1041bf215546Sopenharmony_ci * is the instance we're currently visiting vs. what is the instance of the 1042bf215546Sopenharmony_ci * closest hit. */ 1043bf215546Sopenharmony_cistruct rt_traversal_vars { 1044bf215546Sopenharmony_ci nir_variable *origin; 1045bf215546Sopenharmony_ci nir_variable *dir; 1046bf215546Sopenharmony_ci nir_variable *inv_dir; 1047bf215546Sopenharmony_ci nir_variable *sbt_offset_and_flags; 1048bf215546Sopenharmony_ci nir_variable *instance_id; 1049bf215546Sopenharmony_ci nir_variable *custom_instance_and_mask; 1050bf215546Sopenharmony_ci nir_variable *instance_addr; 1051bf215546Sopenharmony_ci nir_variable *hit; 1052bf215546Sopenharmony_ci nir_variable *bvh_base; 1053bf215546Sopenharmony_ci nir_variable *stack; 1054bf215546Sopenharmony_ci nir_variable *top_stack; 1055bf215546Sopenharmony_ci}; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_cistatic struct rt_traversal_vars 1058bf215546Sopenharmony_ciinit_traversal_vars(nir_builder *b) 1059bf215546Sopenharmony_ci{ 1060bf215546Sopenharmony_ci const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 1061bf215546Sopenharmony_ci struct rt_traversal_vars ret; 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin"); 1064bf215546Sopenharmony_ci ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir"); 1065bf215546Sopenharmony_ci ret.inv_dir = 1066bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); 1067bf215546Sopenharmony_ci ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 1068bf215546Sopenharmony_ci "traversal_sbt_offset_and_flags"); 1069bf215546Sopenharmony_ci ret.instance_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 1070bf215546Sopenharmony_ci "traversal_instance_id"); 1071bf215546Sopenharmony_ci ret.custom_instance_and_mask = nir_variable_create( 1072bf215546Sopenharmony_ci b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_custom_instance_and_mask"); 1073bf215546Sopenharmony_ci ret.instance_addr = 1074bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); 1075bf215546Sopenharmony_ci ret.hit = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), "traversal_hit"); 1076bf215546Sopenharmony_ci ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), 1077bf215546Sopenharmony_ci "traversal_bvh_base"); 1078bf215546Sopenharmony_ci ret.stack = 1079bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr"); 1080bf215546Sopenharmony_ci ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 1081bf215546Sopenharmony_ci "traversal_top_stack_ptr"); 1082bf215546Sopenharmony_ci return ret; 1083bf215546Sopenharmony_ci} 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_cistatic void 1086bf215546Sopenharmony_civisit_any_hit_shaders(struct radv_device *device, 1087bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 1088bf215546Sopenharmony_ci struct rt_variables *vars) 1089bf215546Sopenharmony_ci{ 1090bf215546Sopenharmony_ci nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx); 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); 1093bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { 1094bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; 1095bf215546Sopenharmony_ci uint32_t shader_id = VK_SHADER_UNUSED_KHR; 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci switch (group_info->type) { 1098bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 1099bf215546Sopenharmony_ci shader_id = group_info->anyHitShader; 1100bf215546Sopenharmony_ci break; 1101bf215546Sopenharmony_ci default: 1102bf215546Sopenharmony_ci break; 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci if (shader_id == VK_SHADER_UNUSED_KHR) 1105bf215546Sopenharmony_ci continue; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; 1108bf215546Sopenharmony_ci nir_shader *nir_stage = parse_rt_stage(device, stage); 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci vars->stage_idx = shader_id; 1111bf215546Sopenharmony_ci insert_rt_case(b, nir_stage, vars, sbt_idx, 0, i + 2); 1112bf215546Sopenharmony_ci } 1113bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1114bf215546Sopenharmony_ci} 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_cistatic void 1117bf215546Sopenharmony_ciinsert_traversal_triangle_case(struct radv_device *device, 1118bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 1119bf215546Sopenharmony_ci nir_ssa_def *result, const struct rt_variables *vars, 1120bf215546Sopenharmony_ci const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node) 1121bf215546Sopenharmony_ci{ 1122bf215546Sopenharmony_ci nir_ssa_def *dist = nir_channel(b, result, 0); 1123bf215546Sopenharmony_ci nir_ssa_def *div = nir_channel(b, result, 1); 1124bf215546Sopenharmony_ci dist = nir_fdiv(b, dist, div); 1125bf215546Sopenharmony_ci nir_ssa_def *frontface = nir_flt(b, nir_imm_float(b, 0), div); 1126bf215546Sopenharmony_ci nir_ssa_def *switch_ccw = 1127bf215546Sopenharmony_ci nir_test_mask(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 1128bf215546Sopenharmony_ci VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24); 1129bf215546Sopenharmony_ci frontface = nir_ixor(b, frontface, switch_ccw); 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci nir_ssa_def *not_cull = 1132bf215546Sopenharmony_ci nir_inot(b, nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsSkipTrianglesKHRMask)); 1133bf215546Sopenharmony_ci nir_ssa_def *not_facing_cull = nir_ieq_imm( 1134bf215546Sopenharmony_ci b, 1135bf215546Sopenharmony_ci nir_iand(b, nir_load_var(b, vars->flags), 1136bf215546Sopenharmony_ci nir_bcsel(b, frontface, nir_imm_int(b, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 1137bf215546Sopenharmony_ci nir_imm_int(b, SpvRayFlagsCullBackFacingTrianglesKHRMask))), 1138bf215546Sopenharmony_ci 0); 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci not_cull = nir_iand( 1141bf215546Sopenharmony_ci b, not_cull, 1142bf215546Sopenharmony_ci nir_ior(b, not_facing_cull, 1143bf215546Sopenharmony_ci nir_test_mask(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 1144bf215546Sopenharmony_ci VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR << 24))); 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci nir_push_if(b, nir_iand(b, 1147bf215546Sopenharmony_ci nir_iand(b, nir_flt(b, dist, nir_load_var(b, vars->tmax)), 1148bf215546Sopenharmony_ci nir_flt(b, nir_load_var(b, vars->tmin), dist)), 1149bf215546Sopenharmony_ci not_cull)); 1150bf215546Sopenharmony_ci { 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci nir_ssa_def *triangle_info = 1153bf215546Sopenharmony_ci nir_build_load_global(b, 2, 32, 1154bf215546Sopenharmony_ci nir_iadd_imm(b, build_node_to_addr(device, b, bvh_node), 1155bf215546Sopenharmony_ci offsetof(struct radv_bvh_triangle_node, triangle_id))); 1156bf215546Sopenharmony_ci nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); 1157bf215546Sopenharmony_ci nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); 1158bf215546Sopenharmony_ci nir_ssa_def *geometry_id = nir_iand_imm(b, geometry_id_and_flags, 0xfffffff); 1159bf215546Sopenharmony_ci nir_ssa_def *is_opaque = hit_is_opaque(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 1160bf215546Sopenharmony_ci nir_load_var(b, vars->flags), geometry_id_and_flags); 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci not_cull = 1163bf215546Sopenharmony_ci nir_ieq_imm(b, 1164bf215546Sopenharmony_ci nir_iand(b, nir_load_var(b, vars->flags), 1165bf215546Sopenharmony_ci nir_bcsel(b, is_opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask), 1166bf215546Sopenharmony_ci nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))), 1167bf215546Sopenharmony_ci 0); 1168bf215546Sopenharmony_ci nir_push_if(b, not_cull); 1169bf215546Sopenharmony_ci { 1170bf215546Sopenharmony_ci nir_ssa_def *sbt_idx = nir_iadd( 1171bf215546Sopenharmony_ci b, 1172bf215546Sopenharmony_ci nir_iadd(b, nir_load_var(b, vars->sbt_offset), 1173bf215546Sopenharmony_ci nir_iand_imm(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 0xffffff)), 1174bf215546Sopenharmony_ci nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id)); 1175bf215546Sopenharmony_ci nir_ssa_def *divs[2] = {div, div}; 1176bf215546Sopenharmony_ci nir_ssa_def *ij = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); 1177bf215546Sopenharmony_ci nir_ssa_def *hit_kind = 1178bf215546Sopenharmony_ci nir_bcsel(b, frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF)); 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_ci nir_store_scratch( 1181bf215546Sopenharmony_ci b, ij, nir_iadd_imm(b, nir_load_var(b, vars->stack_ptr), RADV_HIT_ATTRIB_OFFSET), 1182bf215546Sopenharmony_ci .align_mul = 16); 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci nir_store_var(b, vars->ahit_accept, nir_imm_true(b), 0x1); 1185bf215546Sopenharmony_ci nir_store_var(b, vars->ahit_terminate, nir_imm_false(b), 0x1); 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci nir_push_if(b, nir_inot(b, is_opaque)); 1188bf215546Sopenharmony_ci { 1189bf215546Sopenharmony_ci struct rt_variables inner_vars = create_inner_vars(b, vars); 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci nir_store_var(b, inner_vars.primitive_id, primitive_id, 1); 1192bf215546Sopenharmony_ci nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1); 1193bf215546Sopenharmony_ci nir_store_var(b, inner_vars.tmax, dist, 0x1); 1194bf215546Sopenharmony_ci nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 1195bf215546Sopenharmony_ci nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 1196bf215546Sopenharmony_ci 0x1); 1197bf215546Sopenharmony_ci nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1); 1198bf215546Sopenharmony_ci nir_store_var(b, inner_vars.custom_instance_and_mask, 1199bf215546Sopenharmony_ci nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci visit_any_hit_shaders(device, pCreateInfo, b, &inner_vars); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci nir_push_if(b, nir_inot(b, nir_load_var(b, vars->ahit_accept))); 1206bf215546Sopenharmony_ci { 1207bf215546Sopenharmony_ci nir_jump(b, nir_jump_continue); 1208bf215546Sopenharmony_ci } 1209bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1210bf215546Sopenharmony_ci } 1211bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci nir_store_var(b, vars->primitive_id, primitive_id, 1); 1214bf215546Sopenharmony_ci nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1); 1215bf215546Sopenharmony_ci nir_store_var(b, vars->tmax, dist, 0x1); 1216bf215546Sopenharmony_ci nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 1217bf215546Sopenharmony_ci nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 1218bf215546Sopenharmony_ci nir_store_var(b, vars->hit_kind, hit_kind, 0x1); 1219bf215546Sopenharmony_ci nir_store_var(b, vars->custom_instance_and_mask, 1220bf215546Sopenharmony_ci nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci nir_store_var(b, vars->idx, sbt_idx, 1); 1223bf215546Sopenharmony_ci nir_store_var(b, trav_vars->hit, nir_imm_true(b), 1); 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci nir_ssa_def *terminate_on_first_hit = 1226bf215546Sopenharmony_ci nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask); 1227bf215546Sopenharmony_ci nir_ssa_def *ray_terminated = nir_load_var(b, vars->ahit_terminate); 1228bf215546Sopenharmony_ci nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); 1229bf215546Sopenharmony_ci { 1230bf215546Sopenharmony_ci nir_jump(b, nir_jump_break); 1231bf215546Sopenharmony_ci } 1232bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1233bf215546Sopenharmony_ci } 1234bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1235bf215546Sopenharmony_ci } 1236bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1237bf215546Sopenharmony_ci} 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_cistatic void 1240bf215546Sopenharmony_ciinsert_traversal_aabb_case(struct radv_device *device, 1241bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 1242bf215546Sopenharmony_ci const struct rt_variables *vars, 1243bf215546Sopenharmony_ci const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node) 1244bf215546Sopenharmony_ci{ 1245bf215546Sopenharmony_ci nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 1246bf215546Sopenharmony_ci nir_ssa_def *triangle_info = nir_build_load_global(b, 2, 32, nir_iadd_imm(b, node_addr, 24)); 1247bf215546Sopenharmony_ci nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); 1248bf215546Sopenharmony_ci nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); 1249bf215546Sopenharmony_ci nir_ssa_def *geometry_id = nir_iand_imm(b, geometry_id_and_flags, 0xfffffff); 1250bf215546Sopenharmony_ci nir_ssa_def *is_opaque = hit_is_opaque(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 1251bf215546Sopenharmony_ci nir_load_var(b, vars->flags), geometry_id_and_flags); 1252bf215546Sopenharmony_ci 1253bf215546Sopenharmony_ci nir_ssa_def *not_skip_aabb = 1254bf215546Sopenharmony_ci nir_inot(b, nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsSkipAABBsKHRMask)); 1255bf215546Sopenharmony_ci nir_ssa_def *not_cull = nir_iand( 1256bf215546Sopenharmony_ci b, not_skip_aabb, 1257bf215546Sopenharmony_ci nir_ieq_imm(b, 1258bf215546Sopenharmony_ci nir_iand(b, nir_load_var(b, vars->flags), 1259bf215546Sopenharmony_ci nir_bcsel(b, is_opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask), 1260bf215546Sopenharmony_ci nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))), 1261bf215546Sopenharmony_ci 0)); 1262bf215546Sopenharmony_ci nir_push_if(b, not_cull); 1263bf215546Sopenharmony_ci { 1264bf215546Sopenharmony_ci nir_ssa_def *sbt_idx = nir_iadd( 1265bf215546Sopenharmony_ci b, 1266bf215546Sopenharmony_ci nir_iadd(b, nir_load_var(b, vars->sbt_offset), 1267bf215546Sopenharmony_ci nir_iand_imm(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 0xffffff)), 1268bf215546Sopenharmony_ci nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id)); 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_ci struct rt_variables inner_vars = create_inner_vars(b, vars); 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci /* For AABBs the intersection shader writes the hit kind, and only does it if it is the 1273bf215546Sopenharmony_ci * next closest hit candidate. */ 1274bf215546Sopenharmony_ci inner_vars.hit_kind = vars->hit_kind; 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci nir_store_var(b, inner_vars.primitive_id, primitive_id, 1); 1277bf215546Sopenharmony_ci nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1); 1278bf215546Sopenharmony_ci nir_store_var(b, inner_vars.tmax, nir_load_var(b, vars->tmax), 0x1); 1279bf215546Sopenharmony_ci nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 1280bf215546Sopenharmony_ci nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 1281bf215546Sopenharmony_ci nir_store_var(b, inner_vars.custom_instance_and_mask, 1282bf215546Sopenharmony_ci nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 1283bf215546Sopenharmony_ci nir_store_var(b, inner_vars.opaque, is_opaque, 1); 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_ci load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci nir_store_var(b, vars->ahit_accept, nir_imm_false(b), 0x1); 1288bf215546Sopenharmony_ci nir_store_var(b, vars->ahit_terminate, nir_imm_false(b), 0x1); 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci nir_push_if(b, nir_ine_imm(b, nir_load_var(b, inner_vars.idx), 0)); 1291bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { 1292bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; 1293bf215546Sopenharmony_ci uint32_t shader_id = VK_SHADER_UNUSED_KHR; 1294bf215546Sopenharmony_ci uint32_t any_hit_shader_id = VK_SHADER_UNUSED_KHR; 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci switch (group_info->type) { 1297bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 1298bf215546Sopenharmony_ci shader_id = group_info->intersectionShader; 1299bf215546Sopenharmony_ci any_hit_shader_id = group_info->anyHitShader; 1300bf215546Sopenharmony_ci break; 1301bf215546Sopenharmony_ci default: 1302bf215546Sopenharmony_ci break; 1303bf215546Sopenharmony_ci } 1304bf215546Sopenharmony_ci if (shader_id == VK_SHADER_UNUSED_KHR) 1305bf215546Sopenharmony_ci continue; 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; 1308bf215546Sopenharmony_ci nir_shader *nir_stage = parse_rt_stage(device, stage); 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci nir_shader *any_hit_stage = NULL; 1311bf215546Sopenharmony_ci if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { 1312bf215546Sopenharmony_ci stage = &pCreateInfo->pStages[any_hit_shader_id]; 1313bf215546Sopenharmony_ci any_hit_stage = parse_rt_stage(device, stage); 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci nir_lower_intersection_shader(nir_stage, any_hit_stage); 1316bf215546Sopenharmony_ci ralloc_free(any_hit_stage); 1317bf215546Sopenharmony_ci } 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_ci inner_vars.stage_idx = shader_id; 1320bf215546Sopenharmony_ci insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, i + 2); 1321bf215546Sopenharmony_ci } 1322bf215546Sopenharmony_ci nir_push_else(b, NULL); 1323bf215546Sopenharmony_ci { 1324bf215546Sopenharmony_ci nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7); 1325bf215546Sopenharmony_ci nir_ssa_def *vec3_inf = 1326bf215546Sopenharmony_ci nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7); 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci nir_ssa_def *bvh_lo = nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, 0)); 1329bf215546Sopenharmony_ci nir_ssa_def *bvh_hi = nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, 12)); 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci bvh_lo = nir_fsub(b, bvh_lo, nir_load_var(b, trav_vars->origin)); 1332bf215546Sopenharmony_ci bvh_hi = nir_fsub(b, bvh_hi, nir_load_var(b, trav_vars->origin)); 1333bf215546Sopenharmony_ci nir_ssa_def *t_vec = nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)), 1334bf215546Sopenharmony_ci nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir))); 1335bf215546Sopenharmony_ci nir_ssa_def *t2_vec = nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)), 1336bf215546Sopenharmony_ci nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir))); 1337bf215546Sopenharmony_ci /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */ 1338bf215546Sopenharmony_ci t2_vec = 1339bf215546Sopenharmony_ci nir_bcsel(b, nir_feq(b, nir_load_var(b, trav_vars->dir), vec3_zero), vec3_inf, t2_vec); 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1)); 1342bf215546Sopenharmony_ci t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2)); 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1)); 1345bf215546Sopenharmony_ci t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2)); 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_ci nir_push_if(b, nir_iand(b, nir_fge(b, nir_load_var(b, vars->tmax), t_min), 1348bf215546Sopenharmony_ci nir_fge(b, t_max, nir_load_var(b, vars->tmin)))); 1349bf215546Sopenharmony_ci { 1350bf215546Sopenharmony_ci nir_store_var(b, vars->ahit_accept, nir_imm_true(b), 0x1); 1351bf215546Sopenharmony_ci nir_store_var(b, vars->tmax, nir_fmax(b, t_min, nir_load_var(b, vars->tmin)), 1); 1352bf215546Sopenharmony_ci } 1353bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1354bf215546Sopenharmony_ci } 1355bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1356bf215546Sopenharmony_ci 1357bf215546Sopenharmony_ci nir_push_if(b, nir_load_var(b, vars->ahit_accept)); 1358bf215546Sopenharmony_ci { 1359bf215546Sopenharmony_ci nir_store_var(b, vars->primitive_id, primitive_id, 1); 1360bf215546Sopenharmony_ci nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1); 1361bf215546Sopenharmony_ci nir_store_var(b, vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1); 1362bf215546Sopenharmony_ci nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 1363bf215546Sopenharmony_ci nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 1364bf215546Sopenharmony_ci nir_store_var(b, vars->custom_instance_and_mask, 1365bf215546Sopenharmony_ci nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci nir_store_var(b, vars->idx, sbt_idx, 1); 1368bf215546Sopenharmony_ci nir_store_var(b, trav_vars->hit, nir_imm_true(b), 1); 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci nir_ssa_def *terminate_on_first_hit = 1371bf215546Sopenharmony_ci nir_test_mask(b, nir_load_var(b, vars->flags), SpvRayFlagsTerminateOnFirstHitKHRMask); 1372bf215546Sopenharmony_ci nir_ssa_def *ray_terminated = nir_load_var(b, vars->ahit_terminate); 1373bf215546Sopenharmony_ci nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); 1374bf215546Sopenharmony_ci { 1375bf215546Sopenharmony_ci nir_jump(b, nir_jump_break); 1376bf215546Sopenharmony_ci } 1377bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1378bf215546Sopenharmony_ci } 1379bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1382bf215546Sopenharmony_ci} 1383bf215546Sopenharmony_ci 1384bf215546Sopenharmony_cistatic nir_shader * 1385bf215546Sopenharmony_cibuild_traversal_shader(struct radv_device *device, 1386bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1387bf215546Sopenharmony_ci const struct rt_variables *dst_vars, 1388bf215546Sopenharmony_ci struct hash_table *var_remap) 1389bf215546Sopenharmony_ci{ 1390bf215546Sopenharmony_ci nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_traversal"); 1391bf215546Sopenharmony_ci b.shader->info.internal = false; 1392bf215546Sopenharmony_ci b.shader->info.workgroup_size[0] = 8; 1393bf215546Sopenharmony_ci b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; 1394bf215546Sopenharmony_ci struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, dst_vars->stack_sizes); 1395bf215546Sopenharmony_ci map_rt_variables(var_remap, &vars, dst_vars); 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci unsigned lanes = device->physical_device->rt_wave_size; 1398bf215546Sopenharmony_ci unsigned elements = lanes * MAX_STACK_ENTRY_COUNT; 1399bf215546Sopenharmony_ci nir_variable *stack_var = nir_variable_create(b.shader, nir_var_mem_shared, 1400bf215546Sopenharmony_ci glsl_array_type(glsl_uint_type(), elements, 0), 1401bf215546Sopenharmony_ci "trav_stack"); 1402bf215546Sopenharmony_ci nir_deref_instr *stack_deref = nir_build_deref_var(&b, stack_var); 1403bf215546Sopenharmony_ci nir_deref_instr *stack; 1404bf215546Sopenharmony_ci nir_ssa_def *stack_idx_stride = nir_imm_int(&b, lanes); 1405bf215546Sopenharmony_ci nir_ssa_def *stack_idx_base = nir_load_local_invocation_index(&b); 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci nir_ssa_def *accel_struct = nir_load_var(&b, vars.accel_struct); 1408bf215546Sopenharmony_ci 1409bf215546Sopenharmony_ci struct rt_traversal_vars trav_vars = init_traversal_vars(&b); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1); 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0)); 1414bf215546Sopenharmony_ci { 1415bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1); 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci nir_ssa_def *bvh_root = nir_build_load_global( 1418bf215546Sopenharmony_ci &b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE, .align_mul = 64); 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci nir_ssa_def *desc = create_bvh_descriptor(&b); 1421bf215546Sopenharmony_ci nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7); 1422bf215546Sopenharmony_ci 1423bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7); 1424bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7); 1425bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7); 1426bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1); 1427bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.stack, nir_iadd(&b, stack_idx_base, stack_idx_stride), 1); 1430bf215546Sopenharmony_ci stack = nir_build_deref_array(&b, stack_deref, stack_idx_base); 1431bf215546Sopenharmony_ci nir_store_deref(&b, stack, bvh_root, 0x1); 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1); 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci nir_push_loop(&b); 1436bf215546Sopenharmony_ci 1437bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq(&b, nir_load_var(&b, trav_vars.stack), stack_idx_base)); 1438bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 1439bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci nir_push_if( 1442bf215546Sopenharmony_ci &b, nir_uge(&b, nir_load_var(&b, trav_vars.top_stack), nir_load_var(&b, trav_vars.stack))); 1443bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1); 1444bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.bvh_base, 1445bf215546Sopenharmony_ci build_addr_to_node(&b, nir_load_var(&b, vars.accel_struct)), 1); 1446bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7); 1447bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7); 1448bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7); 1449bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.stack, 1454bf215546Sopenharmony_ci nir_isub(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1); 1455bf215546Sopenharmony_ci 1456bf215546Sopenharmony_ci stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack)); 1457bf215546Sopenharmony_ci nir_ssa_def *bvh_node = nir_load_deref(&b, stack); 1458bf215546Sopenharmony_ci nir_ssa_def *bvh_node_type = nir_iand_imm(&b, bvh_node, 7); 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci bvh_node = nir_iadd(&b, nir_load_var(&b, trav_vars.bvh_base), nir_u2u(&b, bvh_node, 64)); 1461bf215546Sopenharmony_ci nir_ssa_def *intrinsic_result = NULL; 1462bf215546Sopenharmony_ci if (!radv_emulate_rt(device->physical_device)) { 1463bf215546Sopenharmony_ci intrinsic_result = nir_bvh64_intersect_ray_amd( 1464bf215546Sopenharmony_ci &b, 32, desc, nir_unpack_64_2x32(&b, bvh_node), nir_load_var(&b, vars.tmax), 1465bf215546Sopenharmony_ci nir_load_var(&b, trav_vars.origin), nir_load_var(&b, trav_vars.dir), 1466bf215546Sopenharmony_ci nir_load_var(&b, trav_vars.inv_dir)); 1467bf215546Sopenharmony_ci } 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 4), 0)); 1470bf215546Sopenharmony_ci { 1471bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 2), 0)); 1472bf215546Sopenharmony_ci { 1473bf215546Sopenharmony_ci /* custom */ 1474bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, nir_iand_imm(&b, bvh_node_type, 1), 0)); 1475bf215546Sopenharmony_ci if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR)) { 1476bf215546Sopenharmony_ci insert_traversal_aabb_case(device, pCreateInfo, &b, &vars, &trav_vars, bvh_node); 1477bf215546Sopenharmony_ci } 1478bf215546Sopenharmony_ci nir_push_else(&b, NULL); 1479bf215546Sopenharmony_ci { 1480bf215546Sopenharmony_ci /* instance */ 1481bf215546Sopenharmony_ci nir_ssa_def *instance_node_addr = build_node_to_addr(device, &b, bvh_node); 1482bf215546Sopenharmony_ci nir_ssa_def *instance_data = 1483bf215546Sopenharmony_ci nir_build_load_global(&b, 4, 32, instance_node_addr, .align_mul = 64); 1484bf215546Sopenharmony_ci nir_ssa_def *wto_matrix[] = { 1485bf215546Sopenharmony_ci nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 16), 1486bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 16), 1487bf215546Sopenharmony_ci nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 32), 1488bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 32), 1489bf215546Sopenharmony_ci nir_build_load_global(&b, 4, 32, nir_iadd_imm(&b, instance_node_addr, 48), 1490bf215546Sopenharmony_ci .align_mul = 64, .align_offset = 48)}; 1491bf215546Sopenharmony_ci nir_ssa_def *instance_id = 1492bf215546Sopenharmony_ci nir_build_load_global(&b, 1, 32, nir_iadd_imm(&b, instance_node_addr, 88)); 1493bf215546Sopenharmony_ci nir_ssa_def *instance_and_mask = nir_channel(&b, instance_data, 2); 1494bf215546Sopenharmony_ci nir_ssa_def *instance_mask = nir_ushr_imm(&b, instance_and_mask, 24); 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci nir_push_if( 1497bf215546Sopenharmony_ci &b, 1498bf215546Sopenharmony_ci nir_ieq_imm(&b, nir_iand(&b, instance_mask, nir_load_var(&b, vars.cull_mask)), 0)); 1499bf215546Sopenharmony_ci nir_jump(&b, nir_jump_continue); 1500bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.top_stack, nir_load_var(&b, trav_vars.stack), 1); 1503bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.bvh_base, 1504bf215546Sopenharmony_ci build_addr_to_node( 1505bf215546Sopenharmony_ci &b, nir_pack_64_2x32(&b, nir_channels(&b, instance_data, 0x3))), 1506bf215546Sopenharmony_ci 1); 1507bf215546Sopenharmony_ci stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack)); 1508bf215546Sopenharmony_ci nir_store_deref(&b, stack, nir_iand_imm(&b, nir_channel(&b, instance_data, 0), 63), 0x1); 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.stack, 1511bf215546Sopenharmony_ci nir_iadd(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1); 1512bf215546Sopenharmony_ci 1513bf215546Sopenharmony_ci nir_store_var( 1514bf215546Sopenharmony_ci &b, trav_vars.origin, 1515bf215546Sopenharmony_ci nir_build_vec3_mat_mult_pre(&b, nir_load_var(&b, vars.origin), wto_matrix), 7); 1516bf215546Sopenharmony_ci nir_store_var( 1517bf215546Sopenharmony_ci &b, trav_vars.dir, 1518bf215546Sopenharmony_ci nir_build_vec3_mat_mult(&b, nir_load_var(&b, vars.direction), wto_matrix, false), 1519bf215546Sopenharmony_ci 7); 1520bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.inv_dir, 1521bf215546Sopenharmony_ci nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7); 1522bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.custom_instance_and_mask, instance_and_mask, 1); 1523bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_channel(&b, instance_data, 3), 1524bf215546Sopenharmony_ci 1); 1525bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.instance_id, instance_id, 1); 1526bf215546Sopenharmony_ci nir_store_var(&b, trav_vars.instance_addr, instance_node_addr, 1); 1527bf215546Sopenharmony_ci } 1528bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1529bf215546Sopenharmony_ci } 1530bf215546Sopenharmony_ci nir_push_else(&b, NULL); 1531bf215546Sopenharmony_ci { 1532bf215546Sopenharmony_ci /* box */ 1533bf215546Sopenharmony_ci nir_ssa_def *result = intrinsic_result; 1534bf215546Sopenharmony_ci if (!result) { 1535bf215546Sopenharmony_ci /* If we didn't run the intrinsic cause the hardware didn't support it, 1536bf215546Sopenharmony_ci * emulate ray/box intersection here */ 1537bf215546Sopenharmony_ci result = intersect_ray_amd_software_box(device, 1538bf215546Sopenharmony_ci &b, bvh_node, nir_load_var(&b, vars.tmax), nir_load_var(&b, trav_vars.origin), 1539bf215546Sopenharmony_ci nir_load_var(&b, trav_vars.dir), nir_load_var(&b, trav_vars.inv_dir)); 1540bf215546Sopenharmony_ci } 1541bf215546Sopenharmony_ci 1542bf215546Sopenharmony_ci for (unsigned i = 4; i-- > 0; ) { 1543bf215546Sopenharmony_ci nir_ssa_def *new_node = nir_channel(&b, result, i); 1544bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, new_node, 0xffffffff)); 1545bf215546Sopenharmony_ci { 1546bf215546Sopenharmony_ci stack = nir_build_deref_array(&b, stack_deref, nir_load_var(&b, trav_vars.stack)); 1547bf215546Sopenharmony_ci nir_store_deref(&b, stack, new_node, 0x1); 1548bf215546Sopenharmony_ci nir_store_var( 1549bf215546Sopenharmony_ci &b, trav_vars.stack, 1550bf215546Sopenharmony_ci nir_iadd(&b, nir_load_var(&b, trav_vars.stack), stack_idx_stride), 1); 1551bf215546Sopenharmony_ci } 1552bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1553bf215546Sopenharmony_ci } 1554bf215546Sopenharmony_ci } 1555bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1556bf215546Sopenharmony_ci } 1557bf215546Sopenharmony_ci nir_push_else(&b, NULL); 1558bf215546Sopenharmony_ci if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)) { 1559bf215546Sopenharmony_ci nir_ssa_def *result = intrinsic_result; 1560bf215546Sopenharmony_ci if (!result) { 1561bf215546Sopenharmony_ci /* If we didn't run the intrinsic cause the hardware didn't support it, 1562bf215546Sopenharmony_ci * emulate ray/tri intersection here */ 1563bf215546Sopenharmony_ci result = intersect_ray_amd_software_tri(device, 1564bf215546Sopenharmony_ci &b, bvh_node, nir_load_var(&b, vars.tmax), nir_load_var(&b, trav_vars.origin), 1565bf215546Sopenharmony_ci nir_load_var(&b, trav_vars.dir), nir_load_var(&b, trav_vars.inv_dir)); 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci insert_traversal_triangle_case(device, pCreateInfo, &b, result, &vars, &trav_vars, bvh_node); 1568bf215546Sopenharmony_ci } 1569bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 1572bf215546Sopenharmony_ci } 1573bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci /* Initialize follow-up shader. */ 1576bf215546Sopenharmony_ci nir_push_if(&b, nir_load_var(&b, trav_vars.hit)); 1577bf215546Sopenharmony_ci { 1578bf215546Sopenharmony_ci /* vars.idx contains the SBT index at this point. */ 1579bf215546Sopenharmony_ci load_sbt_entry(&b, &vars, nir_load_var(&b, vars.idx), SBT_HIT, 0); 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci nir_ssa_def *should_return = nir_ior(&b, 1582bf215546Sopenharmony_ci nir_test_mask(&b, nir_load_var(&b, vars.flags), 1583bf215546Sopenharmony_ci SpvRayFlagsSkipClosestHitShaderKHRMask), 1584bf215546Sopenharmony_ci nir_ieq_imm(&b, nir_load_var(&b, vars.idx), 0)); 1585bf215546Sopenharmony_ci 1586bf215546Sopenharmony_ci /* should_return is set if we had a hit but we won't be calling the closest hit shader and hence 1587bf215546Sopenharmony_ci * need to return immediately to the calling shader. */ 1588bf215546Sopenharmony_ci nir_push_if(&b, should_return); 1589bf215546Sopenharmony_ci { 1590bf215546Sopenharmony_ci insert_rt_return(&b, &vars); 1591bf215546Sopenharmony_ci } 1592bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1593bf215546Sopenharmony_ci } 1594bf215546Sopenharmony_ci nir_push_else(&b, NULL); 1595bf215546Sopenharmony_ci { 1596bf215546Sopenharmony_ci /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer 1597bf215546Sopenharmony_ci * for miss shaders if none of the rays miss. */ 1598bf215546Sopenharmony_ci load_sbt_entry(&b, &vars, nir_load_var(&b, vars.miss_index), SBT_MISS, 0); 1599bf215546Sopenharmony_ci } 1600bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1601bf215546Sopenharmony_ci 1602bf215546Sopenharmony_ci return b.shader; 1603bf215546Sopenharmony_ci} 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci 1606bf215546Sopenharmony_cistatic void 1607bf215546Sopenharmony_ciinsert_traversal(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1608bf215546Sopenharmony_ci nir_builder *b, const struct rt_variables *vars) 1609bf215546Sopenharmony_ci{ 1610bf215546Sopenharmony_ci struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL); 1611bf215546Sopenharmony_ci nir_shader *shader = build_traversal_shader(device, pCreateInfo, vars, var_remap); 1612bf215546Sopenharmony_ci 1613bf215546Sopenharmony_ci /* For now, just inline the traversal shader */ 1614bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, nir_load_var(b, vars->idx), 1)); 1615bf215546Sopenharmony_ci nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1); 1616bf215546Sopenharmony_ci nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap); 1617bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */ 1620bf215546Sopenharmony_ci ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader)); 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_ci ralloc_free(var_remap); 1623bf215546Sopenharmony_ci} 1624bf215546Sopenharmony_ci 1625bf215546Sopenharmony_cistatic unsigned 1626bf215546Sopenharmony_cicompute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1627bf215546Sopenharmony_ci const struct radv_pipeline_shader_stack_size *stack_sizes) 1628bf215546Sopenharmony_ci{ 1629bf215546Sopenharmony_ci unsigned raygen_size = 0; 1630bf215546Sopenharmony_ci unsigned callable_size = 0; 1631bf215546Sopenharmony_ci unsigned chit_size = 0; 1632bf215546Sopenharmony_ci unsigned miss_size = 0; 1633bf215546Sopenharmony_ci unsigned non_recursive_size = 0; 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { 1636bf215546Sopenharmony_ci non_recursive_size = MAX2(stack_sizes[i].non_recursive_size, non_recursive_size); 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; 1639bf215546Sopenharmony_ci uint32_t shader_id = VK_SHADER_UNUSED_KHR; 1640bf215546Sopenharmony_ci unsigned size = stack_sizes[i].recursive_size; 1641bf215546Sopenharmony_ci 1642bf215546Sopenharmony_ci switch (group_info->type) { 1643bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 1644bf215546Sopenharmony_ci shader_id = group_info->generalShader; 1645bf215546Sopenharmony_ci break; 1646bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 1647bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 1648bf215546Sopenharmony_ci shader_id = group_info->closestHitShader; 1649bf215546Sopenharmony_ci break; 1650bf215546Sopenharmony_ci default: 1651bf215546Sopenharmony_ci break; 1652bf215546Sopenharmony_ci } 1653bf215546Sopenharmony_ci if (shader_id == VK_SHADER_UNUSED_KHR) 1654bf215546Sopenharmony_ci continue; 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; 1657bf215546Sopenharmony_ci switch (stage->stage) { 1658bf215546Sopenharmony_ci case VK_SHADER_STAGE_RAYGEN_BIT_KHR: 1659bf215546Sopenharmony_ci raygen_size = MAX2(raygen_size, size); 1660bf215546Sopenharmony_ci break; 1661bf215546Sopenharmony_ci case VK_SHADER_STAGE_MISS_BIT_KHR: 1662bf215546Sopenharmony_ci miss_size = MAX2(miss_size, size); 1663bf215546Sopenharmony_ci break; 1664bf215546Sopenharmony_ci case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: 1665bf215546Sopenharmony_ci chit_size = MAX2(chit_size, size); 1666bf215546Sopenharmony_ci break; 1667bf215546Sopenharmony_ci case VK_SHADER_STAGE_CALLABLE_BIT_KHR: 1668bf215546Sopenharmony_ci callable_size = MAX2(callable_size, size); 1669bf215546Sopenharmony_ci break; 1670bf215546Sopenharmony_ci default: 1671bf215546Sopenharmony_ci unreachable("Invalid stage type in RT shader"); 1672bf215546Sopenharmony_ci } 1673bf215546Sopenharmony_ci } 1674bf215546Sopenharmony_ci return raygen_size + 1675bf215546Sopenharmony_ci MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * 1676bf215546Sopenharmony_ci MAX2(MAX2(chit_size, miss_size), non_recursive_size) + 1677bf215546Sopenharmony_ci MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * 1678bf215546Sopenharmony_ci MAX2(chit_size, miss_size) + 1679bf215546Sopenharmony_ci 2 * callable_size; 1680bf215546Sopenharmony_ci} 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_cibool 1683bf215546Sopenharmony_ciradv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo) 1684bf215546Sopenharmony_ci{ 1685bf215546Sopenharmony_ci if (!pCreateInfo->pDynamicState) 1686bf215546Sopenharmony_ci return false; 1687bf215546Sopenharmony_ci 1688bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) { 1689bf215546Sopenharmony_ci if (pCreateInfo->pDynamicState->pDynamicStates[i] == 1690bf215546Sopenharmony_ci VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) 1691bf215546Sopenharmony_ci return true; 1692bf215546Sopenharmony_ci } 1693bf215546Sopenharmony_ci 1694bf215546Sopenharmony_ci return false; 1695bf215546Sopenharmony_ci} 1696bf215546Sopenharmony_ci 1697bf215546Sopenharmony_cistatic bool 1698bf215546Sopenharmony_cishould_move_rt_instruction(nir_intrinsic_op intrinsic) 1699bf215546Sopenharmony_ci{ 1700bf215546Sopenharmony_ci switch (intrinsic) { 1701bf215546Sopenharmony_ci case nir_intrinsic_load_rt_arg_scratch_offset_amd: 1702bf215546Sopenharmony_ci case nir_intrinsic_load_ray_flags: 1703bf215546Sopenharmony_ci case nir_intrinsic_load_ray_object_origin: 1704bf215546Sopenharmony_ci case nir_intrinsic_load_ray_world_origin: 1705bf215546Sopenharmony_ci case nir_intrinsic_load_ray_t_min: 1706bf215546Sopenharmony_ci case nir_intrinsic_load_ray_object_direction: 1707bf215546Sopenharmony_ci case nir_intrinsic_load_ray_world_direction: 1708bf215546Sopenharmony_ci case nir_intrinsic_load_ray_t_max: 1709bf215546Sopenharmony_ci return true; 1710bf215546Sopenharmony_ci default: 1711bf215546Sopenharmony_ci return false; 1712bf215546Sopenharmony_ci } 1713bf215546Sopenharmony_ci} 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_cistatic void 1716bf215546Sopenharmony_cimove_rt_instructions(nir_shader *shader) 1717bf215546Sopenharmony_ci{ 1718bf215546Sopenharmony_ci nir_cursor target = nir_before_cf_list(&nir_shader_get_entrypoint(shader)->body); 1719bf215546Sopenharmony_ci 1720bf215546Sopenharmony_ci nir_foreach_block (block, nir_shader_get_entrypoint(shader)) { 1721bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 1722bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1723bf215546Sopenharmony_ci continue; 1724bf215546Sopenharmony_ci 1725bf215546Sopenharmony_ci nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci if (!should_move_rt_instruction(intrinsic->intrinsic)) 1728bf215546Sopenharmony_ci continue; 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci nir_instr_move(target, instr); 1731bf215546Sopenharmony_ci } 1732bf215546Sopenharmony_ci } 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci nir_metadata_preserve(nir_shader_get_entrypoint(shader), 1735bf215546Sopenharmony_ci nir_metadata_all & (~nir_metadata_instr_index)); 1736bf215546Sopenharmony_ci} 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_cistatic nir_shader * 1739bf215546Sopenharmony_cicreate_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1740bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size *stack_sizes) 1741bf215546Sopenharmony_ci{ 1742bf215546Sopenharmony_ci struct radv_pipeline_key key; 1743bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "rt_combined"); 1746bf215546Sopenharmony_ci b.shader->info.internal = false; 1747bf215546Sopenharmony_ci b.shader->info.workgroup_size[0] = 8; 1748bf215546Sopenharmony_ci b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes); 1751bf215546Sopenharmony_ci load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0); 1752bf215546Sopenharmony_ci nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); 1753bf215546Sopenharmony_ci 1754bf215546Sopenharmony_ci nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1); 1755bf215546Sopenharmony_ci 1756bf215546Sopenharmony_ci nir_loop *loop = nir_push_loop(&b); 1757bf215546Sopenharmony_ci 1758bf215546Sopenharmony_ci nir_push_if(&b, nir_ior(&b, nir_ieq_imm(&b, nir_load_var(&b, vars.idx), 0), 1759bf215546Sopenharmony_ci nir_inot(&b, nir_load_var(&b, vars.main_loop_case_visited)))); 1760bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 1761bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, false), 1); 1764bf215546Sopenharmony_ci 1765bf215546Sopenharmony_ci insert_traversal(device, pCreateInfo, &b, &vars); 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci nir_ssa_def *idx = nir_load_var(&b, vars.idx); 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_ci /* We do a trick with the indexing of the resume shaders so that the first 1770bf215546Sopenharmony_ci * shader of stage x always gets id x and the resume shader ids then come after 1771bf215546Sopenharmony_ci * stageCount. This makes the shadergroup handles independent of compilation. */ 1772bf215546Sopenharmony_ci unsigned call_idx_base = pCreateInfo->stageCount + 1; 1773bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->stageCount; ++i) { 1774bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i]; 1775bf215546Sopenharmony_ci gl_shader_stage type = vk_to_mesa_shader_stage(stage->stage); 1776bf215546Sopenharmony_ci if (type != MESA_SHADER_RAYGEN && type != MESA_SHADER_CALLABLE && 1777bf215546Sopenharmony_ci type != MESA_SHADER_CLOSEST_HIT && type != MESA_SHADER_MISS) 1778bf215546Sopenharmony_ci continue; 1779bf215546Sopenharmony_ci 1780bf215546Sopenharmony_ci nir_shader *nir_stage = parse_rt_stage(device, stage); 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci /* Move ray tracing system values to the top that are set by rt_trace_ray 1783bf215546Sopenharmony_ci * to prevent them from being overwritten by other rt_trace_ray calls. 1784bf215546Sopenharmony_ci */ 1785bf215546Sopenharmony_ci NIR_PASS_V(nir_stage, move_rt_instructions); 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci uint32_t num_resume_shaders = 0; 1788bf215546Sopenharmony_ci nir_shader **resume_shaders = NULL; 1789bf215546Sopenharmony_ci nir_lower_shader_calls(nir_stage, nir_address_format_32bit_offset, 16, &resume_shaders, 1790bf215546Sopenharmony_ci &num_resume_shaders, nir_stage); 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci vars.stage_idx = i; 1793bf215546Sopenharmony_ci insert_rt_case(&b, nir_stage, &vars, idx, call_idx_base, i + 2); 1794bf215546Sopenharmony_ci for (unsigned j = 0; j < num_resume_shaders; ++j) { 1795bf215546Sopenharmony_ci insert_rt_case(&b, resume_shaders[j], &vars, idx, call_idx_base, call_idx_base + 1 + j); 1796bf215546Sopenharmony_ci } 1797bf215546Sopenharmony_ci call_idx_base += num_resume_shaders; 1798bf215546Sopenharmony_ci } 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci nir_pop_loop(&b, loop); 1801bf215546Sopenharmony_ci 1802bf215546Sopenharmony_ci if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) { 1803bf215546Sopenharmony_ci /* Put something so scratch gets enabled in the shader. */ 1804bf215546Sopenharmony_ci b.shader->scratch_size = 16; 1805bf215546Sopenharmony_ci } else 1806bf215546Sopenharmony_ci b.shader->scratch_size = compute_rt_stack_size(pCreateInfo, stack_sizes); 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci /* Deal with all the inline functions. */ 1809bf215546Sopenharmony_ci nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader)); 1810bf215546Sopenharmony_ci nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none); 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci return b.shader; 1813bf215546Sopenharmony_ci} 1814bf215546Sopenharmony_ci 1815bf215546Sopenharmony_cistatic VkResult 1816bf215546Sopenharmony_ciradv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, 1817bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1818bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) 1819bf215546Sopenharmony_ci{ 1820bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 1821bf215546Sopenharmony_ci VkResult result; 1822bf215546Sopenharmony_ci struct radv_pipeline *pipeline = NULL; 1823bf215546Sopenharmony_ci struct radv_compute_pipeline *compute_pipeline = NULL; 1824bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size *stack_sizes = NULL; 1825bf215546Sopenharmony_ci uint8_t hash[20]; 1826bf215546Sopenharmony_ci nir_shader *shader = NULL; 1827bf215546Sopenharmony_ci bool keep_statistic_info = 1828bf215546Sopenharmony_ci (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) || 1829bf215546Sopenharmony_ci (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info; 1830bf215546Sopenharmony_ci 1831bf215546Sopenharmony_ci if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) 1832bf215546Sopenharmony_ci return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline); 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci VkRayTracingPipelineCreateInfoKHR local_create_info = 1835bf215546Sopenharmony_ci radv_create_merged_rt_create_info(pCreateInfo); 1836bf215546Sopenharmony_ci if (!local_create_info.pStages || !local_create_info.pGroups) { 1837bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 1838bf215546Sopenharmony_ci goto fail; 1839bf215546Sopenharmony_ci } 1840bf215546Sopenharmony_ci 1841bf215546Sopenharmony_ci radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info)); 1842bf215546Sopenharmony_ci struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE}; 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_size = { 1845bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO, 1846bf215546Sopenharmony_ci .pNext = NULL, 1847bf215546Sopenharmony_ci .requiredSubgroupSize = device->physical_device->rt_wave_size, 1848bf215546Sopenharmony_ci }; 1849bf215546Sopenharmony_ci 1850bf215546Sopenharmony_ci VkComputePipelineCreateInfo compute_info = { 1851bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1852bf215546Sopenharmony_ci .pNext = NULL, 1853bf215546Sopenharmony_ci .flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT, 1854bf215546Sopenharmony_ci .stage = 1855bf215546Sopenharmony_ci { 1856bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1857bf215546Sopenharmony_ci .pNext = &subgroup_size, 1858bf215546Sopenharmony_ci .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1859bf215546Sopenharmony_ci .module = vk_shader_module_to_handle(&module), 1860bf215546Sopenharmony_ci .pName = "main", 1861bf215546Sopenharmony_ci }, 1862bf215546Sopenharmony_ci .layout = pCreateInfo->layout, 1863bf215546Sopenharmony_ci }; 1864bf215546Sopenharmony_ci 1865bf215546Sopenharmony_ci /* First check if we can get things from the cache before we take the expensive step of 1866bf215546Sopenharmony_ci * generating the nir. */ 1867bf215546Sopenharmony_ci result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash, 1868bf215546Sopenharmony_ci stack_sizes, local_create_info.groupCount, pPipeline); 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci if (result == VK_PIPELINE_COMPILE_REQUIRED) { 1871bf215546Sopenharmony_ci if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) 1872bf215546Sopenharmony_ci goto fail; 1873bf215546Sopenharmony_ci 1874bf215546Sopenharmony_ci stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount); 1875bf215546Sopenharmony_ci if (!stack_sizes) { 1876bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 1877bf215546Sopenharmony_ci goto fail; 1878bf215546Sopenharmony_ci } 1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci shader = create_rt_shader(device, &local_create_info, stack_sizes); 1881bf215546Sopenharmony_ci module.nir = shader; 1882bf215546Sopenharmony_ci compute_info.flags = pCreateInfo->flags; 1883bf215546Sopenharmony_ci result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash, 1884bf215546Sopenharmony_ci stack_sizes, local_create_info.groupCount, pPipeline); 1885bf215546Sopenharmony_ci stack_sizes = NULL; 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1888bf215546Sopenharmony_ci goto shader_fail; 1889bf215546Sopenharmony_ci } 1890bf215546Sopenharmony_ci pipeline = radv_pipeline_from_handle(*pPipeline); 1891bf215546Sopenharmony_ci compute_pipeline = radv_pipeline_to_compute(pipeline); 1892bf215546Sopenharmony_ci 1893bf215546Sopenharmony_ci compute_pipeline->rt_group_handles = 1894bf215546Sopenharmony_ci calloc(sizeof(*compute_pipeline->rt_group_handles), local_create_info.groupCount); 1895bf215546Sopenharmony_ci if (!compute_pipeline->rt_group_handles) { 1896bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 1897bf215546Sopenharmony_ci goto shader_fail; 1898bf215546Sopenharmony_ci } 1899bf215546Sopenharmony_ci 1900bf215546Sopenharmony_ci compute_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo); 1901bf215546Sopenharmony_ci 1902bf215546Sopenharmony_ci /* For General and ClosestHit shaders, we can use the shader ID directly as handle. 1903bf215546Sopenharmony_ci * As (potentially different) AnyHit shaders are inlined, for Intersection shaders 1904bf215546Sopenharmony_ci * we use the Group ID. 1905bf215546Sopenharmony_ci */ 1906bf215546Sopenharmony_ci for (unsigned i = 0; i < local_create_info.groupCount; ++i) { 1907bf215546Sopenharmony_ci const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i]; 1908bf215546Sopenharmony_ci switch (group_info->type) { 1909bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 1910bf215546Sopenharmony_ci if (group_info->generalShader != VK_SHADER_UNUSED_KHR) 1911bf215546Sopenharmony_ci compute_pipeline->rt_group_handles[i].handles[0] = group_info->generalShader + 2; 1912bf215546Sopenharmony_ci break; 1913bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 1914bf215546Sopenharmony_ci if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR) 1915bf215546Sopenharmony_ci compute_pipeline->rt_group_handles[i].handles[1] = i + 2; 1916bf215546Sopenharmony_ci FALLTHROUGH; 1917bf215546Sopenharmony_ci case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 1918bf215546Sopenharmony_ci if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR) 1919bf215546Sopenharmony_ci compute_pipeline->rt_group_handles[i].handles[0] = group_info->closestHitShader + 2; 1920bf215546Sopenharmony_ci if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR) 1921bf215546Sopenharmony_ci compute_pipeline->rt_group_handles[i].handles[1] = i + 2; 1922bf215546Sopenharmony_ci break; 1923bf215546Sopenharmony_ci case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR: 1924bf215546Sopenharmony_ci unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR"); 1925bf215546Sopenharmony_ci } 1926bf215546Sopenharmony_ci } 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_cishader_fail: 1929bf215546Sopenharmony_ci if (result != VK_SUCCESS && pipeline) 1930bf215546Sopenharmony_ci radv_pipeline_destroy(device, pipeline, pAllocator); 1931bf215546Sopenharmony_ci ralloc_free(shader); 1932bf215546Sopenharmony_cifail: 1933bf215546Sopenharmony_ci free((void *)local_create_info.pGroups); 1934bf215546Sopenharmony_ci free((void *)local_create_info.pStages); 1935bf215546Sopenharmony_ci free(stack_sizes); 1936bf215546Sopenharmony_ci return result; 1937bf215546Sopenharmony_ci} 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 1940bf215546Sopenharmony_ciradv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, 1941bf215546Sopenharmony_ci VkPipelineCache pipelineCache, uint32_t count, 1942bf215546Sopenharmony_ci const VkRayTracingPipelineCreateInfoKHR *pCreateInfos, 1943bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 1944bf215546Sopenharmony_ci{ 1945bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 1946bf215546Sopenharmony_ci 1947bf215546Sopenharmony_ci unsigned i = 0; 1948bf215546Sopenharmony_ci for (; i < count; i++) { 1949bf215546Sopenharmony_ci VkResult r; 1950bf215546Sopenharmony_ci r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, 1951bf215546Sopenharmony_ci &pPipelines[i]); 1952bf215546Sopenharmony_ci if (r != VK_SUCCESS) { 1953bf215546Sopenharmony_ci result = r; 1954bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 1957bf215546Sopenharmony_ci break; 1958bf215546Sopenharmony_ci } 1959bf215546Sopenharmony_ci } 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci for (; i < count; ++i) 1962bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 1963bf215546Sopenharmony_ci 1964bf215546Sopenharmony_ci return result; 1965bf215546Sopenharmony_ci} 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 1968bf215546Sopenharmony_ciradv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, 1969bf215546Sopenharmony_ci uint32_t groupCount, size_t dataSize, void *pData) 1970bf215546Sopenharmony_ci{ 1971bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); 1972bf215546Sopenharmony_ci struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline); 1973bf215546Sopenharmony_ci char *data = pData; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(*compute_pipeline->rt_group_handles) <= RADV_RT_HANDLE_SIZE); 1976bf215546Sopenharmony_ci 1977bf215546Sopenharmony_ci memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE); 1978bf215546Sopenharmony_ci 1979bf215546Sopenharmony_ci for (uint32_t i = 0; i < groupCount; ++i) { 1980bf215546Sopenharmony_ci memcpy(data + i * RADV_RT_HANDLE_SIZE, &compute_pipeline->rt_group_handles[firstGroup + i], 1981bf215546Sopenharmony_ci sizeof(*compute_pipeline->rt_group_handles)); 1982bf215546Sopenharmony_ci } 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_ci return VK_SUCCESS; 1985bf215546Sopenharmony_ci} 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ciVKAPI_ATTR VkDeviceSize VKAPI_CALL 1988bf215546Sopenharmony_ciradv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group, 1989bf215546Sopenharmony_ci VkShaderGroupShaderKHR groupShader) 1990bf215546Sopenharmony_ci{ 1991bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); 1992bf215546Sopenharmony_ci struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline); 1993bf215546Sopenharmony_ci const struct radv_pipeline_shader_stack_size *stack_size = 1994bf215546Sopenharmony_ci &compute_pipeline->rt_stack_sizes[group]; 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR || 1997bf215546Sopenharmony_ci groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR) 1998bf215546Sopenharmony_ci return stack_size->non_recursive_size; 1999bf215546Sopenharmony_ci else 2000bf215546Sopenharmony_ci return stack_size->recursive_size; 2001bf215546Sopenharmony_ci} 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 2004bf215546Sopenharmony_ciradv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice _device, VkPipeline pipeline, 2005bf215546Sopenharmony_ci uint32_t firstGroup, uint32_t groupCount, 2006bf215546Sopenharmony_ci size_t dataSize, void *pData) 2007bf215546Sopenharmony_ci{ 2008bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 2009bf215546Sopenharmony_ci unreachable("Unimplemented"); 2010bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 2011bf215546Sopenharmony_ci} 2012