1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "radv_debug.h" 25bf215546Sopenharmony_ci#include "radv_rt_common.h" 26bf215546Sopenharmony_ci#include "radv_acceleration_structure.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_cibool 29bf215546Sopenharmony_ciradv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines) 30bf215546Sopenharmony_ci{ 31bf215546Sopenharmony_ci if ((pdevice->rad_info.gfx_level < GFX10_3 && !radv_emulate_rt(pdevice)) || pdevice->use_llvm) 32bf215546Sopenharmony_ci return false; 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci if (rt_pipelines) 35bf215546Sopenharmony_ci return pdevice->instance->perftest_flags & RADV_PERFTEST_RT; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci return true; 38bf215546Sopenharmony_ci} 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cibool 41bf215546Sopenharmony_ciradv_emulate_rt(const struct radv_physical_device *pdevice) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci return pdevice->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT; 44bf215546Sopenharmony_ci} 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_civoid 47bf215546Sopenharmony_cinir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, 48bf215546Sopenharmony_ci uint32_t chan_1, uint32_t chan_2) 49bf215546Sopenharmony_ci{ 50bf215546Sopenharmony_ci nir_ssa_def *ssa_distances = nir_load_var(b, var_distances); 51bf215546Sopenharmony_ci nir_ssa_def *ssa_indices = nir_load_var(b, var_indices); 52bf215546Sopenharmony_ci /* if (distances[chan_2] < distances[chan_1]) { */ 53bf215546Sopenharmony_ci nir_push_if( 54bf215546Sopenharmony_ci b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1))); 55bf215546Sopenharmony_ci { 56bf215546Sopenharmony_ci /* swap(distances[chan_2], distances[chan_1]); */ 57bf215546Sopenharmony_ci nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), 58bf215546Sopenharmony_ci nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; 59bf215546Sopenharmony_ci nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), 60bf215546Sopenharmony_ci nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; 61bf215546Sopenharmony_ci new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1); 62bf215546Sopenharmony_ci new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2); 63bf215546Sopenharmony_ci new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1); 64bf215546Sopenharmony_ci new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2); 65bf215546Sopenharmony_ci nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), 66bf215546Sopenharmony_ci (1u << chan_1) | (1u << chan_2)); 67bf215546Sopenharmony_ci nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2)); 68bf215546Sopenharmony_ci } 69bf215546Sopenharmony_ci /* } */ 70bf215546Sopenharmony_ci nir_pop_if(b, NULL); 71bf215546Sopenharmony_ci} 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cinir_ssa_def * 74bf215546Sopenharmony_ciintersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, 75bf215546Sopenharmony_ci nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, 76bf215546Sopenharmony_ci nir_ssa_def *inv_dir) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); 79bf215546Sopenharmony_ci const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* vec4 distances = vec4(INF, INF, INF, INF); */ 84bf215546Sopenharmony_ci nir_variable *distances = 85bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances"); 86bf215546Sopenharmony_ci nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci /* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */ 89bf215546Sopenharmony_ci nir_variable *child_indices = 90bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices"); 91bf215546Sopenharmony_ci nir_store_var(b, child_indices, 92bf215546Sopenharmony_ci nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci /* Need to remove infinities here because otherwise we get nasty NaN propogation 95bf215546Sopenharmony_ci * if the direction has 0s in it. */ 96bf215546Sopenharmony_ci /* inv_dir = clamp(inv_dir, -FLT_MAX, FLT_MAX); */ 97bf215546Sopenharmony_ci inv_dir = nir_fclamp(b, inv_dir, nir_imm_float(b, -FLT_MAX), nir_imm_float(b, FLT_MAX)); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 100bf215546Sopenharmony_ci const uint32_t child_offset = offsetof(struct radv_bvh_box32_node, children[i]); 101bf215546Sopenharmony_ci const uint32_t coord_offsets[2] = { 102bf215546Sopenharmony_ci offsetof(struct radv_bvh_box32_node, coords[i][0][0]), 103bf215546Sopenharmony_ci offsetof(struct radv_bvh_box32_node, coords[i][1][0]), 104bf215546Sopenharmony_ci }; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* node->children[i] -> uint */ 107bf215546Sopenharmony_ci nir_ssa_def *child_index = 108bf215546Sopenharmony_ci nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64, 109bf215546Sopenharmony_ci .align_offset = child_offset % 64); 110bf215546Sopenharmony_ci /* node->coords[i][0], node->coords[i][1] -> vec3 */ 111bf215546Sopenharmony_ci nir_ssa_def *node_coords[2] = { 112bf215546Sopenharmony_ci nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), 113bf215546Sopenharmony_ci .align_mul = 64, .align_offset = coord_offsets[0] % 64), 114bf215546Sopenharmony_ci nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), 115bf215546Sopenharmony_ci .align_mul = 64, .align_offset = coord_offsets[1] % 64), 116bf215546Sopenharmony_ci }; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci /* If x of the aabb min is NaN, then this is an inactive aabb. 119bf215546Sopenharmony_ci * We don't need to care about any other components being NaN as that is UB. 120bf215546Sopenharmony_ci * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_ci nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0); 123bf215546Sopenharmony_ci nir_ssa_def *min_x_is_not_nan = 124bf215546Sopenharmony_ci nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */ 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */ 127bf215546Sopenharmony_ci nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir); 128bf215546Sopenharmony_ci /* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */ 129bf215546Sopenharmony_ci nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir); 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z, 132bf215546Sopenharmony_ci * bound1.z)); */ 133bf215546Sopenharmony_ci nir_ssa_def *tmin = 134bf215546Sopenharmony_ci nir_fmax(b, 135bf215546Sopenharmony_ci nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), 136bf215546Sopenharmony_ci nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), 137bf215546Sopenharmony_ci nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z, 140bf215546Sopenharmony_ci * bound1.z)); */ 141bf215546Sopenharmony_ci nir_ssa_def *tmax = 142bf215546Sopenharmony_ci nir_fmin(b, 143bf215546Sopenharmony_ci nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), 144bf215546Sopenharmony_ci nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), 145bf215546Sopenharmony_ci nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci /* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */ 148bf215546Sopenharmony_ci nir_push_if(b, 149bf215546Sopenharmony_ci nir_iand(b, min_x_is_not_nan, 150bf215546Sopenharmony_ci nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)), 151bf215546Sopenharmony_ci nir_flt(b, tmin, ray_tmax)))); 152bf215546Sopenharmony_ci { 153bf215546Sopenharmony_ci /* child_indices[i] = node->children[i]; */ 154bf215546Sopenharmony_ci nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index}; 155bf215546Sopenharmony_ci nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* distances[i] = tmin; */ 158bf215546Sopenharmony_ci nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin}; 159bf215546Sopenharmony_ci nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i); 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci /* } */ 162bf215546Sopenharmony_ci nir_pop_if(b, NULL); 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci /* Sort our distances with a sorting network. */ 166bf215546Sopenharmony_ci nir_sort_hit_pair(b, distances, child_indices, 0, 1); 167bf215546Sopenharmony_ci nir_sort_hit_pair(b, distances, child_indices, 2, 3); 168bf215546Sopenharmony_ci nir_sort_hit_pair(b, distances, child_indices, 0, 2); 169bf215546Sopenharmony_ci nir_sort_hit_pair(b, distances, child_indices, 1, 3); 170bf215546Sopenharmony_ci nir_sort_hit_pair(b, distances, child_indices, 1, 2); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci return nir_load_var(b, child_indices); 173bf215546Sopenharmony_ci} 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cinir_ssa_def * 176bf215546Sopenharmony_ciintersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, 177bf215546Sopenharmony_ci nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir, 178bf215546Sopenharmony_ci nir_ssa_def *inv_dir) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci const uint32_t coord_offsets[3] = { 185bf215546Sopenharmony_ci offsetof(struct radv_bvh_triangle_node, coords[0]), 186bf215546Sopenharmony_ci offsetof(struct radv_bvh_triangle_node, coords[1]), 187bf215546Sopenharmony_ci offsetof(struct radv_bvh_triangle_node, coords[2]), 188bf215546Sopenharmony_ci }; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* node->coords[0], node->coords[1], node->coords[2] -> vec3 */ 191bf215546Sopenharmony_ci nir_ssa_def *node_coords[3] = { 192bf215546Sopenharmony_ci nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64, 193bf215546Sopenharmony_ci .align_offset = coord_offsets[0] % 64), 194bf215546Sopenharmony_ci nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64, 195bf215546Sopenharmony_ci .align_offset = coord_offsets[1] % 64), 196bf215546Sopenharmony_ci nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[2]), .align_mul = 64, 197bf215546Sopenharmony_ci .align_offset = coord_offsets[2] % 64), 198bf215546Sopenharmony_ci }; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result"); 201bf215546Sopenharmony_ci nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci /* Based on watertight Ray/Triangle intersection from 204bf215546Sopenharmony_ci * http://jcgt.org/published/0002/01/05/paper.pdf */ 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci /* Calculate the dimension where the ray direction is largest */ 207bf215546Sopenharmony_ci nir_ssa_def *abs_dir = nir_fabs(b, dir); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci nir_ssa_def *abs_dirs[3] = { 210bf215546Sopenharmony_ci nir_channel(b, abs_dir, 0), 211bf215546Sopenharmony_ci nir_channel(b, abs_dir, 1), 212bf215546Sopenharmony_ci nir_channel(b, abs_dir, 2), 213bf215546Sopenharmony_ci }; 214bf215546Sopenharmony_ci /* Find index of greatest value of abs_dir and put that as kz. */ 215bf215546Sopenharmony_ci nir_ssa_def *kz = nir_bcsel( 216bf215546Sopenharmony_ci b, nir_fge(b, abs_dirs[0], abs_dirs[1]), 217bf215546Sopenharmony_ci nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)), 218bf215546Sopenharmony_ci nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2))); 219bf215546Sopenharmony_ci nir_ssa_def *kx = nir_imod(b, nir_iadd_imm(b, kz, 1), nir_imm_int(b, 3)); 220bf215546Sopenharmony_ci nir_ssa_def *ky = nir_imod(b, nir_iadd_imm(b, kx, 1), nir_imm_int(b, 3)); 221bf215546Sopenharmony_ci nir_ssa_def *k_indices[3] = {kx, ky, kz}; 222bf215546Sopenharmony_ci nir_ssa_def *k = nir_vec(b, k_indices, 3); 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci /* Swap kx and ky dimensions to preseve winding order */ 225bf215546Sopenharmony_ci unsigned swap_xy_swizzle[4] = {1, 0, 2, 3}; 226bf215546Sopenharmony_ci k = nir_bcsel(b, nir_flt(b, nir_vector_extract(b, dir, kz), nir_imm_float(b, 0.0f)), 227bf215546Sopenharmony_ci nir_swizzle(b, k, swap_xy_swizzle, 3), k); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci kx = nir_channel(b, k, 0); 230bf215546Sopenharmony_ci ky = nir_channel(b, k, 1); 231bf215546Sopenharmony_ci kz = nir_channel(b, k, 2); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* Calculate shear constants */ 234bf215546Sopenharmony_ci nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz)); 235bf215546Sopenharmony_ci nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz); 236bf215546Sopenharmony_ci nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci /* Calculate vertices relative to ray origin */ 239bf215546Sopenharmony_ci nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin); 240bf215546Sopenharmony_ci nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin); 241bf215546Sopenharmony_ci nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci /* Perform shear and scale */ 244bf215546Sopenharmony_ci nir_ssa_def *ax = 245bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz))); 246bf215546Sopenharmony_ci nir_ssa_def *ay = 247bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz))); 248bf215546Sopenharmony_ci nir_ssa_def *bx = 249bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz))); 250bf215546Sopenharmony_ci nir_ssa_def *by = 251bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz))); 252bf215546Sopenharmony_ci nir_ssa_def *cx = 253bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz))); 254bf215546Sopenharmony_ci nir_ssa_def *cy = 255bf215546Sopenharmony_ci nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz))); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx)); 258bf215546Sopenharmony_ci nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx)); 259bf215546Sopenharmony_ci nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax)); 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci nir_variable *u_var = 262bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u"); 263bf215546Sopenharmony_ci nir_variable *v_var = 264bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v"); 265bf215546Sopenharmony_ci nir_variable *w_var = 266bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w"); 267bf215546Sopenharmony_ci nir_store_var(b, u_var, u, 0x1); 268bf215546Sopenharmony_ci nir_store_var(b, v_var, v, 0x1); 269bf215546Sopenharmony_ci nir_store_var(b, w_var, w, 0x1); 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci /* Fallback to testing edges with double precision... 272bf215546Sopenharmony_ci * 273bf215546Sopenharmony_ci * The Vulkan spec states it only needs single precision watertightness 274bf215546Sopenharmony_ci * but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with 275bf215546Sopenharmony_ci * failures = 1 without doing this. :( */ 276bf215546Sopenharmony_ci nir_ssa_def *cond_retest = nir_ior( 277bf215546Sopenharmony_ci b, nir_ior(b, nir_feq(b, u, nir_imm_float(b, 0.0f)), nir_feq(b, v, nir_imm_float(b, 0.0f))), 278bf215546Sopenharmony_ci nir_feq(b, w, nir_imm_float(b, 0.0f))); 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci nir_push_if(b, cond_retest); 281bf215546Sopenharmony_ci { 282bf215546Sopenharmony_ci ax = nir_f2f64(b, ax); 283bf215546Sopenharmony_ci ay = nir_f2f64(b, ay); 284bf215546Sopenharmony_ci bx = nir_f2f64(b, bx); 285bf215546Sopenharmony_ci by = nir_f2f64(b, by); 286bf215546Sopenharmony_ci cx = nir_f2f64(b, cx); 287bf215546Sopenharmony_ci cy = nir_f2f64(b, cy); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), 290bf215546Sopenharmony_ci 0x1); 291bf215546Sopenharmony_ci nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), 292bf215546Sopenharmony_ci 0x1); 293bf215546Sopenharmony_ci nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), 294bf215546Sopenharmony_ci 0x1); 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci nir_pop_if(b, NULL); 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci u = nir_load_var(b, u_var); 299bf215546Sopenharmony_ci v = nir_load_var(b, v_var); 300bf215546Sopenharmony_ci w = nir_load_var(b, w_var); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci /* Perform edge tests. */ 303bf215546Sopenharmony_ci nir_ssa_def *cond_back = nir_ior( 304bf215546Sopenharmony_ci b, nir_ior(b, nir_flt(b, u, nir_imm_float(b, 0.0f)), nir_flt(b, v, nir_imm_float(b, 0.0f))), 305bf215546Sopenharmony_ci nir_flt(b, w, nir_imm_float(b, 0.0f))); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci nir_ssa_def *cond_front = nir_ior( 308bf215546Sopenharmony_ci b, nir_ior(b, nir_flt(b, nir_imm_float(b, 0.0f), u), nir_flt(b, nir_imm_float(b, 0.0f), v)), 309bf215546Sopenharmony_ci nir_flt(b, nir_imm_float(b, 0.0f), w)); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front)); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci nir_push_if(b, cond); 314bf215546Sopenharmony_ci { 315bf215546Sopenharmony_ci nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w)); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz)); 318bf215546Sopenharmony_ci nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz)); 319bf215546Sopenharmony_ci nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz)); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci nir_ssa_def *t = 322bf215546Sopenharmony_ci nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz)); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci nir_ssa_def *det_cond_front = nir_inot(b, nir_flt(b, t_signed, nir_imm_float(b, 0.0f))); 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci nir_push_if(b, det_cond_front); 329bf215546Sopenharmony_ci { 330bf215546Sopenharmony_ci nir_ssa_def *indices[4] = {t, det, v, w}; 331bf215546Sopenharmony_ci nir_store_var(b, result, nir_vec(b, indices, 4), 0xf); 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci nir_pop_if(b, NULL); 334bf215546Sopenharmony_ci } 335bf215546Sopenharmony_ci nir_pop_if(b, NULL); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci return nir_load_var(b, result); 338bf215546Sopenharmony_ci} 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_cinir_ssa_def * 341bf215546Sopenharmony_cibuild_addr_to_node(nir_builder *b, nir_ssa_def *addr) 342bf215546Sopenharmony_ci{ 343bf215546Sopenharmony_ci const uint64_t bvh_size = 1ull << 42; 344bf215546Sopenharmony_ci nir_ssa_def *node = nir_ushr_imm(b, addr, 3); 345bf215546Sopenharmony_ci return nir_iand_imm(b, node, (bvh_size - 1) << 3); 346bf215546Sopenharmony_ci} 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_cinir_ssa_def * 349bf215546Sopenharmony_cibuild_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node) 350bf215546Sopenharmony_ci{ 351bf215546Sopenharmony_ci nir_ssa_def *addr = nir_iand_imm(b, node, ~7ull); 352bf215546Sopenharmony_ci addr = nir_ishl_imm(b, addr, 3); 353bf215546Sopenharmony_ci /* Assumes everything is in the top half of address space, which is true in 354bf215546Sopenharmony_ci * GFX9+ for now. */ 355bf215546Sopenharmony_ci return device->physical_device->rad_info.gfx_level >= GFX9 356bf215546Sopenharmony_ci ? nir_ior_imm(b, addr, 0xffffull << 48) 357bf215546Sopenharmony_ci : addr; 358bf215546Sopenharmony_ci} 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_cinir_ssa_def * 361bf215546Sopenharmony_cinir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation) 362bf215546Sopenharmony_ci{ 363bf215546Sopenharmony_ci nir_ssa_def *result_components[3] = { 364bf215546Sopenharmony_ci nir_channel(b, matrix[0], 3), 365bf215546Sopenharmony_ci nir_channel(b, matrix[1], 3), 366bf215546Sopenharmony_ci nir_channel(b, matrix[2], 3), 367bf215546Sopenharmony_ci }; 368bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) { 369bf215546Sopenharmony_ci for (unsigned j = 0; j < 3; ++j) { 370bf215546Sopenharmony_ci nir_ssa_def *v = 371bf215546Sopenharmony_ci nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j)); 372bf215546Sopenharmony_ci result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v; 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci return nir_vec(b, result_components, 3); 376bf215546Sopenharmony_ci} 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_cinir_ssa_def * 379bf215546Sopenharmony_cinir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[]) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci nir_ssa_def *result_components[3] = { 382bf215546Sopenharmony_ci nir_channel(b, matrix[0], 3), 383bf215546Sopenharmony_ci nir_channel(b, matrix[1], 3), 384bf215546Sopenharmony_ci nir_channel(b, matrix[2], 3), 385bf215546Sopenharmony_ci }; 386bf215546Sopenharmony_ci return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix, 387bf215546Sopenharmony_ci false); 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_civoid 391bf215546Sopenharmony_cinir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out) 392bf215546Sopenharmony_ci{ 393bf215546Sopenharmony_ci unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix); 394bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) { 395bf215546Sopenharmony_ci out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), 396bf215546Sopenharmony_ci .align_mul = 64, .align_offset = offset + i * 16); 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci/* When a hit is opaque the any_hit shader is skipped for this hit and the hit 401bf215546Sopenharmony_ci * is assumed to be an actual hit. */ 402bf215546Sopenharmony_cinir_ssa_def * 403bf215546Sopenharmony_cihit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags, 404bf215546Sopenharmony_ci nir_ssa_def *geometry_id_and_flags) 405bf215546Sopenharmony_ci{ 406bf215546Sopenharmony_ci nir_ssa_def *geom_force_opaque = 407bf215546Sopenharmony_ci nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28); 408bf215546Sopenharmony_ci nir_ssa_def *instance_force_opaque = 409bf215546Sopenharmony_ci nir_test_mask(b, sbt_offset_and_flags, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR << 24); 410bf215546Sopenharmony_ci nir_ssa_def *instance_force_non_opaque = 411bf215546Sopenharmony_ci nir_test_mask(b, sbt_offset_and_flags, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR << 24); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci nir_ssa_def *opaque = geom_force_opaque; 414bf215546Sopenharmony_ci opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque); 415bf215546Sopenharmony_ci opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque); 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci nir_ssa_def *ray_force_opaque = nir_test_mask(b, flags, SpvRayFlagsOpaqueKHRMask); 418bf215546Sopenharmony_ci nir_ssa_def *ray_force_non_opaque = nir_test_mask(b, flags, SpvRayFlagsNoOpaqueKHRMask); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque); 421bf215546Sopenharmony_ci opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque); 422bf215546Sopenharmony_ci return opaque; 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_cinir_ssa_def * 426bf215546Sopenharmony_cicreate_bvh_descriptor(nir_builder *b) 427bf215546Sopenharmony_ci{ 428bf215546Sopenharmony_ci /* We create a BVH descriptor that covers the entire memory range. That way we can always 429bf215546Sopenharmony_ci * use the same descriptor, which avoids divergence when different rays hit different 430bf215546Sopenharmony_ci * instances at the cost of having to use 64-bit node ids. */ 431bf215546Sopenharmony_ci const uint64_t bvh_size = 1ull << 42; 432bf215546Sopenharmony_ci return nir_imm_ivec4( 433bf215546Sopenharmony_ci b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu, 434bf215546Sopenharmony_ci ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31)); 435bf215546Sopenharmony_ci} 436