1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Google
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "radv_debug.h"
25bf215546Sopenharmony_ci#include "radv_rt_common.h"
26bf215546Sopenharmony_ci#include "radv_acceleration_structure.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_cibool
29bf215546Sopenharmony_ciradv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
30bf215546Sopenharmony_ci{
31bf215546Sopenharmony_ci   if ((pdevice->rad_info.gfx_level < GFX10_3 && !radv_emulate_rt(pdevice)) || pdevice->use_llvm)
32bf215546Sopenharmony_ci      return false;
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci   if (rt_pipelines)
35bf215546Sopenharmony_ci      return pdevice->instance->perftest_flags & RADV_PERFTEST_RT;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   return true;
38bf215546Sopenharmony_ci}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cibool
41bf215546Sopenharmony_ciradv_emulate_rt(const struct radv_physical_device *pdevice)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   return pdevice->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
44bf215546Sopenharmony_ci}
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_civoid
47bf215546Sopenharmony_cinir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices,
48bf215546Sopenharmony_ci                  uint32_t chan_1, uint32_t chan_2)
49bf215546Sopenharmony_ci{
50bf215546Sopenharmony_ci   nir_ssa_def *ssa_distances = nir_load_var(b, var_distances);
51bf215546Sopenharmony_ci   nir_ssa_def *ssa_indices = nir_load_var(b, var_indices);
52bf215546Sopenharmony_ci   /* if (distances[chan_2] < distances[chan_1]) { */
53bf215546Sopenharmony_ci   nir_push_if(
54bf215546Sopenharmony_ci      b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
55bf215546Sopenharmony_ci   {
56bf215546Sopenharmony_ci      /* swap(distances[chan_2], distances[chan_1]); */
57bf215546Sopenharmony_ci      nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
58bf215546Sopenharmony_ci                                       nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
59bf215546Sopenharmony_ci      nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
60bf215546Sopenharmony_ci                                     nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
61bf215546Sopenharmony_ci      new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1);
62bf215546Sopenharmony_ci      new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2);
63bf215546Sopenharmony_ci      new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1);
64bf215546Sopenharmony_ci      new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2);
65bf215546Sopenharmony_ci      nir_store_var(b, var_distances, nir_vec(b, new_distances, 4),
66bf215546Sopenharmony_ci                    (1u << chan_1) | (1u << chan_2));
67bf215546Sopenharmony_ci      nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2));
68bf215546Sopenharmony_ci   }
69bf215546Sopenharmony_ci   /* } */
70bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cinir_ssa_def *
74bf215546Sopenharmony_ciintersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
75bf215546Sopenharmony_ci                               nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
76bf215546Sopenharmony_ci                               nir_ssa_def *inv_dir)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
79bf215546Sopenharmony_ci   const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   /* vec4 distances = vec4(INF, INF, INF, INF); */
84bf215546Sopenharmony_ci   nir_variable *distances =
85bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
86bf215546Sopenharmony_ci   nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   /* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */
89bf215546Sopenharmony_ci   nir_variable *child_indices =
90bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices");
91bf215546Sopenharmony_ci   nir_store_var(b, child_indices,
92bf215546Sopenharmony_ci                 nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf);
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   /* Need to remove infinities here because otherwise we get nasty NaN propogation
95bf215546Sopenharmony_ci    * if the direction has 0s in it. */
96bf215546Sopenharmony_ci   /* inv_dir = clamp(inv_dir, -FLT_MAX, FLT_MAX); */
97bf215546Sopenharmony_ci   inv_dir = nir_fclamp(b, inv_dir, nir_imm_float(b, -FLT_MAX), nir_imm_float(b, FLT_MAX));
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
100bf215546Sopenharmony_ci      const uint32_t child_offset = offsetof(struct radv_bvh_box32_node, children[i]);
101bf215546Sopenharmony_ci      const uint32_t coord_offsets[2] = {
102bf215546Sopenharmony_ci         offsetof(struct radv_bvh_box32_node, coords[i][0][0]),
103bf215546Sopenharmony_ci         offsetof(struct radv_bvh_box32_node, coords[i][1][0]),
104bf215546Sopenharmony_ci      };
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci      /* node->children[i] -> uint */
107bf215546Sopenharmony_ci      nir_ssa_def *child_index =
108bf215546Sopenharmony_ci         nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64,
109bf215546Sopenharmony_ci                               .align_offset = child_offset % 64);
110bf215546Sopenharmony_ci      /* node->coords[i][0], node->coords[i][1] -> vec3 */
111bf215546Sopenharmony_ci      nir_ssa_def *node_coords[2] = {
112bf215546Sopenharmony_ci         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]),
113bf215546Sopenharmony_ci                               .align_mul = 64, .align_offset = coord_offsets[0] % 64),
114bf215546Sopenharmony_ci         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]),
115bf215546Sopenharmony_ci                               .align_mul = 64, .align_offset = coord_offsets[1] % 64),
116bf215546Sopenharmony_ci      };
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci      /* If x of the aabb min is NaN, then this is an inactive aabb.
119bf215546Sopenharmony_ci       * We don't need to care about any other components being NaN as that is UB.
120bf215546Sopenharmony_ci       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
121bf215546Sopenharmony_ci       */
122bf215546Sopenharmony_ci      nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0);
123bf215546Sopenharmony_ci      nir_ssa_def *min_x_is_not_nan =
124bf215546Sopenharmony_ci         nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */
127bf215546Sopenharmony_ci      nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
128bf215546Sopenharmony_ci      /* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */
129bf215546Sopenharmony_ci      nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci      /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z,
132bf215546Sopenharmony_ci       * bound1.z)); */
133bf215546Sopenharmony_ci      nir_ssa_def *tmin =
134bf215546Sopenharmony_ci         nir_fmax(b,
135bf215546Sopenharmony_ci                  nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
136bf215546Sopenharmony_ci                           nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
137bf215546Sopenharmony_ci                  nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci      /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z,
140bf215546Sopenharmony_ci       * bound1.z)); */
141bf215546Sopenharmony_ci      nir_ssa_def *tmax =
142bf215546Sopenharmony_ci         nir_fmin(b,
143bf215546Sopenharmony_ci                  nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
144bf215546Sopenharmony_ci                           nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
145bf215546Sopenharmony_ci                  nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci      /* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */
148bf215546Sopenharmony_ci      nir_push_if(b,
149bf215546Sopenharmony_ci                  nir_iand(b, min_x_is_not_nan,
150bf215546Sopenharmony_ci                           nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
151bf215546Sopenharmony_ci                                    nir_flt(b, tmin, ray_tmax))));
152bf215546Sopenharmony_ci      {
153bf215546Sopenharmony_ci         /* child_indices[i] = node->children[i]; */
154bf215546Sopenharmony_ci         nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
155bf215546Sopenharmony_ci         nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci         /* distances[i] = tmin; */
158bf215546Sopenharmony_ci         nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin};
159bf215546Sopenharmony_ci         nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i);
160bf215546Sopenharmony_ci      }
161bf215546Sopenharmony_ci      /* } */
162bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
163bf215546Sopenharmony_ci   }
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   /* Sort our distances with a sorting network. */
166bf215546Sopenharmony_ci   nir_sort_hit_pair(b, distances, child_indices, 0, 1);
167bf215546Sopenharmony_ci   nir_sort_hit_pair(b, distances, child_indices, 2, 3);
168bf215546Sopenharmony_ci   nir_sort_hit_pair(b, distances, child_indices, 0, 2);
169bf215546Sopenharmony_ci   nir_sort_hit_pair(b, distances, child_indices, 1, 3);
170bf215546Sopenharmony_ci   nir_sort_hit_pair(b, distances, child_indices, 1, 2);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   return nir_load_var(b, child_indices);
173bf215546Sopenharmony_ci}
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_cinir_ssa_def *
176bf215546Sopenharmony_ciintersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
177bf215546Sopenharmony_ci                               nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
178bf215546Sopenharmony_ci                               nir_ssa_def *inv_dir)
179bf215546Sopenharmony_ci{
180bf215546Sopenharmony_ci   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   const uint32_t coord_offsets[3] = {
185bf215546Sopenharmony_ci      offsetof(struct radv_bvh_triangle_node, coords[0]),
186bf215546Sopenharmony_ci      offsetof(struct radv_bvh_triangle_node, coords[1]),
187bf215546Sopenharmony_ci      offsetof(struct radv_bvh_triangle_node, coords[2]),
188bf215546Sopenharmony_ci   };
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* node->coords[0], node->coords[1], node->coords[2] -> vec3 */
191bf215546Sopenharmony_ci   nir_ssa_def *node_coords[3] = {
192bf215546Sopenharmony_ci      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
193bf215546Sopenharmony_ci                            .align_offset = coord_offsets[0] % 64),
194bf215546Sopenharmony_ci      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,
195bf215546Sopenharmony_ci                            .align_offset = coord_offsets[1] % 64),
196bf215546Sopenharmony_ci      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[2]), .align_mul = 64,
197bf215546Sopenharmony_ci                            .align_offset = coord_offsets[2] % 64),
198bf215546Sopenharmony_ci   };
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result");
201bf215546Sopenharmony_ci   nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf);
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   /* Based on watertight Ray/Triangle intersection from
204bf215546Sopenharmony_ci    * http://jcgt.org/published/0002/01/05/paper.pdf */
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   /* Calculate the dimension where the ray direction is largest */
207bf215546Sopenharmony_ci   nir_ssa_def *abs_dir = nir_fabs(b, dir);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   nir_ssa_def *abs_dirs[3] = {
210bf215546Sopenharmony_ci      nir_channel(b, abs_dir, 0),
211bf215546Sopenharmony_ci      nir_channel(b, abs_dir, 1),
212bf215546Sopenharmony_ci      nir_channel(b, abs_dir, 2),
213bf215546Sopenharmony_ci   };
214bf215546Sopenharmony_ci   /* Find index of greatest value of abs_dir and put that as kz. */
215bf215546Sopenharmony_ci   nir_ssa_def *kz = nir_bcsel(
216bf215546Sopenharmony_ci      b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
217bf215546Sopenharmony_ci      nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
218bf215546Sopenharmony_ci      nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
219bf215546Sopenharmony_ci   nir_ssa_def *kx = nir_imod(b, nir_iadd_imm(b, kz, 1), nir_imm_int(b, 3));
220bf215546Sopenharmony_ci   nir_ssa_def *ky = nir_imod(b, nir_iadd_imm(b, kx, 1), nir_imm_int(b, 3));
221bf215546Sopenharmony_ci   nir_ssa_def *k_indices[3] = {kx, ky, kz};
222bf215546Sopenharmony_ci   nir_ssa_def *k = nir_vec(b, k_indices, 3);
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   /* Swap kx and ky dimensions to preseve winding order */
225bf215546Sopenharmony_ci   unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
226bf215546Sopenharmony_ci   k = nir_bcsel(b, nir_flt(b, nir_vector_extract(b, dir, kz), nir_imm_float(b, 0.0f)),
227bf215546Sopenharmony_ci                 nir_swizzle(b, k, swap_xy_swizzle, 3), k);
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   kx = nir_channel(b, k, 0);
230bf215546Sopenharmony_ci   ky = nir_channel(b, k, 1);
231bf215546Sopenharmony_ci   kz = nir_channel(b, k, 2);
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   /* Calculate shear constants */
234bf215546Sopenharmony_ci   nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
235bf215546Sopenharmony_ci   nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
236bf215546Sopenharmony_ci   nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   /* Calculate vertices relative to ray origin */
239bf215546Sopenharmony_ci   nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin);
240bf215546Sopenharmony_ci   nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin);
241bf215546Sopenharmony_ci   nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin);
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   /* Perform shear and scale */
244bf215546Sopenharmony_ci   nir_ssa_def *ax =
245bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
246bf215546Sopenharmony_ci   nir_ssa_def *ay =
247bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
248bf215546Sopenharmony_ci   nir_ssa_def *bx =
249bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
250bf215546Sopenharmony_ci   nir_ssa_def *by =
251bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
252bf215546Sopenharmony_ci   nir_ssa_def *cx =
253bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
254bf215546Sopenharmony_ci   nir_ssa_def *cy =
255bf215546Sopenharmony_ci      nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
258bf215546Sopenharmony_ci   nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
259bf215546Sopenharmony_ci   nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   nir_variable *u_var =
262bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u");
263bf215546Sopenharmony_ci   nir_variable *v_var =
264bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v");
265bf215546Sopenharmony_ci   nir_variable *w_var =
266bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w");
267bf215546Sopenharmony_ci   nir_store_var(b, u_var, u, 0x1);
268bf215546Sopenharmony_ci   nir_store_var(b, v_var, v, 0x1);
269bf215546Sopenharmony_ci   nir_store_var(b, w_var, w, 0x1);
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   /* Fallback to testing edges with double precision...
272bf215546Sopenharmony_ci    *
273bf215546Sopenharmony_ci    * The Vulkan spec states it only needs single precision watertightness
274bf215546Sopenharmony_ci    * but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with
275bf215546Sopenharmony_ci    * failures = 1 without doing this. :( */
276bf215546Sopenharmony_ci   nir_ssa_def *cond_retest = nir_ior(
277bf215546Sopenharmony_ci      b, nir_ior(b, nir_feq(b, u, nir_imm_float(b, 0.0f)), nir_feq(b, v, nir_imm_float(b, 0.0f))),
278bf215546Sopenharmony_ci      nir_feq(b, w, nir_imm_float(b, 0.0f)));
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci   nir_push_if(b, cond_retest);
281bf215546Sopenharmony_ci   {
282bf215546Sopenharmony_ci      ax = nir_f2f64(b, ax);
283bf215546Sopenharmony_ci      ay = nir_f2f64(b, ay);
284bf215546Sopenharmony_ci      bx = nir_f2f64(b, bx);
285bf215546Sopenharmony_ci      by = nir_f2f64(b, by);
286bf215546Sopenharmony_ci      cx = nir_f2f64(b, cx);
287bf215546Sopenharmony_ci      cy = nir_f2f64(b, cy);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci      nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))),
290bf215546Sopenharmony_ci                    0x1);
291bf215546Sopenharmony_ci      nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))),
292bf215546Sopenharmony_ci                    0x1);
293bf215546Sopenharmony_ci      nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))),
294bf215546Sopenharmony_ci                    0x1);
295bf215546Sopenharmony_ci   }
296bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci   u = nir_load_var(b, u_var);
299bf215546Sopenharmony_ci   v = nir_load_var(b, v_var);
300bf215546Sopenharmony_ci   w = nir_load_var(b, w_var);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   /* Perform edge tests. */
303bf215546Sopenharmony_ci   nir_ssa_def *cond_back = nir_ior(
304bf215546Sopenharmony_ci      b, nir_ior(b, nir_flt(b, u, nir_imm_float(b, 0.0f)), nir_flt(b, v, nir_imm_float(b, 0.0f))),
305bf215546Sopenharmony_ci      nir_flt(b, w, nir_imm_float(b, 0.0f)));
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   nir_ssa_def *cond_front = nir_ior(
308bf215546Sopenharmony_ci      b, nir_ior(b, nir_flt(b, nir_imm_float(b, 0.0f), u), nir_flt(b, nir_imm_float(b, 0.0f), v)),
309bf215546Sopenharmony_ci      nir_flt(b, nir_imm_float(b, 0.0f), w));
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   nir_push_if(b, cond);
314bf215546Sopenharmony_ci   {
315bf215546Sopenharmony_ci      nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w));
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci      nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
318bf215546Sopenharmony_ci      nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
319bf215546Sopenharmony_ci      nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci      nir_ssa_def *t =
322bf215546Sopenharmony_ci         nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci      nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci      nir_ssa_def *det_cond_front = nir_inot(b, nir_flt(b, t_signed, nir_imm_float(b, 0.0f)));
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci      nir_push_if(b, det_cond_front);
329bf215546Sopenharmony_ci      {
330bf215546Sopenharmony_ci         nir_ssa_def *indices[4] = {t, det, v, w};
331bf215546Sopenharmony_ci         nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
332bf215546Sopenharmony_ci      }
333bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
334bf215546Sopenharmony_ci   }
335bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   return nir_load_var(b, result);
338bf215546Sopenharmony_ci}
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_cinir_ssa_def *
341bf215546Sopenharmony_cibuild_addr_to_node(nir_builder *b, nir_ssa_def *addr)
342bf215546Sopenharmony_ci{
343bf215546Sopenharmony_ci   const uint64_t bvh_size = 1ull << 42;
344bf215546Sopenharmony_ci   nir_ssa_def *node = nir_ushr_imm(b, addr, 3);
345bf215546Sopenharmony_ci   return nir_iand_imm(b, node, (bvh_size - 1) << 3);
346bf215546Sopenharmony_ci}
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_cinir_ssa_def *
349bf215546Sopenharmony_cibuild_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node)
350bf215546Sopenharmony_ci{
351bf215546Sopenharmony_ci   nir_ssa_def *addr = nir_iand_imm(b, node, ~7ull);
352bf215546Sopenharmony_ci   addr = nir_ishl_imm(b, addr, 3);
353bf215546Sopenharmony_ci   /* Assumes everything is in the top half of address space, which is true in
354bf215546Sopenharmony_ci    * GFX9+ for now. */
355bf215546Sopenharmony_ci   return device->physical_device->rad_info.gfx_level >= GFX9
356bf215546Sopenharmony_ci             ? nir_ior_imm(b, addr, 0xffffull << 48)
357bf215546Sopenharmony_ci             : addr;
358bf215546Sopenharmony_ci}
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_cinir_ssa_def *
361bf215546Sopenharmony_cinir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation)
362bf215546Sopenharmony_ci{
363bf215546Sopenharmony_ci   nir_ssa_def *result_components[3] = {
364bf215546Sopenharmony_ci      nir_channel(b, matrix[0], 3),
365bf215546Sopenharmony_ci      nir_channel(b, matrix[1], 3),
366bf215546Sopenharmony_ci      nir_channel(b, matrix[2], 3),
367bf215546Sopenharmony_ci   };
368bf215546Sopenharmony_ci   for (unsigned i = 0; i < 3; ++i) {
369bf215546Sopenharmony_ci      for (unsigned j = 0; j < 3; ++j) {
370bf215546Sopenharmony_ci         nir_ssa_def *v =
371bf215546Sopenharmony_ci            nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
372bf215546Sopenharmony_ci         result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
373bf215546Sopenharmony_ci      }
374bf215546Sopenharmony_ci   }
375bf215546Sopenharmony_ci   return nir_vec(b, result_components, 3);
376bf215546Sopenharmony_ci}
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_cinir_ssa_def *
379bf215546Sopenharmony_cinir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[])
380bf215546Sopenharmony_ci{
381bf215546Sopenharmony_ci   nir_ssa_def *result_components[3] = {
382bf215546Sopenharmony_ci      nir_channel(b, matrix[0], 3),
383bf215546Sopenharmony_ci      nir_channel(b, matrix[1], 3),
384bf215546Sopenharmony_ci      nir_channel(b, matrix[2], 3),
385bf215546Sopenharmony_ci   };
386bf215546Sopenharmony_ci   return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix,
387bf215546Sopenharmony_ci                                  false);
388bf215546Sopenharmony_ci}
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_civoid
391bf215546Sopenharmony_cinir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
392bf215546Sopenharmony_ci{
393bf215546Sopenharmony_ci   unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
394bf215546Sopenharmony_ci   for (unsigned i = 0; i < 3; ++i) {
395bf215546Sopenharmony_ci      out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16),
396bf215546Sopenharmony_ci                                     .align_mul = 64, .align_offset = offset + i * 16);
397bf215546Sopenharmony_ci   }
398bf215546Sopenharmony_ci}
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
401bf215546Sopenharmony_ci * is assumed to be an actual hit. */
402bf215546Sopenharmony_cinir_ssa_def *
403bf215546Sopenharmony_cihit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, nir_ssa_def *flags,
404bf215546Sopenharmony_ci              nir_ssa_def *geometry_id_and_flags)
405bf215546Sopenharmony_ci{
406bf215546Sopenharmony_ci   nir_ssa_def *geom_force_opaque =
407bf215546Sopenharmony_ci      nir_test_mask(b, geometry_id_and_flags, VK_GEOMETRY_OPAQUE_BIT_KHR << 28);
408bf215546Sopenharmony_ci   nir_ssa_def *instance_force_opaque =
409bf215546Sopenharmony_ci      nir_test_mask(b, sbt_offset_and_flags, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR << 24);
410bf215546Sopenharmony_ci   nir_ssa_def *instance_force_non_opaque =
411bf215546Sopenharmony_ci      nir_test_mask(b, sbt_offset_and_flags, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR << 24);
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   nir_ssa_def *opaque = geom_force_opaque;
414bf215546Sopenharmony_ci   opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque);
415bf215546Sopenharmony_ci   opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque);
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   nir_ssa_def *ray_force_opaque = nir_test_mask(b, flags, SpvRayFlagsOpaqueKHRMask);
418bf215546Sopenharmony_ci   nir_ssa_def *ray_force_non_opaque = nir_test_mask(b, flags, SpvRayFlagsNoOpaqueKHRMask);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque);
421bf215546Sopenharmony_ci   opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque);
422bf215546Sopenharmony_ci   return opaque;
423bf215546Sopenharmony_ci}
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_cinir_ssa_def *
426bf215546Sopenharmony_cicreate_bvh_descriptor(nir_builder *b)
427bf215546Sopenharmony_ci{
428bf215546Sopenharmony_ci   /* We create a BVH descriptor that covers the entire memory range. That way we can always
429bf215546Sopenharmony_ci    * use the same descriptor, which avoids divergence when different rays hit different
430bf215546Sopenharmony_ci    * instances at the cost of having to use 64-bit node ids. */
431bf215546Sopenharmony_ci   const uint64_t bvh_size = 1ull << 42;
432bf215546Sopenharmony_ci   return nir_imm_ivec4(
433bf215546Sopenharmony_ci      b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu,
434bf215546Sopenharmony_ci      ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31));
435bf215546Sopenharmony_ci}
436