1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Red Hat Inc. 3bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include <math.h> 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "nir.h" 28bf215546Sopenharmony_ci#include "nir_builtin_builder.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cinir_ssa_def* 31bf215546Sopenharmony_cinir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci unsigned yzx[3] = { 1, 2, 0 }; 34bf215546Sopenharmony_ci unsigned zxy[3] = { 2, 0, 1 }; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci return nir_ffma(b, nir_swizzle(b, x, yzx, 3), 37bf215546Sopenharmony_ci nir_swizzle(b, y, zxy, 3), 38bf215546Sopenharmony_ci nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), 39bf215546Sopenharmony_ci nir_swizzle(b, y, yzx, 3)))); 40bf215546Sopenharmony_ci} 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cinir_ssa_def* 43bf215546Sopenharmony_cinir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 44bf215546Sopenharmony_ci{ 45bf215546Sopenharmony_ci nir_ssa_def *cross = nir_cross3(b, x, y); 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci return nir_vec4(b, 48bf215546Sopenharmony_ci nir_channel(b, cross, 0), 49bf215546Sopenharmony_ci nir_channel(b, cross, 1), 50bf215546Sopenharmony_ci nir_channel(b, cross, 2), 51bf215546Sopenharmony_ci nir_imm_intN_t(b, 0, cross->bit_size)); 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cinir_ssa_def* 55bf215546Sopenharmony_cinir_fast_length(nir_builder *b, nir_ssa_def *vec) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci return nir_fsqrt(b, nir_fdot(b, vec, vec)); 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cinir_ssa_def* 61bf215546Sopenharmony_cinir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_intN_t(b, 0, x->bit_size); 64bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_intN_t(b, 1, x->bit_size); 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci nir_ssa_def *condeq = nir_feq(b, x, y); 67bf215546Sopenharmony_ci nir_ssa_def *conddir = nir_flt(b, x, y); 68bf215546Sopenharmony_ci nir_ssa_def *condzero = nir_feq(b, x, zero); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci uint64_t sign_mask = 1ull << (x->bit_size - 1); 71bf215546Sopenharmony_ci uint64_t min_abs = 1; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci if (nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, x->bit_size)) { 74bf215546Sopenharmony_ci switch (x->bit_size) { 75bf215546Sopenharmony_ci case 16: 76bf215546Sopenharmony_ci min_abs = 1 << 10; 77bf215546Sopenharmony_ci break; 78bf215546Sopenharmony_ci case 32: 79bf215546Sopenharmony_ci min_abs = 1 << 23; 80bf215546Sopenharmony_ci break; 81bf215546Sopenharmony_ci case 64: 82bf215546Sopenharmony_ci min_abs = 1ULL << 52; 83bf215546Sopenharmony_ci break; 84bf215546Sopenharmony_ci } 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* Flush denorm to zero to avoid returning a denorm when condeq is true. */ 87bf215546Sopenharmony_ci x = nir_fmul(b, x, nir_imm_floatN_t(b, 1.0, x->bit_size)); 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* beware of: +/-0.0 - 1 == NaN */ 91bf215546Sopenharmony_ci nir_ssa_def *xn = 92bf215546Sopenharmony_ci nir_bcsel(b, 93bf215546Sopenharmony_ci condzero, 94bf215546Sopenharmony_ci nir_imm_intN_t(b, sign_mask | min_abs, x->bit_size), 95bf215546Sopenharmony_ci nir_isub(b, x, one)); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci /* beware of -0.0 + 1 == -0x1p-149 */ 98bf215546Sopenharmony_ci nir_ssa_def *xp = nir_bcsel(b, condzero, 99bf215546Sopenharmony_ci nir_imm_intN_t(b, min_abs, x->bit_size), 100bf215546Sopenharmony_ci nir_iadd(b, x, one)); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci /* nextafter can be implemented by just +/- 1 on the int value */ 103bf215546Sopenharmony_ci nir_ssa_def *res = 104bf215546Sopenharmony_ci nir_bcsel(b, nir_ixor(b, conddir, nir_flt(b, x, zero)), xp, xn); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci return nir_nan_check2(b, x, y, nir_bcsel(b, condeq, x, res)); 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cinir_ssa_def* 110bf215546Sopenharmony_cinir_normalize(nir_builder *b, nir_ssa_def *vec) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci if (vec->num_components == 1) 113bf215546Sopenharmony_ci return nir_fsign(b, vec); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci nir_ssa_def *f0 = nir_imm_floatN_t(b, 0.0, vec->bit_size); 116bf215546Sopenharmony_ci nir_ssa_def *f1 = nir_imm_floatN_t(b, 1.0, vec->bit_size); 117bf215546Sopenharmony_ci nir_ssa_def *finf = nir_imm_floatN_t(b, INFINITY, vec->bit_size); 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci /* scale the input to increase precision */ 120bf215546Sopenharmony_ci nir_ssa_def *maxc = nir_fmax_abs_vec_comp(b, vec); 121bf215546Sopenharmony_ci nir_ssa_def *svec = nir_fdiv(b, vec, maxc); 122bf215546Sopenharmony_ci /* for inf */ 123bf215546Sopenharmony_ci nir_ssa_def *finfvec = nir_copysign(b, nir_bcsel(b, nir_feq(b, vec, finf), f1, f0), f1); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci nir_ssa_def *temp = nir_bcsel(b, nir_feq(b, maxc, finf), finfvec, svec); 126bf215546Sopenharmony_ci nir_ssa_def *res = nir_fmul(b, temp, nir_frsq(b, nir_fdot(b, temp, temp))); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci return nir_bcsel(b, nir_feq(b, maxc, f0), vec, res); 129bf215546Sopenharmony_ci} 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_cinir_ssa_def* 132bf215546Sopenharmony_cinir_smoothstep(nir_builder *b, nir_ssa_def *edge0, nir_ssa_def *edge1, nir_ssa_def *x) 133bf215546Sopenharmony_ci{ 134bf215546Sopenharmony_ci nir_ssa_def *f2 = nir_imm_floatN_t(b, 2.0, x->bit_size); 135bf215546Sopenharmony_ci nir_ssa_def *f3 = nir_imm_floatN_t(b, 3.0, x->bit_size); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ 138bf215546Sopenharmony_ci nir_ssa_def *t = 139bf215546Sopenharmony_ci nir_fsat(b, nir_fdiv(b, nir_fsub(b, x, edge0), 140bf215546Sopenharmony_ci nir_fsub(b, edge1, edge0))); 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci /* result = t * t * (3 - 2 * t) */ 143bf215546Sopenharmony_ci return nir_fmul(b, t, nir_fmul(b, t, nir_a_minus_bc(b, f3, f2, t))); 144bf215546Sopenharmony_ci} 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_cinir_ssa_def* 147bf215546Sopenharmony_cinir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci assert(lo->num_components == hi->num_components); 150bf215546Sopenharmony_ci assert(lo->bit_size == hi->bit_size); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci nir_ssa_def *res[NIR_MAX_VEC_COMPONENTS]; 153bf215546Sopenharmony_ci for (unsigned i = 0; i < lo->num_components; ++i) { 154bf215546Sopenharmony_ci nir_ssa_def *vec = nir_vec2(b, nir_channel(b, lo, i), nir_channel(b, hi, i)); 155bf215546Sopenharmony_ci res[i] = nir_pack_bits(b, vec, vec->bit_size * 2); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci return nir_vec(b, res, lo->num_components); 159bf215546Sopenharmony_ci} 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci/** 162bf215546Sopenharmony_ci * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 163bf215546Sopenharmony_ci */ 164bf215546Sopenharmony_cistatic nir_ssa_def * 165bf215546Sopenharmony_cibuild_fsum(nir_builder *b, nir_ssa_def **xs, int terms) 166bf215546Sopenharmony_ci{ 167bf215546Sopenharmony_ci nir_ssa_def *accum = xs[0]; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci for (int i = 1; i < terms; i++) 170bf215546Sopenharmony_ci accum = nir_fadd(b, accum, xs[i]); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci return accum; 173bf215546Sopenharmony_ci} 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cinir_ssa_def * 176bf215546Sopenharmony_cinir_atan(nir_builder *b, nir_ssa_def *y_over_x) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci const uint32_t bit_size = y_over_x->bit_size; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); 181bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_floatN_t(b, 1.0f, bit_size); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci /* 184bf215546Sopenharmony_ci * range-reduction, first step: 185bf215546Sopenharmony_ci * 186bf215546Sopenharmony_ci * / y_over_x if |y_over_x| <= 1.0; 187bf215546Sopenharmony_ci * x = < 188bf215546Sopenharmony_ci * \ 1.0 / y_over_x otherwise 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_ci nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), 191bf215546Sopenharmony_ci nir_fmax(b, abs_y_over_x, one)); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* 194bf215546Sopenharmony_ci * approximate atan by evaluating polynomial: 195bf215546Sopenharmony_ci * 196bf215546Sopenharmony_ci * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + 197bf215546Sopenharmony_ci * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + 198bf215546Sopenharmony_ci * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 199bf215546Sopenharmony_ci */ 200bf215546Sopenharmony_ci nir_ssa_def *x_2 = nir_fmul(b, x, x); 201bf215546Sopenharmony_ci nir_ssa_def *x_3 = nir_fmul(b, x_2, x); 202bf215546Sopenharmony_ci nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); 203bf215546Sopenharmony_ci nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); 204bf215546Sopenharmony_ci nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); 205bf215546Sopenharmony_ci nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci nir_ssa_def *polynomial_terms[] = { 208bf215546Sopenharmony_ci nir_fmul_imm(b, x, 0.9999793128310355f), 209bf215546Sopenharmony_ci nir_fmul_imm(b, x_3, -0.3326756418091246f), 210bf215546Sopenharmony_ci nir_fmul_imm(b, x_5, 0.1938924977115610f), 211bf215546Sopenharmony_ci nir_fmul_imm(b, x_7, -0.1173503194786851f), 212bf215546Sopenharmony_ci nir_fmul_imm(b, x_9, 0.0536813784310406f), 213bf215546Sopenharmony_ci nir_fmul_imm(b, x_11, -0.0121323213173444f), 214bf215546Sopenharmony_ci }; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci nir_ssa_def *tmp = 217bf215546Sopenharmony_ci build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci /* range-reduction fixup */ 220bf215546Sopenharmony_ci tmp = nir_ffma(b, 221bf215546Sopenharmony_ci nir_b2f(b, nir_flt(b, one, abs_y_over_x), bit_size), 222bf215546Sopenharmony_ci nir_ffma_imm12(b, tmp, -2.0f, M_PI_2), 223bf215546Sopenharmony_ci tmp); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* sign fixup */ 226bf215546Sopenharmony_ci nir_ssa_def *result = nir_fmul(b, tmp, nir_fsign(b, y_over_x)); 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci /* The fmin and fmax above will filter out NaN values. This leads to 229bf215546Sopenharmony_ci * non-NaN results for NaN inputs. Work around this by doing 230bf215546Sopenharmony_ci * 231bf215546Sopenharmony_ci * !isnan(y_over_x) ? ... : y_over_x; 232bf215546Sopenharmony_ci */ 233bf215546Sopenharmony_ci if (b->exact || 234bf215546Sopenharmony_ci nir_is_float_control_signed_zero_inf_nan_preserve(b->shader->info.float_controls_execution_mode, bit_size)) { 235bf215546Sopenharmony_ci const bool exact = b->exact; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci b->exact = true; 238bf215546Sopenharmony_ci nir_ssa_def *is_not_nan = nir_feq(b, y_over_x, y_over_x); 239bf215546Sopenharmony_ci b->exact = exact; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci /* The extra 1.0*y_over_x ensures that subnormal results are flushed to 242bf215546Sopenharmony_ci * zero. 243bf215546Sopenharmony_ci */ 244bf215546Sopenharmony_ci result = nir_bcsel(b, is_not_nan, result, nir_fmul_imm(b, y_over_x, 1.0)); 245bf215546Sopenharmony_ci } 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci return result; 248bf215546Sopenharmony_ci} 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_cinir_ssa_def * 251bf215546Sopenharmony_cinir_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) 252bf215546Sopenharmony_ci{ 253bf215546Sopenharmony_ci assert(y->bit_size == x->bit_size); 254bf215546Sopenharmony_ci const uint32_t bit_size = x->bit_size; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_floatN_t(b, 0, bit_size); 257bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_floatN_t(b, 1, bit_size); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci /* If we're on the left half-plane rotate the coordinates π/2 clock-wise 260bf215546Sopenharmony_ci * for the y=0 discontinuity to end up aligned with the vertical 261bf215546Sopenharmony_ci * discontinuity of atan(s/t) along t=0. This also makes sure that we 262bf215546Sopenharmony_ci * don't attempt to divide by zero along the vertical line, which may give 263bf215546Sopenharmony_ci * unspecified results on non-GLSL 4.1-capable hardware. 264bf215546Sopenharmony_ci */ 265bf215546Sopenharmony_ci nir_ssa_def *flip = nir_fge(b, zero, x); 266bf215546Sopenharmony_ci nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y); 267bf215546Sopenharmony_ci nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x)); 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci /* If the magnitude of the denominator exceeds some huge value, scale down 270bf215546Sopenharmony_ci * the arguments in order to prevent the reciprocal operation from flushing 271bf215546Sopenharmony_ci * its result to zero, which would cause precision problems, and for s 272bf215546Sopenharmony_ci * infinite would cause us to return a NaN instead of the correct finite 273bf215546Sopenharmony_ci * value. 274bf215546Sopenharmony_ci * 275bf215546Sopenharmony_ci * If fmin and fmax are respectively the smallest and largest positive 276bf215546Sopenharmony_ci * normalized floating point values representable by the implementation, 277bf215546Sopenharmony_ci * the constants below should be in agreement with: 278bf215546Sopenharmony_ci * 279bf215546Sopenharmony_ci * huge <= 1 / fmin 280bf215546Sopenharmony_ci * scale <= 1 / fmin / fmax (for |t| >= huge) 281bf215546Sopenharmony_ci * 282bf215546Sopenharmony_ci * In addition scale should be a negative power of two in order to avoid 283bf215546Sopenharmony_ci * loss of precision. The values chosen below should work for most usual 284bf215546Sopenharmony_ci * floating point representations with at least the dynamic range of ATI's 285bf215546Sopenharmony_ci * 24-bit representation. 286bf215546Sopenharmony_ci */ 287bf215546Sopenharmony_ci const double huge_val = bit_size >= 32 ? 1e18 : 16384; 288bf215546Sopenharmony_ci nir_ssa_def *huge = nir_imm_floatN_t(b, huge_val, bit_size); 289bf215546Sopenharmony_ci nir_ssa_def *scale = nir_bcsel(b, nir_fge(b, nir_fabs(b, t), huge), 290bf215546Sopenharmony_ci nir_imm_floatN_t(b, 0.25, bit_size), one); 291bf215546Sopenharmony_ci nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale)); 292bf215546Sopenharmony_ci nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t); 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily 295bf215546Sopenharmony_ci * that ∞/∞ = 1) in order to comply with the rather artificial rules 296bf215546Sopenharmony_ci * inherited from IEEE 754-2008, namely: 297bf215546Sopenharmony_ci * 298bf215546Sopenharmony_ci * "atan2(±∞, −∞) is ±3π/4 299bf215546Sopenharmony_ci * atan2(±∞, +∞) is ±π/4" 300bf215546Sopenharmony_ci * 301bf215546Sopenharmony_ci * Note that this is inconsistent with the rules for the neighborhood of 302bf215546Sopenharmony_ci * zero that are based on iterated limits: 303bf215546Sopenharmony_ci * 304bf215546Sopenharmony_ci * "atan2(±0, −0) is ±π 305bf215546Sopenharmony_ci * atan2(±0, +0) is ±0" 306bf215546Sopenharmony_ci * 307bf215546Sopenharmony_ci * but GLSL specifically allows implementations to deviate from IEEE rules 308bf215546Sopenharmony_ci * at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as 309bf215546Sopenharmony_ci * well). 310bf215546Sopenharmony_ci */ 311bf215546Sopenharmony_ci nir_ssa_def *tan = nir_bcsel(b, nir_feq(b, nir_fabs(b, x), nir_fabs(b, y)), 312bf215546Sopenharmony_ci one, nir_fabs(b, s_over_t)); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci /* Calculate the arctangent and fix up the result if we had flipped the 315bf215546Sopenharmony_ci * coordinate system. 316bf215546Sopenharmony_ci */ 317bf215546Sopenharmony_ci nir_ssa_def *arc = 318bf215546Sopenharmony_ci nir_ffma_imm1(b, nir_b2f(b, flip, bit_size), M_PI_2, nir_atan(b, tan)); 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci /* Rather convoluted calculation of the sign of the result. When x < 0 we 321bf215546Sopenharmony_ci * cannot use fsign because we need to be able to distinguish between 322bf215546Sopenharmony_ci * negative and positive zero. We don't use bitwise arithmetic tricks for 323bf215546Sopenharmony_ci * consistency with the GLSL front-end. When x >= 0 rcp_scaled_t will 324bf215546Sopenharmony_ci * always be non-negative so this won't be able to distinguish between 325bf215546Sopenharmony_ci * negative and positive zero, but we don't care because atan2 is 326bf215546Sopenharmony_ci * continuous along the whole positive y = 0 half-line, so it won't affect 327bf215546Sopenharmony_ci * the result significantly. 328bf215546Sopenharmony_ci */ 329bf215546Sopenharmony_ci return nir_bcsel(b, nir_flt(b, nir_fmin(b, y, rcp_scaled_t), zero), 330bf215546Sopenharmony_ci nir_fneg(b, arc), arc); 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_cinir_ssa_def * 334bf215546Sopenharmony_cinir_get_texture_size(nir_builder *b, nir_tex_instr *tex) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci b->cursor = nir_before_instr(&tex->instr); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci nir_tex_instr *txs; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci unsigned num_srcs = 1; /* One for the LOD */ 341bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 342bf215546Sopenharmony_ci if (tex->src[i].src_type == nir_tex_src_texture_deref || 343bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_deref || 344bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_offset || 345bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_offset || 346bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_handle || 347bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_handle) 348bf215546Sopenharmony_ci num_srcs++; 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci txs = nir_tex_instr_create(b->shader, num_srcs); 352bf215546Sopenharmony_ci txs->op = nir_texop_txs; 353bf215546Sopenharmony_ci txs->sampler_dim = tex->sampler_dim; 354bf215546Sopenharmony_ci txs->is_array = tex->is_array; 355bf215546Sopenharmony_ci txs->is_shadow = tex->is_shadow; 356bf215546Sopenharmony_ci txs->is_new_style_shadow = tex->is_new_style_shadow; 357bf215546Sopenharmony_ci txs->texture_index = tex->texture_index; 358bf215546Sopenharmony_ci txs->sampler_index = tex->sampler_index; 359bf215546Sopenharmony_ci txs->dest_type = nir_type_int32; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci unsigned idx = 0; 362bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 363bf215546Sopenharmony_ci if (tex->src[i].src_type == nir_tex_src_texture_deref || 364bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_deref || 365bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_offset || 366bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_offset || 367bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_handle || 368bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_handle) { 369bf215546Sopenharmony_ci nir_src_copy(&txs->src[idx].src, &tex->src[i].src); 370bf215546Sopenharmony_ci txs->src[idx].src_type = tex->src[i].src_type; 371bf215546Sopenharmony_ci idx++; 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci /* Add in an LOD because some back-ends require it */ 375bf215546Sopenharmony_ci txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0)); 376bf215546Sopenharmony_ci txs->src[idx].src_type = nir_tex_src_lod; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci nir_ssa_dest_init(&txs->instr, &txs->dest, 379bf215546Sopenharmony_ci nir_tex_instr_dest_size(txs), 32, NULL); 380bf215546Sopenharmony_ci nir_builder_instr_insert(b, &txs->instr); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci return &txs->dest.ssa; 383bf215546Sopenharmony_ci} 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_cinir_ssa_def * 386bf215546Sopenharmony_cinir_get_texture_lod(nir_builder *b, nir_tex_instr *tex) 387bf215546Sopenharmony_ci{ 388bf215546Sopenharmony_ci b->cursor = nir_before_instr(&tex->instr); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci nir_tex_instr *tql; 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci unsigned num_srcs = 0; 393bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 394bf215546Sopenharmony_ci if (tex->src[i].src_type == nir_tex_src_coord || 395bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_deref || 396bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_deref || 397bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_offset || 398bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_offset || 399bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_handle || 400bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_handle) 401bf215546Sopenharmony_ci num_srcs++; 402bf215546Sopenharmony_ci } 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci tql = nir_tex_instr_create(b->shader, num_srcs); 405bf215546Sopenharmony_ci tql->op = nir_texop_lod; 406bf215546Sopenharmony_ci tql->coord_components = tex->coord_components; 407bf215546Sopenharmony_ci tql->sampler_dim = tex->sampler_dim; 408bf215546Sopenharmony_ci tql->is_array = tex->is_array; 409bf215546Sopenharmony_ci tql->is_shadow = tex->is_shadow; 410bf215546Sopenharmony_ci tql->is_new_style_shadow = tex->is_new_style_shadow; 411bf215546Sopenharmony_ci tql->texture_index = tex->texture_index; 412bf215546Sopenharmony_ci tql->sampler_index = tex->sampler_index; 413bf215546Sopenharmony_ci tql->dest_type = nir_type_float32; 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci unsigned idx = 0; 416bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 417bf215546Sopenharmony_ci if (tex->src[i].src_type == nir_tex_src_coord || 418bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_deref || 419bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_deref || 420bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_offset || 421bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_offset || 422bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_texture_handle || 423bf215546Sopenharmony_ci tex->src[i].src_type == nir_tex_src_sampler_handle) { 424bf215546Sopenharmony_ci nir_src_copy(&tql->src[idx].src, &tex->src[i].src); 425bf215546Sopenharmony_ci tql->src[idx].src_type = tex->src[i].src_type; 426bf215546Sopenharmony_ci idx++; 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL); 431bf215546Sopenharmony_ci nir_builder_instr_insert(b, &tql->instr); 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci /* The LOD is the y component of the result */ 434bf215546Sopenharmony_ci return nir_channel(b, &tql->dest.ssa, 1); 435bf215546Sopenharmony_ci} 436