1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Red Hat Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef NIR_BUILTIN_BUILDER_H 25bf215546Sopenharmony_ci#define NIR_BUILTIN_BUILDER_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/u_math.h" 28bf215546Sopenharmony_ci#include "nir_builder.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#ifdef __cplusplus 31bf215546Sopenharmony_ciextern "C" { 32bf215546Sopenharmony_ci#endif 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci/* 35bf215546Sopenharmony_ci * Functions are sorted alphabetically with removed type and "fast" prefix. 36bf215546Sopenharmony_ci * Definitions for functions in the C file come first. 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cinir_ssa_def* nir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 40bf215546Sopenharmony_cinir_ssa_def* nir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 41bf215546Sopenharmony_cinir_ssa_def* nir_fast_length(nir_builder *b, nir_ssa_def *vec); 42bf215546Sopenharmony_cinir_ssa_def* nir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 43bf215546Sopenharmony_cinir_ssa_def* nir_normalize(nir_builder *b, nir_ssa_def *vec); 44bf215546Sopenharmony_cinir_ssa_def* nir_smoothstep(nir_builder *b, nir_ssa_def *edge0, 45bf215546Sopenharmony_ci nir_ssa_def *edge1, nir_ssa_def *x); 46bf215546Sopenharmony_cinir_ssa_def* nir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo); 47bf215546Sopenharmony_cinir_ssa_def* nir_atan(nir_builder *b, nir_ssa_def *y_over_x); 48bf215546Sopenharmony_cinir_ssa_def* nir_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x); 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cinir_ssa_def * 51bf215546Sopenharmony_cinir_get_texture_lod(nir_builder *b, nir_tex_instr *tex); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cinir_ssa_def * 54bf215546Sopenharmony_cinir_get_texture_size(nir_builder *b, nir_tex_instr *tex); 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic inline nir_ssa_def * 57bf215546Sopenharmony_cinir_nan_check2(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *res) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci return nir_bcsel(b, nir_fneu(b, x, x), x, nir_bcsel(b, nir_fneu(b, y, y), y, res)); 60bf215546Sopenharmony_ci} 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistatic inline nir_ssa_def * 63bf215546Sopenharmony_cinir_fmax_abs_vec_comp(nir_builder *b, nir_ssa_def *vec) 64bf215546Sopenharmony_ci{ 65bf215546Sopenharmony_ci nir_ssa_def *abs = nir_fabs(b, vec); 66bf215546Sopenharmony_ci nir_ssa_def *res = nir_channel(b, abs, 0); 67bf215546Sopenharmony_ci for (unsigned i = 1; i < vec->num_components; ++i) 68bf215546Sopenharmony_ci res = nir_fmax(b, res, nir_channel(b, abs, i)); 69bf215546Sopenharmony_ci return res; 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistatic inline nir_ssa_def * 73bf215546Sopenharmony_cinir_iabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci nir_ssa_def *cond = nir_ige(b, x, y); 76bf215546Sopenharmony_ci nir_ssa_def *res0 = nir_isub(b, x, y); 77bf215546Sopenharmony_ci nir_ssa_def *res1 = nir_isub(b, y, x); 78bf215546Sopenharmony_ci return nir_bcsel(b, cond, res0, res1); 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_cistatic inline nir_ssa_def * 82bf215546Sopenharmony_cinir_uabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci nir_ssa_def *cond = nir_uge(b, x, y); 85bf215546Sopenharmony_ci nir_ssa_def *res0 = nir_isub(b, x, y); 86bf215546Sopenharmony_ci nir_ssa_def *res1 = nir_isub(b, y, x); 87bf215546Sopenharmony_ci return nir_bcsel(b, cond, res0, res1); 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic inline nir_ssa_def * 91bf215546Sopenharmony_cinir_fexp(nir_builder *b, nir_ssa_def *x) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci return nir_fexp2(b, nir_fmul_imm(b, x, M_LOG2E)); 94bf215546Sopenharmony_ci} 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistatic inline nir_ssa_def * 97bf215546Sopenharmony_cinir_flog(nir_builder *b, nir_ssa_def *x) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci return nir_fmul_imm(b, nir_flog2(b, x), 1.0 / M_LOG2E); 100bf215546Sopenharmony_ci} 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_cistatic inline nir_ssa_def * 103bf215546Sopenharmony_cinir_imad24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci nir_ssa_def *temp = nir_imul24(b, x, y); 106bf215546Sopenharmony_ci return nir_iadd(b, temp, z); 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic inline nir_ssa_def * 110bf215546Sopenharmony_cinir_imad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci nir_ssa_def *temp = nir_imul_high(b, x, y); 113bf215546Sopenharmony_ci return nir_iadd(b, temp, z); 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_cistatic inline nir_ssa_def * 117bf215546Sopenharmony_cinir_umad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 118bf215546Sopenharmony_ci{ 119bf215546Sopenharmony_ci nir_ssa_def *temp = nir_umul_high(b, x, y); 120bf215546Sopenharmony_ci return nir_iadd(b, temp, z); 121bf215546Sopenharmony_ci} 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_cistatic inline nir_ssa_def * 124bf215546Sopenharmony_cinir_bitselect(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 125bf215546Sopenharmony_ci{ 126bf215546Sopenharmony_ci return nir_ior(b, nir_iand(b, nir_inot(b, s), x), nir_iand(b, s, y)); 127bf215546Sopenharmony_ci} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_cistatic inline nir_ssa_def * 130bf215546Sopenharmony_cinir_copysign(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci uint64_t masks = 1ull << (x->bit_size - 1); 133bf215546Sopenharmony_ci uint64_t maskv = ~masks; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci nir_ssa_def *s = nir_imm_intN_t(b, masks, x->bit_size); 136bf215546Sopenharmony_ci nir_ssa_def *v = nir_imm_intN_t(b, maskv, x->bit_size); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci return nir_ior(b, nir_iand(b, x, v), nir_iand(b, y, s)); 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_cistatic inline nir_ssa_def * 142bf215546Sopenharmony_cinir_degrees(nir_builder *b, nir_ssa_def *val) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci return nir_fmul_imm(b, val, 180.0 / M_PI); 145bf215546Sopenharmony_ci} 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_cistatic inline nir_ssa_def * 148bf215546Sopenharmony_cinir_fdim(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 149bf215546Sopenharmony_ci{ 150bf215546Sopenharmony_ci nir_ssa_def *cond = nir_flt(b, y, x); 151bf215546Sopenharmony_ci nir_ssa_def *res = nir_fsub(b, x, y); 152bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, x->bit_size); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci // return NaN if either x or y are NaN, else x-y if x>y, else +0.0 155bf215546Sopenharmony_ci return nir_nan_check2(b, x, y, nir_bcsel(b, cond, res, zero)); 156bf215546Sopenharmony_ci} 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cistatic inline nir_ssa_def * 159bf215546Sopenharmony_cinir_fast_distance(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci return nir_fast_length(b, nir_fsub(b, x, y)); 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_cistatic inline nir_ssa_def* 165bf215546Sopenharmony_cinir_fast_normalize(nir_builder *b, nir_ssa_def *vec) 166bf215546Sopenharmony_ci{ 167bf215546Sopenharmony_ci return nir_fdiv(b, vec, nir_fast_length(b, vec)); 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_cistatic inline nir_ssa_def* 171bf215546Sopenharmony_cinir_fmad(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 172bf215546Sopenharmony_ci{ 173bf215546Sopenharmony_ci return nir_fadd(b, nir_fmul(b, x, y), z); 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic inline nir_ssa_def* 177bf215546Sopenharmony_cinir_maxmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci nir_ssa_def *xabs = nir_fabs(b, x); 180bf215546Sopenharmony_ci nir_ssa_def *yabs = nir_fabs(b, y); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci nir_ssa_def *condy = nir_flt(b, xabs, yabs); 183bf215546Sopenharmony_ci nir_ssa_def *condx = nir_flt(b, yabs, xabs); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmax(b, x, y))); 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cistatic inline nir_ssa_def* 189bf215546Sopenharmony_cinir_minmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 190bf215546Sopenharmony_ci{ 191bf215546Sopenharmony_ci nir_ssa_def *xabs = nir_fabs(b, x); 192bf215546Sopenharmony_ci nir_ssa_def *yabs = nir_fabs(b, y); 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci nir_ssa_def *condx = nir_flt(b, xabs, yabs); 195bf215546Sopenharmony_ci nir_ssa_def *condy = nir_flt(b, yabs, xabs); 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmin(b, x, y))); 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_cistatic inline nir_ssa_def* 201bf215546Sopenharmony_cinir_nan(nir_builder *b, nir_ssa_def *x) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci nir_ssa_def *nan = nir_imm_floatN_t(b, NAN, x->bit_size); 204bf215546Sopenharmony_ci if (x->num_components == 1) 205bf215546Sopenharmony_ci return nan; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci nir_ssa_def *nans[NIR_MAX_VEC_COMPONENTS]; 208bf215546Sopenharmony_ci for (unsigned i = 0; i < x->num_components; ++i) 209bf215546Sopenharmony_ci nans[i] = nan; 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci return nir_vec(b, nans, x->num_components); 212bf215546Sopenharmony_ci} 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_cistatic inline nir_ssa_def * 215bf215546Sopenharmony_cinir_radians(nir_builder *b, nir_ssa_def *val) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci return nir_fmul_imm(b, val, M_PI / 180.0); 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_cistatic inline nir_ssa_def * 221bf215546Sopenharmony_cinir_select(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci if (s->num_components != 1) { 224bf215546Sopenharmony_ci uint64_t mask = 1ull << (s->bit_size - 1); 225bf215546Sopenharmony_ci s = nir_iand(b, s, nir_imm_intN_t(b, mask, s->bit_size)); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq_imm(b, s, 0), x, y); 228bf215546Sopenharmony_ci} 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_cistatic inline nir_ssa_def * 231bf215546Sopenharmony_cinir_ftan(nir_builder *b, nir_ssa_def *x) 232bf215546Sopenharmony_ci{ 233bf215546Sopenharmony_ci return nir_fdiv(b, nir_fsin(b, x), nir_fcos(b, x)); 234bf215546Sopenharmony_ci} 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_cistatic inline nir_ssa_def * 237bf215546Sopenharmony_cinir_clz_u(nir_builder *b, nir_ssa_def *a) 238bf215546Sopenharmony_ci{ 239bf215546Sopenharmony_ci nir_ssa_def *val; 240bf215546Sopenharmony_ci val = nir_isub(b, nir_imm_intN_t(b, a->bit_size - 1, 32), nir_ufind_msb(b, a)); 241bf215546Sopenharmony_ci return nir_u2u(b, val, a->bit_size); 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic inline nir_ssa_def * 245bf215546Sopenharmony_cinir_ctz_u(nir_builder *b, nir_ssa_def *a) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci nir_ssa_def *cond = nir_ieq(b, a, nir_imm_intN_t(b, 0, a->bit_size)); 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci return nir_bcsel(b, cond, 250bf215546Sopenharmony_ci nir_imm_intN_t(b, a->bit_size, a->bit_size), 251bf215546Sopenharmony_ci nir_u2u(b, nir_find_lsb(b, a), a->bit_size)); 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci#ifdef __cplusplus 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci#endif 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci#endif /* NIR_BUILTIN_BUILDER_H */ 259