1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Connor Abbott 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Connor Abbott (cwabbott0@gmail.com) 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#ifndef NIR_H 29bf215546Sopenharmony_ci#define NIR_H 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "util/hash_table.h" 32bf215546Sopenharmony_ci#include "compiler/glsl/list.h" 33bf215546Sopenharmony_ci#include "util/list.h" 34bf215546Sopenharmony_ci#include "util/log.h" 35bf215546Sopenharmony_ci#include "util/ralloc.h" 36bf215546Sopenharmony_ci#include "util/set.h" 37bf215546Sopenharmony_ci#include "util/bitscan.h" 38bf215546Sopenharmony_ci#include "util/bitset.h" 39bf215546Sopenharmony_ci#include "util/compiler.h" 40bf215546Sopenharmony_ci#include "util/enum_operators.h" 41bf215546Sopenharmony_ci#include "util/macros.h" 42bf215546Sopenharmony_ci#include "util/format/u_format.h" 43bf215546Sopenharmony_ci#include "compiler/nir_types.h" 44bf215546Sopenharmony_ci#include "compiler/shader_enums.h" 45bf215546Sopenharmony_ci#include "compiler/shader_info.h" 46bf215546Sopenharmony_ci#define XXH_INLINE_ALL 47bf215546Sopenharmony_ci#include "util/xxhash.h" 48bf215546Sopenharmony_ci#include <stdio.h> 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci#ifndef NDEBUG 51bf215546Sopenharmony_ci#include "util/debug.h" 52bf215546Sopenharmony_ci#endif /* NDEBUG */ 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci#include "nir_opcodes.h" 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci#if defined(_WIN32) && !defined(snprintf) 57bf215546Sopenharmony_ci#define snprintf _snprintf 58bf215546Sopenharmony_ci#endif 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci#ifdef __cplusplus 61bf215546Sopenharmony_ciextern "C" { 62bf215546Sopenharmony_ci#endif 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ciextern uint32_t nir_debug; 65bf215546Sopenharmony_ciextern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1]; 66bf215546Sopenharmony_ci 
/*
 * NIR_DEBUG(flag) tests whether the NIR_DEBUG_<flag> bit is set in the
 * global nir_debug word.  When NDEBUG is defined the macro is the constant
 * false, so guarded code can be dropped by the compiler.
 */
#ifdef NDEBUG
#define NIR_DEBUG(flag) false
#else
#define NIR_DEBUG(flag) unlikely(nir_debug & (NIR_DEBUG_ ## flag))
#endif

/* Individual bits of nir_debug. */
#define NIR_DEBUG_CLONE                  (1u << 0)
#define NIR_DEBUG_SERIALIZE              (1u << 1)
#define NIR_DEBUG_NOVALIDATE             (1u << 2)
#define NIR_DEBUG_VALIDATE_SSA_DOMINANCE (1u << 3)
#define NIR_DEBUG_TGSI                   (1u << 4)
#define NIR_DEBUG_PRINT_VS               (1u << 5)
#define NIR_DEBUG_PRINT_TCS              (1u << 6)
#define NIR_DEBUG_PRINT_TES              (1u << 7)
#define NIR_DEBUG_PRINT_GS               (1u << 8)
#define NIR_DEBUG_PRINT_FS               (1u << 9)
#define NIR_DEBUG_PRINT_CS               (1u << 10)
#define NIR_DEBUG_PRINT_TS               (1u << 11)
#define NIR_DEBUG_PRINT_MS               (1u << 12)
#define NIR_DEBUG_PRINT_RGS              (1u << 13)
#define NIR_DEBUG_PRINT_AHS              (1u << 14)
#define NIR_DEBUG_PRINT_CHS              (1u << 15)
#define NIR_DEBUG_PRINT_MHS              (1u << 16)
#define NIR_DEBUG_PRINT_IS               (1u << 17)
#define NIR_DEBUG_PRINT_CBS              (1u << 18)
#define NIR_DEBUG_PRINT_KS               (1u << 19)
#define NIR_DEBUG_PRINT_CONSTS           (1u << 20)
#define NIR_DEBUG_VALIDATE_GC_LIST       (1u << 21)

/* Union of every NIR_DEBUG_PRINT_<stage> bit (VS through KS). */
#define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS  | \
                         NIR_DEBUG_PRINT_TCS | \
                         NIR_DEBUG_PRINT_TES | \
                         NIR_DEBUG_PRINT_GS  | \
                         NIR_DEBUG_PRINT_FS  | \
                         NIR_DEBUG_PRINT_CS  | \
                         NIR_DEBUG_PRINT_TS  | \
                         NIR_DEBUG_PRINT_MS  | \
                         NIR_DEBUG_PRINT_RGS | \
                         NIR_DEBUG_PRINT_AHS | \
                         NIR_DEBUG_PRINT_CHS | \
                         NIR_DEBUG_PRINT_MHS | \
                         NIR_DEBUG_PRINT_IS  | \
                         NIR_DEBUG_PRINT_CBS | \
                         NIR_DEBUG_PRINT_KS)

/* 32-bit boolean encodings: all bits clear / all bits set. */
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)

/* Upper bounds on vector width and matrix column count. */
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4

/* Flag bit used in a variable's stream field to mark packed outputs. */
#define NIR_STREAM_PACKED (1 << 8)

/* One bit per vector component; 16 bits covers NIR_MAX_VEC_COMPONENTS. */
typedef uint16_t nir_component_mask_t;

/**
 * Returns true when \p num_components is an accepted vector width:
 * 1 through 5, 8, or 16.
 */
static inline bool
nir_num_components_valid(unsigned num_components)
{
   switch (num_components) {
   case 1:
   case 2:
   case 3:
   case 4:
   case 5:
   case 8:
   case 16:
      return true;
   default:
      return false;
   }
}

/**
 * Returns a mask with the low \p num_components bits set.
 *
 * \p num_components must satisfy nir_num_components_valid(); this is
 * asserted in debug builds.
 */
static inline nir_component_mask_t
nir_component_mask(unsigned num_components)
{
   assert(nir_num_components_valid(num_components));

   const unsigned low_bits = (1u << num_components) - 1;
   return low_bits;
}

/* Defined elsewhere; the implementation is not visible from this header. */
void nir_process_debug_variable(void);

/* Defined elsewhere; the implementation is not visible from this header. */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size);
141bf215546Sopenharmony_cinir_component_mask_t 142bf215546Sopenharmony_cinir_component_mask_reinterpret(nir_component_mask_t mask, 143bf215546Sopenharmony_ci unsigned old_bit_size, 144bf215546Sopenharmony_ci unsigned new_bit_size); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci/** Defines a cast function 147bf215546Sopenharmony_ci * 148bf215546Sopenharmony_ci * This macro defines a cast function from in_type to out_type where 149bf215546Sopenharmony_ci * out_type is some structure type that contains a field of type out_type. 150bf215546Sopenharmony_ci * 151bf215546Sopenharmony_ci * Note that you have to be a bit careful as the generated cast function 152bf215546Sopenharmony_ci * destroys constness. 153bf215546Sopenharmony_ci */ 154bf215546Sopenharmony_ci#define NIR_DEFINE_CAST(name, in_type, out_type, field, \ 155bf215546Sopenharmony_ci type_field, type_value) \ 156bf215546Sopenharmony_cistatic inline out_type * \ 157bf215546Sopenharmony_ciname(const in_type *parent) \ 158bf215546Sopenharmony_ci{ \ 159bf215546Sopenharmony_ci assert(parent && parent->type_field == type_value); \ 160bf215546Sopenharmony_ci return exec_node_data(out_type, parent, field); \ 161bf215546Sopenharmony_ci} 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_cistruct nir_function; 164bf215546Sopenharmony_cistruct nir_shader; 165bf215546Sopenharmony_cistruct nir_instr; 166bf215546Sopenharmony_cistruct nir_builder; 167bf215546Sopenharmony_cistruct nir_xfb_info; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci/** 171bf215546Sopenharmony_ci * Description of built-in state associated with a uniform 172bf215546Sopenharmony_ci * 173bf215546Sopenharmony_ci * \sa nir_variable::state_slots 174bf215546Sopenharmony_ci */ 175bf215546Sopenharmony_citypedef struct { 176bf215546Sopenharmony_ci gl_state_index16 tokens[STATE_LENGTH]; 177bf215546Sopenharmony_ci uint16_t swizzle; 178bf215546Sopenharmony_ci} nir_state_slot; 179bf215546Sopenharmony_ci 
180bf215546Sopenharmony_citypedef enum { 181bf215546Sopenharmony_ci nir_var_system_value = (1 << 0), 182bf215546Sopenharmony_ci nir_var_uniform = (1 << 1), 183bf215546Sopenharmony_ci nir_var_shader_in = (1 << 2), 184bf215546Sopenharmony_ci nir_var_shader_out = (1 << 3), 185bf215546Sopenharmony_ci nir_var_image = (1 << 4), 186bf215546Sopenharmony_ci /** Incoming call or ray payload data for ray-tracing shaders */ 187bf215546Sopenharmony_ci nir_var_shader_call_data = (1 << 5), 188bf215546Sopenharmony_ci /** Ray hit attributes */ 189bf215546Sopenharmony_ci nir_var_ray_hit_attrib = (1 << 6), 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci /* Modes named nir_var_mem_* have explicit data layout */ 192bf215546Sopenharmony_ci nir_var_mem_ubo = (1 << 7), 193bf215546Sopenharmony_ci nir_var_mem_push_const = (1 << 8), 194bf215546Sopenharmony_ci nir_var_mem_ssbo = (1 << 9), 195bf215546Sopenharmony_ci nir_var_mem_constant = (1 << 10), 196bf215546Sopenharmony_ci nir_var_mem_task_payload = (1 << 11), 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci /* Generic modes intentionally come last. See encode_dref_modes() in 199bf215546Sopenharmony_ci * nir_serialize.c for more details. 
200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_ci nir_var_shader_temp = (1 << 12), 202bf215546Sopenharmony_ci nir_var_function_temp = (1 << 13), 203bf215546Sopenharmony_ci nir_var_mem_shared = (1 << 14), 204bf215546Sopenharmony_ci nir_var_mem_global = (1 << 15), 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci nir_var_mem_generic = (nir_var_shader_temp | 207bf215546Sopenharmony_ci nir_var_function_temp | 208bf215546Sopenharmony_ci nir_var_mem_shared | 209bf215546Sopenharmony_ci nir_var_mem_global), 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform | 212bf215546Sopenharmony_ci nir_var_system_value | nir_var_mem_constant | 213bf215546Sopenharmony_ci nir_var_mem_ubo, 214bf215546Sopenharmony_ci /** Modes where vector derefs can be indexed as arrays */ 215bf215546Sopenharmony_ci nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo | 216bf215546Sopenharmony_ci nir_var_mem_shared | nir_var_mem_global | 217bf215546Sopenharmony_ci nir_var_mem_push_const, 218bf215546Sopenharmony_ci nir_num_variable_modes = 16, 219bf215546Sopenharmony_ci nir_var_all = (1 << nir_num_variable_modes) - 1, 220bf215546Sopenharmony_ci} nir_variable_mode; 221bf215546Sopenharmony_ciMESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode) 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci/** 224bf215546Sopenharmony_ci * Rounding modes. 
225bf215546Sopenharmony_ci */ 226bf215546Sopenharmony_citypedef enum { 227bf215546Sopenharmony_ci nir_rounding_mode_undef = 0, 228bf215546Sopenharmony_ci nir_rounding_mode_rtne = 1, /* round to nearest even */ 229bf215546Sopenharmony_ci nir_rounding_mode_ru = 2, /* round up */ 230bf215546Sopenharmony_ci nir_rounding_mode_rd = 3, /* round down */ 231bf215546Sopenharmony_ci nir_rounding_mode_rtz = 4, /* round towards zero */ 232bf215546Sopenharmony_ci} nir_rounding_mode; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_citypedef union { 235bf215546Sopenharmony_ci bool b; 236bf215546Sopenharmony_ci float f32; 237bf215546Sopenharmony_ci double f64; 238bf215546Sopenharmony_ci int8_t i8; 239bf215546Sopenharmony_ci uint8_t u8; 240bf215546Sopenharmony_ci int16_t i16; 241bf215546Sopenharmony_ci uint16_t u16; 242bf215546Sopenharmony_ci int32_t i32; 243bf215546Sopenharmony_ci uint32_t u32; 244bf215546Sopenharmony_ci int64_t i64; 245bf215546Sopenharmony_ci uint64_t u64; 246bf215546Sopenharmony_ci} nir_const_value; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci#define nir_const_value_to_array(arr, c, components, m) \ 249bf215546Sopenharmony_ci{ \ 250bf215546Sopenharmony_ci for (unsigned i = 0; i < components; ++i) \ 251bf215546Sopenharmony_ci arr[i] = c[i].m; \ 252bf215546Sopenharmony_ci} while (false) 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_cistatic inline nir_const_value 255bf215546Sopenharmony_cinir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) 256bf215546Sopenharmony_ci{ 257bf215546Sopenharmony_ci nir_const_value v; 258bf215546Sopenharmony_ci memset(&v, 0, sizeof(v)); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci switch (bit_size) { 261bf215546Sopenharmony_ci case 1: v.b = x; break; 262bf215546Sopenharmony_ci case 8: v.u8 = x; break; 263bf215546Sopenharmony_ci case 16: v.u16 = x; break; 264bf215546Sopenharmony_ci case 32: v.u32 = x; break; 265bf215546Sopenharmony_ci case 64: v.u64 = x; break; 266bf215546Sopenharmony_ci default: 
267bf215546Sopenharmony_ci unreachable("Invalid bit size"); 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci return v; 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistatic inline nir_const_value 274bf215546Sopenharmony_cinir_const_value_for_int(int64_t i, unsigned bit_size) 275bf215546Sopenharmony_ci{ 276bf215546Sopenharmony_ci nir_const_value v; 277bf215546Sopenharmony_ci memset(&v, 0, sizeof(v)); 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci assert(bit_size <= 64); 280bf215546Sopenharmony_ci if (bit_size < 64) { 281bf215546Sopenharmony_ci assert(i >= (-(1ll << (bit_size - 1)))); 282bf215546Sopenharmony_ci assert(i < (1ll << (bit_size - 1))); 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci return nir_const_value_for_raw_uint(i, bit_size); 286bf215546Sopenharmony_ci} 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_cistatic inline nir_const_value 289bf215546Sopenharmony_cinir_const_value_for_uint(uint64_t u, unsigned bit_size) 290bf215546Sopenharmony_ci{ 291bf215546Sopenharmony_ci nir_const_value v; 292bf215546Sopenharmony_ci memset(&v, 0, sizeof(v)); 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci assert(bit_size <= 64); 295bf215546Sopenharmony_ci if (bit_size < 64) 296bf215546Sopenharmony_ci assert(u < (1ull << bit_size)); 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci return nir_const_value_for_raw_uint(u, bit_size); 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_cistatic inline nir_const_value 302bf215546Sopenharmony_cinir_const_value_for_bool(bool b, unsigned bit_size) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci /* Booleans use a 0/-1 convention */ 305bf215546Sopenharmony_ci return nir_const_value_for_int(-(int)b, bit_size); 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci/* This one isn't inline because it requires half-float conversion */ 
309bf215546Sopenharmony_cinir_const_value nir_const_value_for_float(double b, unsigned bit_size); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_cistatic inline int64_t 312bf215546Sopenharmony_cinir_const_value_as_int(nir_const_value value, unsigned bit_size) 313bf215546Sopenharmony_ci{ 314bf215546Sopenharmony_ci switch (bit_size) { 315bf215546Sopenharmony_ci /* int1_t uses 0/-1 convention */ 316bf215546Sopenharmony_ci case 1: return -(int)value.b; 317bf215546Sopenharmony_ci case 8: return value.i8; 318bf215546Sopenharmony_ci case 16: return value.i16; 319bf215546Sopenharmony_ci case 32: return value.i32; 320bf215546Sopenharmony_ci case 64: return value.i64; 321bf215546Sopenharmony_ci default: 322bf215546Sopenharmony_ci unreachable("Invalid bit size"); 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci} 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_cistatic inline uint64_t 327bf215546Sopenharmony_cinir_const_value_as_uint(nir_const_value value, unsigned bit_size) 328bf215546Sopenharmony_ci{ 329bf215546Sopenharmony_ci switch (bit_size) { 330bf215546Sopenharmony_ci case 1: return value.b; 331bf215546Sopenharmony_ci case 8: return value.u8; 332bf215546Sopenharmony_ci case 16: return value.u16; 333bf215546Sopenharmony_ci case 32: return value.u32; 334bf215546Sopenharmony_ci case 64: return value.u64; 335bf215546Sopenharmony_ci default: 336bf215546Sopenharmony_ci unreachable("Invalid bit size"); 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci} 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_cistatic inline bool 341bf215546Sopenharmony_cinir_const_value_as_bool(nir_const_value value, unsigned bit_size) 342bf215546Sopenharmony_ci{ 343bf215546Sopenharmony_ci int64_t i = nir_const_value_as_int(value, bit_size); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci /* Booleans of any size use 0/-1 convention */ 346bf215546Sopenharmony_ci assert(i == 0 || i == -1); 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci return i; 
349bf215546Sopenharmony_ci} 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci/* This one isn't inline because it requires half-float conversion */ 352bf215546Sopenharmony_cidouble nir_const_value_as_float(nir_const_value value, unsigned bit_size); 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_citypedef struct nir_constant { 355bf215546Sopenharmony_ci /** 356bf215546Sopenharmony_ci * Value of the constant. 357bf215546Sopenharmony_ci * 358bf215546Sopenharmony_ci * The field used to back the values supplied by the constant is determined 359bf215546Sopenharmony_ci * by the type associated with the \c nir_variable. Constants may be 360bf215546Sopenharmony_ci * scalars, vectors, or matrices. 361bf215546Sopenharmony_ci */ 362bf215546Sopenharmony_ci nir_const_value values[NIR_MAX_VEC_COMPONENTS]; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci /* we could get this from the var->type but makes clone *much* easier to 365bf215546Sopenharmony_ci * not have to care about the type. 366bf215546Sopenharmony_ci */ 367bf215546Sopenharmony_ci unsigned num_elements; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci /* Array elements / Structure Fields */ 370bf215546Sopenharmony_ci struct nir_constant **elements; 371bf215546Sopenharmony_ci} nir_constant; 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci/** 374bf215546Sopenharmony_ci * \brief Layout qualifiers for gl_FragDepth. 375bf215546Sopenharmony_ci * 376bf215546Sopenharmony_ci * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 377bf215546Sopenharmony_ci * with a layout qualifier. 378bf215546Sopenharmony_ci */ 379bf215546Sopenharmony_citypedef enum { 380bf215546Sopenharmony_ci nir_depth_layout_none, /**< No depth layout is specified. 
*/ 381bf215546Sopenharmony_ci nir_depth_layout_any, 382bf215546Sopenharmony_ci nir_depth_layout_greater, 383bf215546Sopenharmony_ci nir_depth_layout_less, 384bf215546Sopenharmony_ci nir_depth_layout_unchanged 385bf215546Sopenharmony_ci} nir_depth_layout; 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci/** 388bf215546Sopenharmony_ci * Enum keeping track of how a variable was declared. 389bf215546Sopenharmony_ci */ 390bf215546Sopenharmony_citypedef enum { 391bf215546Sopenharmony_ci /** 392bf215546Sopenharmony_ci * Normal declaration. 393bf215546Sopenharmony_ci */ 394bf215546Sopenharmony_ci nir_var_declared_normally = 0, 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci /** 397bf215546Sopenharmony_ci * Variable is implicitly generated by the compiler and should not be 398bf215546Sopenharmony_ci * visible via the API. 399bf215546Sopenharmony_ci */ 400bf215546Sopenharmony_ci nir_var_hidden, 401bf215546Sopenharmony_ci} nir_var_declaration_type; 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci/** 404bf215546Sopenharmony_ci * Either a uniform, global variable, shader input, or shader output. Based on 405bf215546Sopenharmony_ci * ir_variable - it should be easy to translate between the two. 406bf215546Sopenharmony_ci */ 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_citypedef struct nir_variable { 409bf215546Sopenharmony_ci struct exec_node node; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci /** 412bf215546Sopenharmony_ci * Declared type of the variable 413bf215546Sopenharmony_ci */ 414bf215546Sopenharmony_ci const struct glsl_type *type; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci /** 417bf215546Sopenharmony_ci * Declared name of the variable 418bf215546Sopenharmony_ci */ 419bf215546Sopenharmony_ci char *name; 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci struct nir_variable_data { 422bf215546Sopenharmony_ci /** 423bf215546Sopenharmony_ci * Storage class of the variable. 
424bf215546Sopenharmony_ci * 425bf215546Sopenharmony_ci * \sa nir_variable_mode 426bf215546Sopenharmony_ci */ 427bf215546Sopenharmony_ci unsigned mode:16; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci /** 430bf215546Sopenharmony_ci * Is the variable read-only? 431bf215546Sopenharmony_ci * 432bf215546Sopenharmony_ci * This is set for variables declared as \c const, shader inputs, 433bf215546Sopenharmony_ci * and uniforms. 434bf215546Sopenharmony_ci */ 435bf215546Sopenharmony_ci unsigned read_only:1; 436bf215546Sopenharmony_ci unsigned centroid:1; 437bf215546Sopenharmony_ci unsigned sample:1; 438bf215546Sopenharmony_ci unsigned patch:1; 439bf215546Sopenharmony_ci unsigned invariant:1; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci /** 442bf215546Sopenharmony_ci * Is the variable a ray query? 443bf215546Sopenharmony_ci */ 444bf215546Sopenharmony_ci unsigned ray_query:1; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci /** 447bf215546Sopenharmony_ci * Precision qualifier. 448bf215546Sopenharmony_ci * 449bf215546Sopenharmony_ci * In desktop GLSL we do not care about precision qualifiers at all, in 450bf215546Sopenharmony_ci * fact, the spec says that precision qualifiers are ignored. 451bf215546Sopenharmony_ci * 452bf215546Sopenharmony_ci * To make things easy, we make it so that this field is always 453bf215546Sopenharmony_ci * GLSL_PRECISION_NONE on desktop shaders. This way all the variables 454bf215546Sopenharmony_ci * have the same precision value and the checks we add in the compiler 455bf215546Sopenharmony_ci * for this field will never break a desktop shader compile. 456bf215546Sopenharmony_ci */ 457bf215546Sopenharmony_ci unsigned precision:2; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci /** 460bf215546Sopenharmony_ci * Has this variable been statically assigned? 
461bf215546Sopenharmony_ci * 462bf215546Sopenharmony_ci * This answers whether the variable was assigned in any path of 463bf215546Sopenharmony_ci * the shader during ast_to_hir. This doesn't answer whether it is 464bf215546Sopenharmony_ci * still written after dead code removal, nor is it maintained in 465bf215546Sopenharmony_ci * non-ast_to_hir.cpp (GLSL parsing) paths. 466bf215546Sopenharmony_ci */ 467bf215546Sopenharmony_ci unsigned assigned:1; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci /** 470bf215546Sopenharmony_ci * Can this variable be coalesced with another? 471bf215546Sopenharmony_ci * 472bf215546Sopenharmony_ci * This is set by nir_lower_io_to_temporaries to say that any 473bf215546Sopenharmony_ci * copies involving this variable should stay put. Propagating it can 474bf215546Sopenharmony_ci * duplicate the resulting load/store, which is not wanted, and may 475bf215546Sopenharmony_ci * result in a load/store of the variable with an indirect offset which 476bf215546Sopenharmony_ci * the backend may not be able to handle. 477bf215546Sopenharmony_ci */ 478bf215546Sopenharmony_ci unsigned cannot_coalesce:1; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci /** 481bf215546Sopenharmony_ci * When separate shader programs are enabled, only input/outputs between 482bf215546Sopenharmony_ci * the stages of a multi-stage separate program can be safely removed 483bf215546Sopenharmony_ci * from the shader interface. Other input/outputs must remains active. 484bf215546Sopenharmony_ci * 485bf215546Sopenharmony_ci * This is also used to make sure xfb varyings that are unused by the 486bf215546Sopenharmony_ci * fragment shader are not removed. 
487bf215546Sopenharmony_ci */ 488bf215546Sopenharmony_ci unsigned always_active_io:1; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci /** 491bf215546Sopenharmony_ci * Interpolation mode for shader inputs / outputs 492bf215546Sopenharmony_ci * 493bf215546Sopenharmony_ci * \sa glsl_interp_mode 494bf215546Sopenharmony_ci */ 495bf215546Sopenharmony_ci unsigned interpolation:3; 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci /** 498bf215546Sopenharmony_ci * If non-zero, then this variable may be packed along with other variables 499bf215546Sopenharmony_ci * into a single varying slot, so this offset should be applied when 500bf215546Sopenharmony_ci * accessing components. For example, an offset of 1 means that the x 501bf215546Sopenharmony_ci * component of this variable is actually stored in component y of the 502bf215546Sopenharmony_ci * location specified by \c location. 503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci unsigned location_frac:2; 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci /** 507bf215546Sopenharmony_ci * If true, this variable represents an array of scalars that should 508bf215546Sopenharmony_ci * be tightly packed. In other words, consecutive array elements 509bf215546Sopenharmony_ci * should be stored one component apart, rather than one slot apart. 510bf215546Sopenharmony_ci */ 511bf215546Sopenharmony_ci unsigned compact:1; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci /** 514bf215546Sopenharmony_ci * Whether this is a fragment shader output implicitly initialized with 515bf215546Sopenharmony_ci * the previous contents of the specified render target at the 516bf215546Sopenharmony_ci * framebuffer location corresponding to this shader invocation. 
517bf215546Sopenharmony_ci */ 518bf215546Sopenharmony_ci unsigned fb_fetch_output:1; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci /** 521bf215546Sopenharmony_ci * Non-zero if this variable is considered bindless as defined by 522bf215546Sopenharmony_ci * ARB_bindless_texture. 523bf215546Sopenharmony_ci */ 524bf215546Sopenharmony_ci unsigned bindless:1; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /** 527bf215546Sopenharmony_ci * Was an explicit binding set in the shader? 528bf215546Sopenharmony_ci */ 529bf215546Sopenharmony_ci unsigned explicit_binding:1; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci /** 532bf215546Sopenharmony_ci * Was the location explicitly set in the shader? 533bf215546Sopenharmony_ci * 534bf215546Sopenharmony_ci * If the location is explicitly set in the shader, it \b cannot be changed 535bf215546Sopenharmony_ci * by the linker or by the API (e.g., calls to \c glBindAttribLocation have 536bf215546Sopenharmony_ci * no effect). 537bf215546Sopenharmony_ci */ 538bf215546Sopenharmony_ci unsigned explicit_location:1; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci /** 541bf215546Sopenharmony_ci * Is this varying used by transform feedback? 542bf215546Sopenharmony_ci * 543bf215546Sopenharmony_ci * This is used by the linker to decide if it's safe to pack the varying. 544bf215546Sopenharmony_ci */ 545bf215546Sopenharmony_ci unsigned is_xfb:1; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci /** 548bf215546Sopenharmony_ci * Is this varying used only by transform feedback? 549bf215546Sopenharmony_ci * 550bf215546Sopenharmony_ci * This is used by the linker to decide if its safe to pack the varying. 551bf215546Sopenharmony_ci */ 552bf215546Sopenharmony_ci unsigned is_xfb_only:1; 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci /** 555bf215546Sopenharmony_ci * Was a transfer feedback buffer set in the shader? 
556bf215546Sopenharmony_ci */ 557bf215546Sopenharmony_ci unsigned explicit_xfb_buffer:1; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci /** 560bf215546Sopenharmony_ci * Was a transfer feedback stride set in the shader? 561bf215546Sopenharmony_ci */ 562bf215546Sopenharmony_ci unsigned explicit_xfb_stride:1; 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci /** 565bf215546Sopenharmony_ci * Was an explicit offset set in the shader? 566bf215546Sopenharmony_ci */ 567bf215546Sopenharmony_ci unsigned explicit_offset:1; 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci /** 570bf215546Sopenharmony_ci * Layout of the matrix. Uses glsl_matrix_layout values. 571bf215546Sopenharmony_ci */ 572bf215546Sopenharmony_ci unsigned matrix_layout:2; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci /** 575bf215546Sopenharmony_ci * Non-zero if this variable was created by lowering a named interface 576bf215546Sopenharmony_ci * block. 577bf215546Sopenharmony_ci */ 578bf215546Sopenharmony_ci unsigned from_named_ifc_block:1; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci /** 581bf215546Sopenharmony_ci * Non-zero if the variable must be a shader input. This is useful for 582bf215546Sopenharmony_ci * constraints on function parameters. 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci unsigned must_be_shader_input:1; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /** 587bf215546Sopenharmony_ci * How the variable was declared. See nir_var_declaration_type. 588bf215546Sopenharmony_ci * 589bf215546Sopenharmony_ci * This is used to detect variables generated by the compiler, so should 590bf215546Sopenharmony_ci * not be visible via the API. 591bf215546Sopenharmony_ci */ 592bf215546Sopenharmony_ci unsigned how_declared:2; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci /** 595bf215546Sopenharmony_ci * Is this variable per-view? If so, we know it must be an array with 596bf215546Sopenharmony_ci * size corresponding to the number of views. 
597bf215546Sopenharmony_ci */ 598bf215546Sopenharmony_ci unsigned per_view:1; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci /** 601bf215546Sopenharmony_ci * Whether the variable is per-primitive. 602bf215546Sopenharmony_ci * Can be use by Mesh Shader outputs and corresponding Fragment Shader inputs. 603bf215546Sopenharmony_ci */ 604bf215546Sopenharmony_ci unsigned per_primitive:1; 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci /** 607bf215546Sopenharmony_ci * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout. 608bf215546Sopenharmony_ci * 609bf215546Sopenharmony_ci * This is not equal to \c ir_depth_layout_none if and only if this 610bf215546Sopenharmony_ci * variable is \c gl_FragDepth and a layout qualifier is specified. 611bf215546Sopenharmony_ci */ 612bf215546Sopenharmony_ci unsigned depth_layout:3; 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci /** 615bf215546Sopenharmony_ci * Vertex stream output identifier. 616bf215546Sopenharmony_ci * 617bf215546Sopenharmony_ci * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i] 618bf215546Sopenharmony_ci * indicate the stream of the i-th component. 619bf215546Sopenharmony_ci */ 620bf215546Sopenharmony_ci unsigned stream:9; 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci /** 623bf215546Sopenharmony_ci * See gl_access_qualifier. 624bf215546Sopenharmony_ci * 625bf215546Sopenharmony_ci * Access flags for memory variables (SSBO/global), image uniforms, and 626bf215546Sopenharmony_ci * bindless images in uniforms/inputs/outputs. 627bf215546Sopenharmony_ci */ 628bf215546Sopenharmony_ci unsigned access:9; 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci /** 631bf215546Sopenharmony_ci * Descriptor set binding for sampler or UBO. 632bf215546Sopenharmony_ci */ 633bf215546Sopenharmony_ci unsigned descriptor_set:5; 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci /** 636bf215546Sopenharmony_ci * output index for dual source blending. 
637bf215546Sopenharmony_ci */ 638bf215546Sopenharmony_ci unsigned index; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci /** 641bf215546Sopenharmony_ci * Initial binding point for a sampler or UBO. 642bf215546Sopenharmony_ci * 643bf215546Sopenharmony_ci * For array types, this represents the binding point for the first element. 644bf215546Sopenharmony_ci */ 645bf215546Sopenharmony_ci unsigned binding; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci /** 648bf215546Sopenharmony_ci * Storage location of the base of this variable 649bf215546Sopenharmony_ci * 650bf215546Sopenharmony_ci * The precise meaning of this field depends on the nature of the variable. 651bf215546Sopenharmony_ci * 652bf215546Sopenharmony_ci * - Vertex shader input: one of the values from \c gl_vert_attrib. 653bf215546Sopenharmony_ci * - Vertex shader output: one of the values from \c gl_varying_slot. 654bf215546Sopenharmony_ci * - Geometry shader input: one of the values from \c gl_varying_slot. 655bf215546Sopenharmony_ci * - Geometry shader output: one of the values from \c gl_varying_slot. 656bf215546Sopenharmony_ci * - Fragment shader input: one of the values from \c gl_varying_slot. 657bf215546Sopenharmony_ci * - Fragment shader output: one of the values from \c gl_frag_result. 658bf215546Sopenharmony_ci * - Task shader output: one of the values from \c gl_varying_slot. 659bf215546Sopenharmony_ci * - Mesh shader input: one of the values from \c gl_varying_slot. 660bf215546Sopenharmony_ci * - Mesh shader output: one of the values from \c gl_varying_slot. 661bf215546Sopenharmony_ci * - Uniforms: Per-stage uniform slot number for default uniform block. 662bf215546Sopenharmony_ci * - Uniforms: Index within the uniform block definition for UBO members. 663bf215546Sopenharmony_ci * - Non-UBO Uniforms: uniform slot number. 664bf215546Sopenharmony_ci * - Other: This field is not currently used. 
665bf215546Sopenharmony_ci * 666bf215546Sopenharmony_ci * If the variable is a uniform, shader input, or shader output, and the 667bf215546Sopenharmony_ci * slot has not been assigned, the value will be -1. 668bf215546Sopenharmony_ci */ 669bf215546Sopenharmony_ci int location; 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci /** 672bf215546Sopenharmony_ci * The actual location of the variable in the IR. Only valid for inputs, 673bf215546Sopenharmony_ci * outputs, uniforms (including samplers and images), and for UBO and SSBO 674bf215546Sopenharmony_ci * variables in GLSL. 675bf215546Sopenharmony_ci */ 676bf215546Sopenharmony_ci unsigned driver_location; 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci /** 679bf215546Sopenharmony_ci * Location an atomic counter or transform feedback is stored at. 680bf215546Sopenharmony_ci */ 681bf215546Sopenharmony_ci unsigned offset; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci union { 684bf215546Sopenharmony_ci struct { 685bf215546Sopenharmony_ci /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */ 686bf215546Sopenharmony_ci enum pipe_format format; 687bf215546Sopenharmony_ci } image; 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci struct { 690bf215546Sopenharmony_ci /** 691bf215546Sopenharmony_ci * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode 692bf215546Sopenharmony_ci */ 693bf215546Sopenharmony_ci unsigned is_inline_sampler : 1; 694bf215546Sopenharmony_ci unsigned addressing_mode : 3; 695bf215546Sopenharmony_ci unsigned normalized_coordinates : 1; 696bf215546Sopenharmony_ci unsigned filter_mode : 1; 697bf215546Sopenharmony_ci } sampler; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci struct { 700bf215546Sopenharmony_ci /** 701bf215546Sopenharmony_ci * Transform feedback buffer. 
702bf215546Sopenharmony_ci */ 703bf215546Sopenharmony_ci uint16_t buffer:2; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci /** 706bf215546Sopenharmony_ci * Transform feedback stride. 707bf215546Sopenharmony_ci */ 708bf215546Sopenharmony_ci uint16_t stride; 709bf215546Sopenharmony_ci } xfb; 710bf215546Sopenharmony_ci }; 711bf215546Sopenharmony_ci } data; 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci /** 714bf215546Sopenharmony_ci * Identifier for this variable generated by nir_index_vars() that is unique 715bf215546Sopenharmony_ci * among other variables in the same exec_list. 716bf215546Sopenharmony_ci */ 717bf215546Sopenharmony_ci unsigned index; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci /* Number of nir_variable_data members */ 720bf215546Sopenharmony_ci uint16_t num_members; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci /** 723bf215546Sopenharmony_ci * Built-in state that backs this uniform 724bf215546Sopenharmony_ci * 725bf215546Sopenharmony_ci * Once set at variable creation, \c state_slots must remain invariant. 726bf215546Sopenharmony_ci * This is because, ideally, this array would be shared by all clones of 727bf215546Sopenharmony_ci * this variable in the IR tree. In other words, we'd really like for it 728bf215546Sopenharmony_ci * to be a fly-weight. 729bf215546Sopenharmony_ci * 730bf215546Sopenharmony_ci * If the variable is not a uniform, \c num_state_slots will be zero and 731bf215546Sopenharmony_ci * \c state_slots will be \c NULL. 732bf215546Sopenharmony_ci */ 733bf215546Sopenharmony_ci /*@{*/ 734bf215546Sopenharmony_ci uint16_t num_state_slots; /**< Number of state slots used */ 735bf215546Sopenharmony_ci nir_state_slot *state_slots; /**< State descriptors. 
*/ 736bf215546Sopenharmony_ci /*@}*/ 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci /** 739bf215546Sopenharmony_ci * Constant expression assigned in the initializer of the variable 740bf215546Sopenharmony_ci * 741bf215546Sopenharmony_ci * This field should only be used temporarily by creators of NIR shaders 742bf215546Sopenharmony_ci * and then nir_lower_variable_initializers can be used to get rid of them. 743bf215546Sopenharmony_ci * Most of the rest of NIR ignores this field or asserts that it's NULL. 744bf215546Sopenharmony_ci */ 745bf215546Sopenharmony_ci nir_constant *constant_initializer; 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci /** 748bf215546Sopenharmony_ci * Global variable assigned in the initializer of the variable 749bf215546Sopenharmony_ci * This field should only be used temporarily by creators of NIR shaders 750bf215546Sopenharmony_ci * and then nir_lower_variable_initializers can be used to get rid of them. 751bf215546Sopenharmony_ci * Most of the rest of NIR ignores this field or asserts that it's NULL. 752bf215546Sopenharmony_ci */ 753bf215546Sopenharmony_ci struct nir_variable *pointer_initializer; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci /** 756bf215546Sopenharmony_ci * For variables that are in an interface block or are an instance of an 757bf215546Sopenharmony_ci * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 
758bf215546Sopenharmony_ci * 759bf215546Sopenharmony_ci * \sa ir_variable::location 760bf215546Sopenharmony_ci */ 761bf215546Sopenharmony_ci const struct glsl_type *interface_type; 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci /** 764bf215546Sopenharmony_ci * Description of per-member data for per-member struct variables 765bf215546Sopenharmony_ci * 766bf215546Sopenharmony_ci * This is used for variables which are actually an amalgamation of 767bf215546Sopenharmony_ci * multiple entities such as a struct of built-in values or a struct of 768bf215546Sopenharmony_ci * inputs each with their own layout specifier. This is only allowed on 769bf215546Sopenharmony_ci * variables with a struct or array of array of struct type. 770bf215546Sopenharmony_ci */ 771bf215546Sopenharmony_ci struct nir_variable_data *members; 772bf215546Sopenharmony_ci} nir_variable; 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_cistatic inline bool 775bf215546Sopenharmony_ci_nir_shader_variable_has_mode(nir_variable *var, unsigned modes) 776bf215546Sopenharmony_ci{ 777bf215546Sopenharmony_ci /* This isn't a shader variable */ 778bf215546Sopenharmony_ci assert(!(modes & nir_var_function_temp)); 779bf215546Sopenharmony_ci return var->data.mode & modes; 780bf215546Sopenharmony_ci} 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci#define nir_foreach_variable_in_list(var, var_list) \ 783bf215546Sopenharmony_ci foreach_list_typed(nir_variable, var, node, var_list) 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci#define nir_foreach_variable_in_list_safe(var, var_list) \ 786bf215546Sopenharmony_ci foreach_list_typed_safe(nir_variable, var, node, var_list) 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci#define nir_foreach_variable_in_shader(var, shader) \ 789bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, &(shader)->variables) 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci#define nir_foreach_variable_in_shader_safe(var, shader) \ 792bf215546Sopenharmony_ci 
nir_foreach_variable_in_list_safe(var, &(shader)->variables) 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci#define nir_foreach_variable_with_modes(var, shader, modes) \ 795bf215546Sopenharmony_ci nir_foreach_variable_in_shader(var, shader) \ 796bf215546Sopenharmony_ci if (_nir_shader_variable_has_mode(var, modes)) 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci#define nir_foreach_variable_with_modes_safe(var, shader, modes) \ 799bf215546Sopenharmony_ci nir_foreach_variable_in_shader_safe(var, shader) \ 800bf215546Sopenharmony_ci if (_nir_shader_variable_has_mode(var, modes)) 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci#define nir_foreach_shader_in_variable(var, shader) \ 803bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_shader_in) 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci#define nir_foreach_shader_in_variable_safe(var, shader) \ 806bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in) 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci#define nir_foreach_shader_out_variable(var, shader) \ 809bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci#define nir_foreach_shader_out_variable_safe(var, shader) \ 812bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out) 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci#define nir_foreach_uniform_variable(var, shader) \ 815bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_uniform) 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci#define nir_foreach_uniform_variable_safe(var, shader) \ 818bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform) 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci#define nir_foreach_image_variable(var, shader) \ 821bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_image) 
822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci#define nir_foreach_image_variable_safe(var, shader) \ 824bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, shader, nir_var_image) 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_cistatic inline bool 827bf215546Sopenharmony_cinir_variable_is_global(const nir_variable *var) 828bf215546Sopenharmony_ci{ 829bf215546Sopenharmony_ci return var->data.mode != nir_var_function_temp; 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_citypedef struct nir_register { 833bf215546Sopenharmony_ci struct exec_node node; 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci unsigned num_components; /** < number of vector components */ 836bf215546Sopenharmony_ci unsigned num_array_elems; /** < size of array (0 for no array) */ 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 839bf215546Sopenharmony_ci uint8_t bit_size; 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci /** 842bf215546Sopenharmony_ci * True if this register may have different values in different SIMD 843bf215546Sopenharmony_ci * invocations of the shader. 844bf215546Sopenharmony_ci */ 845bf215546Sopenharmony_ci bool divergent; 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci /** generic register index. 
*/ 848bf215546Sopenharmony_ci unsigned index; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci /** set of nir_srcs where this register is used (read from) */ 851bf215546Sopenharmony_ci struct list_head uses; 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci /** set of nir_dests where this register is defined (written to) */ 854bf215546Sopenharmony_ci struct list_head defs; 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci /** set of nir_ifs where this register is used as a condition */ 857bf215546Sopenharmony_ci struct list_head if_uses; 858bf215546Sopenharmony_ci} nir_register; 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci#define nir_foreach_register(reg, reg_list) \ 861bf215546Sopenharmony_ci foreach_list_typed(nir_register, reg, node, reg_list) 862bf215546Sopenharmony_ci#define nir_foreach_register_safe(reg, reg_list) \ 863bf215546Sopenharmony_ci foreach_list_typed_safe(nir_register, reg, node, reg_list) 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_citypedef enum PACKED { 866bf215546Sopenharmony_ci nir_instr_type_alu, 867bf215546Sopenharmony_ci nir_instr_type_deref, 868bf215546Sopenharmony_ci nir_instr_type_call, 869bf215546Sopenharmony_ci nir_instr_type_tex, 870bf215546Sopenharmony_ci nir_instr_type_intrinsic, 871bf215546Sopenharmony_ci nir_instr_type_load_const, 872bf215546Sopenharmony_ci nir_instr_type_jump, 873bf215546Sopenharmony_ci nir_instr_type_ssa_undef, 874bf215546Sopenharmony_ci nir_instr_type_phi, 875bf215546Sopenharmony_ci nir_instr_type_parallel_copy, 876bf215546Sopenharmony_ci} nir_instr_type; 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_citypedef struct nir_instr { 879bf215546Sopenharmony_ci struct exec_node node; 880bf215546Sopenharmony_ci struct list_head gc_node; 881bf215546Sopenharmony_ci struct nir_block *block; 882bf215546Sopenharmony_ci nir_instr_type type; 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci /* A temporary for optimization and analysis passes to use for storing 885bf215546Sopenharmony_ci 
* flags. For instance, DCE uses this to store the "dead/live" info. 886bf215546Sopenharmony_ci */ 887bf215546Sopenharmony_ci uint8_t pass_flags; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci /** generic instruction index. */ 890bf215546Sopenharmony_ci uint32_t index; 891bf215546Sopenharmony_ci} nir_instr; 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_cistatic inline nir_instr * 894bf215546Sopenharmony_cinir_instr_next(nir_instr *instr) 895bf215546Sopenharmony_ci{ 896bf215546Sopenharmony_ci struct exec_node *next = exec_node_get_next(&instr->node); 897bf215546Sopenharmony_ci if (exec_node_is_tail_sentinel(next)) 898bf215546Sopenharmony_ci return NULL; 899bf215546Sopenharmony_ci else 900bf215546Sopenharmony_ci return exec_node_data(nir_instr, next, node); 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_cistatic inline nir_instr * 904bf215546Sopenharmony_cinir_instr_prev(nir_instr *instr) 905bf215546Sopenharmony_ci{ 906bf215546Sopenharmony_ci struct exec_node *prev = exec_node_get_prev(&instr->node); 907bf215546Sopenharmony_ci if (exec_node_is_head_sentinel(prev)) 908bf215546Sopenharmony_ci return NULL; 909bf215546Sopenharmony_ci else 910bf215546Sopenharmony_ci return exec_node_data(nir_instr, prev, node); 911bf215546Sopenharmony_ci} 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_cistatic inline bool 914bf215546Sopenharmony_cinir_instr_is_first(const nir_instr *instr) 915bf215546Sopenharmony_ci{ 916bf215546Sopenharmony_ci return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node)); 917bf215546Sopenharmony_ci} 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_cistatic inline bool 920bf215546Sopenharmony_cinir_instr_is_last(const nir_instr *instr) 921bf215546Sopenharmony_ci{ 922bf215546Sopenharmony_ci return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); 923bf215546Sopenharmony_ci} 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_citypedef struct nir_ssa_def { 
926bf215546Sopenharmony_ci /** Instruction which produces this SSA value. */ 927bf215546Sopenharmony_ci nir_instr *parent_instr; 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci /** set of nir_instrs where this register is used (read from) */ 930bf215546Sopenharmony_ci struct list_head uses; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci /** set of nir_ifs where this register is used as a condition */ 933bf215546Sopenharmony_ci struct list_head if_uses; 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci /** generic SSA definition index. */ 936bf215546Sopenharmony_ci unsigned index; 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci uint8_t num_components; 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 941bf215546Sopenharmony_ci uint8_t bit_size; 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci /** 944bf215546Sopenharmony_ci * True if this SSA value may have different values in different SIMD 945bf215546Sopenharmony_ci * invocations of the shader. This is set by nir_divergence_analysis. 
946bf215546Sopenharmony_ci */ 947bf215546Sopenharmony_ci bool divergent; 948bf215546Sopenharmony_ci} nir_ssa_def; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_cistruct nir_src; 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_citypedef struct { 953bf215546Sopenharmony_ci nir_register *reg; 954bf215546Sopenharmony_ci struct nir_src *indirect; /** < NULL for no indirect offset */ 955bf215546Sopenharmony_ci unsigned base_offset; 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci /* TODO use-def chain goes here */ 958bf215546Sopenharmony_ci} nir_reg_src; 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_citypedef struct { 961bf215546Sopenharmony_ci nir_instr *parent_instr; 962bf215546Sopenharmony_ci struct list_head def_link; 963bf215546Sopenharmony_ci 964bf215546Sopenharmony_ci nir_register *reg; 965bf215546Sopenharmony_ci struct nir_src *indirect; /** < NULL for no indirect offset */ 966bf215546Sopenharmony_ci unsigned base_offset; 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci /* TODO def-use chain goes here */ 969bf215546Sopenharmony_ci} nir_reg_dest; 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_cistruct nir_if; 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_citypedef struct nir_src { 974bf215546Sopenharmony_ci union { 975bf215546Sopenharmony_ci /** Instruction that consumes this value as a source. 
*/ 976bf215546Sopenharmony_ci nir_instr *parent_instr; 977bf215546Sopenharmony_ci struct nir_if *parent_if; 978bf215546Sopenharmony_ci }; 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci struct list_head use_link; 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci union { 983bf215546Sopenharmony_ci nir_reg_src reg; 984bf215546Sopenharmony_ci nir_ssa_def *ssa; 985bf215546Sopenharmony_ci }; 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci bool is_ssa; 988bf215546Sopenharmony_ci} nir_src; 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_cistatic inline nir_src 991bf215546Sopenharmony_cinir_src_init(void) 992bf215546Sopenharmony_ci{ 993bf215546Sopenharmony_ci nir_src src = { { NULL } }; 994bf215546Sopenharmony_ci return src; 995bf215546Sopenharmony_ci} 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci#define NIR_SRC_INIT nir_src_init() 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_ci#define nir_foreach_use(src, reg_or_ssa_def) \ 1000bf215546Sopenharmony_ci list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci#define nir_foreach_use_safe(src, reg_or_ssa_def) \ 1003bf215546Sopenharmony_ci list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci#define nir_foreach_if_use(src, reg_or_ssa_def) \ 1006bf215546Sopenharmony_ci list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 1009bf215546Sopenharmony_ci list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_citypedef struct { 1012bf215546Sopenharmony_ci union { 1013bf215546Sopenharmony_ci nir_reg_dest reg; 1014bf215546Sopenharmony_ci nir_ssa_def ssa; 1015bf215546Sopenharmony_ci }; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci bool is_ssa; 
1018bf215546Sopenharmony_ci} nir_dest; 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_cistatic inline nir_dest 1021bf215546Sopenharmony_cinir_dest_init(void) 1022bf215546Sopenharmony_ci{ 1023bf215546Sopenharmony_ci nir_dest dest = { { { NULL } } }; 1024bf215546Sopenharmony_ci return dest; 1025bf215546Sopenharmony_ci} 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci#define NIR_DEST_INIT nir_dest_init() 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci#define nir_foreach_def(dest, reg) \ 1030bf215546Sopenharmony_ci list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci#define nir_foreach_def_safe(dest, reg) \ 1033bf215546Sopenharmony_ci list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_cistatic inline nir_src 1036bf215546Sopenharmony_cinir_src_for_ssa(nir_ssa_def *def) 1037bf215546Sopenharmony_ci{ 1038bf215546Sopenharmony_ci nir_src src = NIR_SRC_INIT; 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_ci src.is_ssa = true; 1041bf215546Sopenharmony_ci src.ssa = def; 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci return src; 1044bf215546Sopenharmony_ci} 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_cistatic inline nir_src 1047bf215546Sopenharmony_cinir_src_for_reg(nir_register *reg) 1048bf215546Sopenharmony_ci{ 1049bf215546Sopenharmony_ci nir_src src = NIR_SRC_INIT; 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci src.is_ssa = false; 1052bf215546Sopenharmony_ci src.reg.reg = reg; 1053bf215546Sopenharmony_ci src.reg.indirect = NULL; 1054bf215546Sopenharmony_ci src.reg.base_offset = 0; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci return src; 1057bf215546Sopenharmony_ci} 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_cistatic inline nir_dest 1060bf215546Sopenharmony_cinir_dest_for_reg(nir_register *reg) 1061bf215546Sopenharmony_ci{ 1062bf215546Sopenharmony_ci nir_dest dest = 
NIR_DEST_INIT; 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci dest.reg.reg = reg; 1065bf215546Sopenharmony_ci 1066bf215546Sopenharmony_ci return dest; 1067bf215546Sopenharmony_ci} 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_cistatic inline unsigned 1070bf215546Sopenharmony_cinir_src_bit_size(nir_src src) 1071bf215546Sopenharmony_ci{ 1072bf215546Sopenharmony_ci return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 1073bf215546Sopenharmony_ci} 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_cistatic inline unsigned 1076bf215546Sopenharmony_cinir_src_num_components(nir_src src) 1077bf215546Sopenharmony_ci{ 1078bf215546Sopenharmony_ci return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components; 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cistatic inline bool 1082bf215546Sopenharmony_cinir_src_is_const(nir_src src) 1083bf215546Sopenharmony_ci{ 1084bf215546Sopenharmony_ci return src.is_ssa && 1085bf215546Sopenharmony_ci src.ssa->parent_instr->type == nir_instr_type_load_const; 1086bf215546Sopenharmony_ci} 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_cistatic inline bool 1089bf215546Sopenharmony_cinir_src_is_undef(nir_src src) 1090bf215546Sopenharmony_ci{ 1091bf215546Sopenharmony_ci return src.is_ssa && 1092bf215546Sopenharmony_ci src.ssa->parent_instr->type == nir_instr_type_ssa_undef; 1093bf215546Sopenharmony_ci} 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_cistatic inline bool 1096bf215546Sopenharmony_cinir_src_is_divergent(nir_src src) 1097bf215546Sopenharmony_ci{ 1098bf215546Sopenharmony_ci return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent; 1099bf215546Sopenharmony_ci} 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_cistatic inline unsigned 1102bf215546Sopenharmony_cinir_dest_bit_size(nir_dest dest) 1103bf215546Sopenharmony_ci{ 1104bf215546Sopenharmony_ci return dest.is_ssa ? 
dest.ssa.bit_size : dest.reg.reg->bit_size; 1105bf215546Sopenharmony_ci} 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_cistatic inline unsigned 1108bf215546Sopenharmony_cinir_dest_num_components(nir_dest dest) 1109bf215546Sopenharmony_ci{ 1110bf215546Sopenharmony_ci return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; 1111bf215546Sopenharmony_ci} 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_cistatic inline bool 1114bf215546Sopenharmony_cinir_dest_is_divergent(nir_dest dest) 1115bf215546Sopenharmony_ci{ 1116bf215546Sopenharmony_ci return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent; 1117bf215546Sopenharmony_ci} 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci/* Are all components the same, ie. .xxxx */ 1120bf215546Sopenharmony_cistatic inline bool 1121bf215546Sopenharmony_cinir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 1122bf215546Sopenharmony_ci{ 1123bf215546Sopenharmony_ci for (unsigned i = 1; i < nr_comp; i++) 1124bf215546Sopenharmony_ci if (swiz[i] != swiz[0]) 1125bf215546Sopenharmony_ci return false; 1126bf215546Sopenharmony_ci return true; 1127bf215546Sopenharmony_ci} 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci/* Are all components sequential, ie. 
.yzw */ 1130bf215546Sopenharmony_cistatic inline bool 1131bf215546Sopenharmony_cinir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 1132bf215546Sopenharmony_ci{ 1133bf215546Sopenharmony_ci for (unsigned i = 1; i < nr_comp; i++) 1134bf215546Sopenharmony_ci if (swiz[i] != (swiz[0] + i)) 1135bf215546Sopenharmony_ci return false; 1136bf215546Sopenharmony_ci return true; 1137bf215546Sopenharmony_ci} 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_civoid nir_src_copy(nir_src *dest, const nir_src *src); 1140bf215546Sopenharmony_civoid nir_dest_copy(nir_dest *dest, const nir_dest *src); 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_citypedef struct { 1143bf215546Sopenharmony_ci /** Base source */ 1144bf215546Sopenharmony_ci nir_src src; 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci /** 1147bf215546Sopenharmony_ci * \name input modifiers 1148bf215546Sopenharmony_ci */ 1149bf215546Sopenharmony_ci /*@{*/ 1150bf215546Sopenharmony_ci /** 1151bf215546Sopenharmony_ci * For inputs interpreted as floating point, flips the sign bit. For 1152bf215546Sopenharmony_ci * inputs interpreted as integers, performs the two's complement negation. 1153bf215546Sopenharmony_ci */ 1154bf215546Sopenharmony_ci bool negate; 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci /** 1157bf215546Sopenharmony_ci * Clears the sign bit for floating point values, and computes the integer 1158bf215546Sopenharmony_ci * absolute value for integers. Note that the negate modifier acts after 1159bf215546Sopenharmony_ci * the absolute value modifier, therefore if both are set then all inputs 1160bf215546Sopenharmony_ci * will become negative. 1161bf215546Sopenharmony_ci */ 1162bf215546Sopenharmony_ci bool abs; 1163bf215546Sopenharmony_ci /*@}*/ 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci /** 1166bf215546Sopenharmony_ci * For each input component, says which component of the register it is 1167bf215546Sopenharmony_ci * chosen from. 
1168bf215546Sopenharmony_ci * 1169bf215546Sopenharmony_ci * Note that which elements of the swizzle are used and which are ignored 1170bf215546Sopenharmony_ci * are based on the write mask for most opcodes - for example, a statement 1171bf215546Sopenharmony_ci * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle 1172bf215546Sopenharmony_ci * of {2, 1, x, 0} where x means "don't care." 1173bf215546Sopenharmony_ci */ 1174bf215546Sopenharmony_ci uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; 1175bf215546Sopenharmony_ci} nir_alu_src; 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_citypedef struct { 1178bf215546Sopenharmony_ci /** Base destination */ 1179bf215546Sopenharmony_ci nir_dest dest; 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci /** 1182bf215546Sopenharmony_ci * Saturate output modifier 1183bf215546Sopenharmony_ci * 1184bf215546Sopenharmony_ci * Only valid for opcodes that output floating-point numbers. Clamps the 1185bf215546Sopenharmony_ci * output to between 0.0 and 1.0 inclusive. 1186bf215546Sopenharmony_ci */ 1187bf215546Sopenharmony_ci bool saturate; 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci /** 1190bf215546Sopenharmony_ci * Write-mask 1191bf215546Sopenharmony_ci * 1192bf215546Sopenharmony_ci * Ignored if dest.is_ssa is true 1193bf215546Sopenharmony_ci */ 1194bf215546Sopenharmony_ci unsigned write_mask : NIR_MAX_VEC_COMPONENTS; 1195bf215546Sopenharmony_ci} nir_alu_dest; 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci/** NIR sized and unsized types 1198bf215546Sopenharmony_ci * 1199bf215546Sopenharmony_ci * The values in this enum are carefully chosen so that the sized type is 1200bf215546Sopenharmony_ci * just the unsized type OR the number of bits. 
1201bf215546Sopenharmony_ci */ 1202bf215546Sopenharmony_citypedef enum PACKED { 1203bf215546Sopenharmony_ci nir_type_invalid = 0, /* Not a valid type */ 1204bf215546Sopenharmony_ci nir_type_int = 2, 1205bf215546Sopenharmony_ci nir_type_uint = 4, 1206bf215546Sopenharmony_ci nir_type_bool = 6, 1207bf215546Sopenharmony_ci nir_type_float = 128, 1208bf215546Sopenharmony_ci nir_type_bool1 = 1 | nir_type_bool, 1209bf215546Sopenharmony_ci nir_type_bool8 = 8 | nir_type_bool, 1210bf215546Sopenharmony_ci nir_type_bool16 = 16 | nir_type_bool, 1211bf215546Sopenharmony_ci nir_type_bool32 = 32 | nir_type_bool, 1212bf215546Sopenharmony_ci nir_type_int1 = 1 | nir_type_int, 1213bf215546Sopenharmony_ci nir_type_int8 = 8 | nir_type_int, 1214bf215546Sopenharmony_ci nir_type_int16 = 16 | nir_type_int, 1215bf215546Sopenharmony_ci nir_type_int32 = 32 | nir_type_int, 1216bf215546Sopenharmony_ci nir_type_int64 = 64 | nir_type_int, 1217bf215546Sopenharmony_ci nir_type_uint1 = 1 | nir_type_uint, 1218bf215546Sopenharmony_ci nir_type_uint8 = 8 | nir_type_uint, 1219bf215546Sopenharmony_ci nir_type_uint16 = 16 | nir_type_uint, 1220bf215546Sopenharmony_ci nir_type_uint32 = 32 | nir_type_uint, 1221bf215546Sopenharmony_ci nir_type_uint64 = 64 | nir_type_uint, 1222bf215546Sopenharmony_ci nir_type_float16 = 16 | nir_type_float, 1223bf215546Sopenharmony_ci nir_type_float32 = 32 | nir_type_float, 1224bf215546Sopenharmony_ci nir_type_float64 = 64 | nir_type_float, 1225bf215546Sopenharmony_ci} nir_alu_type; 1226bf215546Sopenharmony_ci 1227bf215546Sopenharmony_ci#define NIR_ALU_TYPE_SIZE_MASK 0x79 1228bf215546Sopenharmony_ci#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86 1229bf215546Sopenharmony_ci 1230bf215546Sopenharmony_cistatic inline unsigned 1231bf215546Sopenharmony_cinir_alu_type_get_type_size(nir_alu_type type) 1232bf215546Sopenharmony_ci{ 1233bf215546Sopenharmony_ci return type & NIR_ALU_TYPE_SIZE_MASK; 1234bf215546Sopenharmony_ci} 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_cistatic inline 
nir_alu_type 1237bf215546Sopenharmony_cinir_alu_type_get_base_type(nir_alu_type type) 1238bf215546Sopenharmony_ci{ 1239bf215546Sopenharmony_ci return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); 1240bf215546Sopenharmony_ci} 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_cinir_alu_type 1243bf215546Sopenharmony_cinir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type); 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_cistatic inline nir_alu_type 1246bf215546Sopenharmony_cinir_get_nir_type_for_glsl_type(const struct glsl_type *type) 1247bf215546Sopenharmony_ci{ 1248bf215546Sopenharmony_ci return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type)); 1249bf215546Sopenharmony_ci} 1250bf215546Sopenharmony_ci 1251bf215546Sopenharmony_cienum glsl_base_type 1252bf215546Sopenharmony_cinir_get_glsl_base_type_for_nir_type(nir_alu_type base_type); 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_cinir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 1255bf215546Sopenharmony_ci nir_rounding_mode rnd); 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_cinir_op 1258bf215546Sopenharmony_cinir_op_vec(unsigned components); 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_cibool 1261bf215546Sopenharmony_cinir_op_is_vec(nir_op op); 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_cistatic inline bool 1264bf215546Sopenharmony_cinir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) 1265bf215546Sopenharmony_ci{ 1266bf215546Sopenharmony_ci return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) || 1267bf215546Sopenharmony_ci (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) || 1268bf215546Sopenharmony_ci (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64); 1269bf215546Sopenharmony_ci} 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_cistatic inline bool 
1272bf215546Sopenharmony_cinir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size) 1273bf215546Sopenharmony_ci{ 1274bf215546Sopenharmony_ci return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) || 1275bf215546Sopenharmony_ci (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) || 1276bf215546Sopenharmony_ci (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64); 1277bf215546Sopenharmony_ci} 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_cistatic inline bool 1280bf215546Sopenharmony_cinir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size) 1281bf215546Sopenharmony_ci{ 1282bf215546Sopenharmony_ci return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) || 1283bf215546Sopenharmony_ci (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) || 1284bf215546Sopenharmony_ci (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64); 1285bf215546Sopenharmony_ci} 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_cistatic inline bool 1288bf215546Sopenharmony_cinir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size) 1289bf215546Sopenharmony_ci{ 1290bf215546Sopenharmony_ci return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || 1291bf215546Sopenharmony_ci (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || 1292bf215546Sopenharmony_ci (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); 1293bf215546Sopenharmony_ci} 1294bf215546Sopenharmony_ci 1295bf215546Sopenharmony_cistatic inline bool 1296bf215546Sopenharmony_cinir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size) 1297bf215546Sopenharmony_ci{ 1298bf215546Sopenharmony_ci return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || 1299bf215546Sopenharmony_ci (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) 
|| 1300bf215546Sopenharmony_ci (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); 1301bf215546Sopenharmony_ci} 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_cistatic inline bool 1304bf215546Sopenharmony_cinir_has_any_rounding_mode_rtz(unsigned execution_mode) 1305bf215546Sopenharmony_ci{ 1306bf215546Sopenharmony_ci return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || 1307bf215546Sopenharmony_ci (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || 1308bf215546Sopenharmony_ci (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); 1309bf215546Sopenharmony_ci} 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_cistatic inline bool 1312bf215546Sopenharmony_cinir_has_any_rounding_mode_rtne(unsigned execution_mode) 1313bf215546Sopenharmony_ci{ 1314bf215546Sopenharmony_ci return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || 1315bf215546Sopenharmony_ci (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || 1316bf215546Sopenharmony_ci (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); 1317bf215546Sopenharmony_ci} 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_cistatic inline nir_rounding_mode 1320bf215546Sopenharmony_cinir_get_rounding_mode_from_float_controls(unsigned execution_mode, 1321bf215546Sopenharmony_ci nir_alu_type type) 1322bf215546Sopenharmony_ci{ 1323bf215546Sopenharmony_ci if (nir_alu_type_get_base_type(type) != nir_type_float) 1324bf215546Sopenharmony_ci return nir_rounding_mode_undef; 1325bf215546Sopenharmony_ci 1326bf215546Sopenharmony_ci unsigned bit_size = nir_alu_type_get_type_size(type); 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) 1329bf215546Sopenharmony_ci return nir_rounding_mode_rtz; 1330bf215546Sopenharmony_ci if (nir_is_rounding_mode_rtne(execution_mode, bit_size)) 1331bf215546Sopenharmony_ci return nir_rounding_mode_rtne; 1332bf215546Sopenharmony_ci return nir_rounding_mode_undef; 
1333bf215546Sopenharmony_ci} 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_cistatic inline bool 1336bf215546Sopenharmony_cinir_has_any_rounding_mode_enabled(unsigned execution_mode) 1337bf215546Sopenharmony_ci{ 1338bf215546Sopenharmony_ci bool result = 1339bf215546Sopenharmony_ci nir_has_any_rounding_mode_rtne(execution_mode) || 1340bf215546Sopenharmony_ci nir_has_any_rounding_mode_rtz(execution_mode); 1341bf215546Sopenharmony_ci return result; 1342bf215546Sopenharmony_ci} 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_citypedef enum { 1345bf215546Sopenharmony_ci /** 1346bf215546Sopenharmony_ci * Operation where the first two sources are commutative. 1347bf215546Sopenharmony_ci * 1348bf215546Sopenharmony_ci * For 2-source operations, this just mathematical commutativity. Some 1349bf215546Sopenharmony_ci * 3-source operations, like ffma, are only commutative in the first two 1350bf215546Sopenharmony_ci * sources. 1351bf215546Sopenharmony_ci */ 1352bf215546Sopenharmony_ci NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0), 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci /** 1355bf215546Sopenharmony_ci * Operation is associative 1356bf215546Sopenharmony_ci */ 1357bf215546Sopenharmony_ci NIR_OP_IS_ASSOCIATIVE = (1 << 1), 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci /** 1360bf215546Sopenharmony_ci * Operation where src[0] is used to select src[1] on true or src[2] false. 1361bf215546Sopenharmony_ci * src[0] may be Boolean, or it may be another type used in an implicit 1362bf215546Sopenharmony_ci * comparison. 1363bf215546Sopenharmony_ci */ 1364bf215546Sopenharmony_ci NIR_OP_IS_SELECTION = (1 << 2), 1365bf215546Sopenharmony_ci} nir_op_algebraic_property; 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci/* vec16 is the widest ALU op in NIR, making the max number of input of ALU 1368bf215546Sopenharmony_ci * instructions to be the same as NIR_MAX_VEC_COMPONENTS. 
1369bf215546Sopenharmony_ci */ 1370bf215546Sopenharmony_ci#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_citypedef struct nir_op_info { 1373bf215546Sopenharmony_ci /** Name of the NIR ALU opcode */ 1374bf215546Sopenharmony_ci const char *name; 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci /** Number of inputs (sources) */ 1377bf215546Sopenharmony_ci uint8_t num_inputs; 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci /** 1380bf215546Sopenharmony_ci * The number of components in the output 1381bf215546Sopenharmony_ci * 1382bf215546Sopenharmony_ci * If non-zero, this is the size of the output and input sizes are 1383bf215546Sopenharmony_ci * explicitly given; swizzle and writemask are still in effect, but if 1384bf215546Sopenharmony_ci * the output component is masked out, then the input component may 1385bf215546Sopenharmony_ci * still be in use. 1386bf215546Sopenharmony_ci * 1387bf215546Sopenharmony_ci * If zero, the opcode acts in the standard, per-component manner; the 1388bf215546Sopenharmony_ci * operation is performed on each component (except the ones that are 1389bf215546Sopenharmony_ci * masked out) with the input being taken from the input swizzle for 1390bf215546Sopenharmony_ci * that component. 1391bf215546Sopenharmony_ci * 1392bf215546Sopenharmony_ci * The size of some of the inputs may be given (i.e. non-zero) even 1393bf215546Sopenharmony_ci * though output_size is zero; in that case, the inputs with a zero 1394bf215546Sopenharmony_ci * size act per-component, while the inputs with non-zero size don't. 1395bf215546Sopenharmony_ci */ 1396bf215546Sopenharmony_ci uint8_t output_size; 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci /** 1399bf215546Sopenharmony_ci * The type of vector that the instruction outputs. Note that the 1400bf215546Sopenharmony_ci * staurate modifier is only allowed on outputs with the float type. 
1401bf215546Sopenharmony_ci */ 1402bf215546Sopenharmony_ci nir_alu_type output_type; 1403bf215546Sopenharmony_ci 1404bf215546Sopenharmony_ci /** 1405bf215546Sopenharmony_ci * The number of components in each input 1406bf215546Sopenharmony_ci * 1407bf215546Sopenharmony_ci * See nir_op_infos::output_size for more detail about the relationship 1408bf215546Sopenharmony_ci * between input and output sizes. 1409bf215546Sopenharmony_ci */ 1410bf215546Sopenharmony_ci uint8_t input_sizes[NIR_ALU_MAX_INPUTS]; 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci /** 1413bf215546Sopenharmony_ci * The type of vector that each input takes. Note that negate and 1414bf215546Sopenharmony_ci * absolute value are only allowed on inputs with int or float type and 1415bf215546Sopenharmony_ci * behave differently on the two. 1416bf215546Sopenharmony_ci */ 1417bf215546Sopenharmony_ci nir_alu_type input_types[NIR_ALU_MAX_INPUTS]; 1418bf215546Sopenharmony_ci 1419bf215546Sopenharmony_ci /** Algebraic properties of this opcode */ 1420bf215546Sopenharmony_ci nir_op_algebraic_property algebraic_properties; 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci /** Whether this represents a numeric conversion opcode */ 1423bf215546Sopenharmony_ci bool is_conversion; 1424bf215546Sopenharmony_ci} nir_op_info; 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_ci/** Metadata for each nir_op, indexed by opcode */ 1427bf215546Sopenharmony_ciextern const nir_op_info nir_op_infos[nir_num_opcodes]; 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_cistatic inline bool 1430bf215546Sopenharmony_cinir_op_is_selection(nir_op op) 1431bf215546Sopenharmony_ci{ 1432bf215546Sopenharmony_ci return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0; 1433bf215546Sopenharmony_ci} 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_citypedef struct nir_alu_instr { 1436bf215546Sopenharmony_ci /** Base instruction */ 1437bf215546Sopenharmony_ci nir_instr instr; 1438bf215546Sopenharmony_ci 
1439bf215546Sopenharmony_ci /** Opcode */ 1440bf215546Sopenharmony_ci nir_op op; 1441bf215546Sopenharmony_ci 1442bf215546Sopenharmony_ci /** Indicates that this ALU instruction generates an exact value 1443bf215546Sopenharmony_ci * 1444bf215546Sopenharmony_ci * This is kind of a mixture of GLSL "precise" and "invariant" and not 1445bf215546Sopenharmony_ci * really equivalent to either. This indicates that the value generated by 1446bf215546Sopenharmony_ci * this operation is high-precision and any code transformations that touch 1447bf215546Sopenharmony_ci * it must ensure that the resulting value is bit-for-bit identical to the 1448bf215546Sopenharmony_ci * original. 1449bf215546Sopenharmony_ci */ 1450bf215546Sopenharmony_ci bool exact:1; 1451bf215546Sopenharmony_ci 1452bf215546Sopenharmony_ci /** 1453bf215546Sopenharmony_ci * Indicates that this instruction doese not cause signed integer wrapping 1454bf215546Sopenharmony_ci * to occur, in the form of overflow or underflow. 1455bf215546Sopenharmony_ci */ 1456bf215546Sopenharmony_ci bool no_signed_wrap:1; 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci /** 1459bf215546Sopenharmony_ci * Indicates that this instruction does not cause unsigned integer wrapping 1460bf215546Sopenharmony_ci * to occur, in the form of overflow or underflow. 1461bf215546Sopenharmony_ci */ 1462bf215546Sopenharmony_ci bool no_unsigned_wrap:1; 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci /** Destination */ 1465bf215546Sopenharmony_ci nir_alu_dest dest; 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci /** Sources 1468bf215546Sopenharmony_ci * 1469bf215546Sopenharmony_ci * The size of the array is given by nir_op_info::num_inputs. 
1470bf215546Sopenharmony_ci */ 1471bf215546Sopenharmony_ci nir_alu_src src[]; 1472bf215546Sopenharmony_ci} nir_alu_instr; 1473bf215546Sopenharmony_ci 1474bf215546Sopenharmony_civoid nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src); 1475bf215546Sopenharmony_civoid nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src); 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_cibool nir_alu_instr_is_copy(nir_alu_instr *instr); 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_ci/* is this source channel used? */ 1480bf215546Sopenharmony_cibool 1481bf215546Sopenharmony_cinir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src, 1482bf215546Sopenharmony_ci unsigned channel); 1483bf215546Sopenharmony_cinir_component_mask_t 1484bf215546Sopenharmony_cinir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src); 1485bf215546Sopenharmony_ci/** 1486bf215546Sopenharmony_ci * Get the number of channels used for a source 1487bf215546Sopenharmony_ci */ 1488bf215546Sopenharmony_ciunsigned 1489bf215546Sopenharmony_cinir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src); 1490bf215546Sopenharmony_ci 1491bf215546Sopenharmony_cibool 1492bf215546Sopenharmony_cinir_alu_instr_is_comparison(const nir_alu_instr *instr); 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_cibool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2, 1495bf215546Sopenharmony_ci nir_alu_type full_type); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_cibool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, 1498bf215546Sopenharmony_ci unsigned src1, unsigned src2); 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_cibool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, 1501bf215546Sopenharmony_ci const nir_alu_instr *alu2, 1502bf215546Sopenharmony_ci unsigned src1, unsigned src2); 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_cibool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, 
unsigned srcn); 1505bf215546Sopenharmony_ci 1506bf215546Sopenharmony_citypedef enum { 1507bf215546Sopenharmony_ci nir_deref_type_var, 1508bf215546Sopenharmony_ci nir_deref_type_array, 1509bf215546Sopenharmony_ci nir_deref_type_array_wildcard, 1510bf215546Sopenharmony_ci nir_deref_type_ptr_as_array, 1511bf215546Sopenharmony_ci nir_deref_type_struct, 1512bf215546Sopenharmony_ci nir_deref_type_cast, 1513bf215546Sopenharmony_ci} nir_deref_type; 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_citypedef struct { 1516bf215546Sopenharmony_ci nir_instr instr; 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci /** The type of this deref instruction */ 1519bf215546Sopenharmony_ci nir_deref_type deref_type; 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci /** Bitmask what modes the underlying variable might be 1522bf215546Sopenharmony_ci * 1523bf215546Sopenharmony_ci * For OpenCL-style generic pointers, we may not know exactly what mode it 1524bf215546Sopenharmony_ci * is at any given point in time in the compile process. This bitfield 1525bf215546Sopenharmony_ci * contains the set of modes which it MAY be. 1526bf215546Sopenharmony_ci * 1527bf215546Sopenharmony_ci * Generally, this field should not be accessed directly. Use one of the 1528bf215546Sopenharmony_ci * nir_deref_mode_ helpers instead. 
1529bf215546Sopenharmony_ci */ 1530bf215546Sopenharmony_ci nir_variable_mode modes; 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci /** The dereferenced type of the resulting pointer value */ 1533bf215546Sopenharmony_ci const struct glsl_type *type; 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci union { 1536bf215546Sopenharmony_ci /** Variable being dereferenced if deref_type is a deref_var */ 1537bf215546Sopenharmony_ci nir_variable *var; 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci /** Parent deref if deref_type is not deref_var */ 1540bf215546Sopenharmony_ci nir_src parent; 1541bf215546Sopenharmony_ci }; 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci /** Additional deref parameters */ 1544bf215546Sopenharmony_ci union { 1545bf215546Sopenharmony_ci struct { 1546bf215546Sopenharmony_ci nir_src index; 1547bf215546Sopenharmony_ci bool in_bounds; 1548bf215546Sopenharmony_ci } arr; 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci struct { 1551bf215546Sopenharmony_ci unsigned index; 1552bf215546Sopenharmony_ci } strct; 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci struct { 1555bf215546Sopenharmony_ci unsigned ptr_stride; 1556bf215546Sopenharmony_ci unsigned align_mul; 1557bf215546Sopenharmony_ci unsigned align_offset; 1558bf215546Sopenharmony_ci } cast; 1559bf215546Sopenharmony_ci }; 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci /** Destination to store the resulting "pointer" */ 1562bf215546Sopenharmony_ci nir_dest dest; 1563bf215546Sopenharmony_ci} nir_deref_instr; 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci/** Returns true if deref might have one of the given modes 1566bf215546Sopenharmony_ci * 1567bf215546Sopenharmony_ci * For multi-mode derefs, this returns true if any of the possible modes for 1568bf215546Sopenharmony_ci * the deref to have any of the specified modes. This function returning true 1569bf215546Sopenharmony_ci * does NOT mean that the deref definitely has one of those modes. 
It simply 1570bf215546Sopenharmony_ci * means that, with the best information we have at the time, it might. 1571bf215546Sopenharmony_ci */ 1572bf215546Sopenharmony_cistatic inline bool 1573bf215546Sopenharmony_cinir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes) 1574bf215546Sopenharmony_ci{ 1575bf215546Sopenharmony_ci assert(!(modes & ~nir_var_all)); 1576bf215546Sopenharmony_ci assert(deref->modes != 0); 1577bf215546Sopenharmony_ci return deref->modes & modes; 1578bf215546Sopenharmony_ci} 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ci/** Returns true if deref must have one of the given modes 1581bf215546Sopenharmony_ci * 1582bf215546Sopenharmony_ci * For multi-mode derefs, this returns true if NIR can prove that the given 1583bf215546Sopenharmony_ci * deref has one of the specified modes. This function returning false does 1584bf215546Sopenharmony_ci * NOT mean that deref doesn't have one of the given mode. It very well may 1585bf215546Sopenharmony_ci * have one of those modes, we just don't have enough information to prove 1586bf215546Sopenharmony_ci * that it does for sure. 1587bf215546Sopenharmony_ci */ 1588bf215546Sopenharmony_cistatic inline bool 1589bf215546Sopenharmony_cinir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes) 1590bf215546Sopenharmony_ci{ 1591bf215546Sopenharmony_ci assert(!(modes & ~nir_var_all)); 1592bf215546Sopenharmony_ci assert(deref->modes != 0); 1593bf215546Sopenharmony_ci return !(deref->modes & ~modes); 1594bf215546Sopenharmony_ci} 1595bf215546Sopenharmony_ci 1596bf215546Sopenharmony_ci/** Returns true if deref has the given mode 1597bf215546Sopenharmony_ci * 1598bf215546Sopenharmony_ci * This returns true if the deref has exactly the mode specified. If the 1599bf215546Sopenharmony_ci * deref may have that mode but may also have a different mode (i.e. modes has 1600bf215546Sopenharmony_ci * multiple bits set), this will assert-fail. 
1601bf215546Sopenharmony_ci * 1602bf215546Sopenharmony_ci * If you're confused about which nir_deref_mode_ helper to use, use this one 1603bf215546Sopenharmony_ci * or nir_deref_mode_is_one_of below. 1604bf215546Sopenharmony_ci */ 1605bf215546Sopenharmony_cistatic inline bool 1606bf215546Sopenharmony_cinir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode) 1607bf215546Sopenharmony_ci{ 1608bf215546Sopenharmony_ci assert(util_bitcount(mode) == 1 && (mode & nir_var_all)); 1609bf215546Sopenharmony_ci assert(deref->modes != 0); 1610bf215546Sopenharmony_ci 1611bf215546Sopenharmony_ci /* This is only for "simple" cases so, if modes might interact with this 1612bf215546Sopenharmony_ci * deref then the deref has to have a single mode. 1613bf215546Sopenharmony_ci */ 1614bf215546Sopenharmony_ci if (nir_deref_mode_may_be(deref, mode)) { 1615bf215546Sopenharmony_ci assert(util_bitcount(deref->modes) == 1); 1616bf215546Sopenharmony_ci assert(deref->modes == mode); 1617bf215546Sopenharmony_ci } 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci return deref->modes == mode; 1620bf215546Sopenharmony_ci} 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_ci/** Returns true if deref has one of the given modes 1623bf215546Sopenharmony_ci * 1624bf215546Sopenharmony_ci * This returns true if the deref has exactly one possible mode and that mode 1625bf215546Sopenharmony_ci * is one of the modes specified. If the deref may have one of those modes 1626bf215546Sopenharmony_ci * but may also have a different mode (i.e. modes has multiple bits set), this 1627bf215546Sopenharmony_ci * will assert-fail. 
1628bf215546Sopenharmony_ci */ 1629bf215546Sopenharmony_cistatic inline bool 1630bf215546Sopenharmony_cinir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes) 1631bf215546Sopenharmony_ci{ 1632bf215546Sopenharmony_ci /* This is only for "simple" cases so, if modes might interact with this 1633bf215546Sopenharmony_ci * deref then the deref has to have a single mode. 1634bf215546Sopenharmony_ci */ 1635bf215546Sopenharmony_ci if (nir_deref_mode_may_be(deref, modes)) { 1636bf215546Sopenharmony_ci assert(util_bitcount(deref->modes) == 1); 1637bf215546Sopenharmony_ci assert(nir_deref_mode_must_be(deref, modes)); 1638bf215546Sopenharmony_ci } 1639bf215546Sopenharmony_ci 1640bf215546Sopenharmony_ci return nir_deref_mode_may_be(deref, modes); 1641bf215546Sopenharmony_ci} 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci/** Returns true if deref's possible modes lie in the given set of modes 1644bf215546Sopenharmony_ci * 1645bf215546Sopenharmony_ci * This returns true if the deref's modes lie in the given set of modes. If 1646bf215546Sopenharmony_ci * the deref's modes overlap with the specified modes but aren't entirely 1647bf215546Sopenharmony_ci * contained in the specified set of modes, this will assert-fail. In 1648bf215546Sopenharmony_ci * particular, if this is used in a generic pointers scenario, the specified 1649bf215546Sopenharmony_ci * modes has to contain all or none of the possible generic pointer modes. 1650bf215546Sopenharmony_ci * 1651bf215546Sopenharmony_ci * This is intended mostly for mass-lowering of derefs which might have 1652bf215546Sopenharmony_ci * generic pointers. 
1653bf215546Sopenharmony_ci */ 1654bf215546Sopenharmony_cistatic inline bool 1655bf215546Sopenharmony_cinir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes) 1656bf215546Sopenharmony_ci{ 1657bf215546Sopenharmony_ci if (nir_deref_mode_may_be(deref, modes)) 1658bf215546Sopenharmony_ci assert(nir_deref_mode_must_be(deref, modes)); 1659bf215546Sopenharmony_ci 1660bf215546Sopenharmony_ci return nir_deref_mode_may_be(deref, modes); 1661bf215546Sopenharmony_ci} 1662bf215546Sopenharmony_ci 1663bf215546Sopenharmony_cistatic inline nir_deref_instr *nir_src_as_deref(nir_src src); 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_cistatic inline nir_deref_instr * 1666bf215546Sopenharmony_cinir_deref_instr_parent(const nir_deref_instr *instr) 1667bf215546Sopenharmony_ci{ 1668bf215546Sopenharmony_ci if (instr->deref_type == nir_deref_type_var) 1669bf215546Sopenharmony_ci return NULL; 1670bf215546Sopenharmony_ci else 1671bf215546Sopenharmony_ci return nir_src_as_deref(instr->parent); 1672bf215546Sopenharmony_ci} 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_cistatic inline nir_variable * 1675bf215546Sopenharmony_cinir_deref_instr_get_variable(const nir_deref_instr *instr) 1676bf215546Sopenharmony_ci{ 1677bf215546Sopenharmony_ci while (instr->deref_type != nir_deref_type_var) { 1678bf215546Sopenharmony_ci if (instr->deref_type == nir_deref_type_cast) 1679bf215546Sopenharmony_ci return NULL; 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci instr = nir_deref_instr_parent(instr); 1682bf215546Sopenharmony_ci } 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_ci return instr->var; 1685bf215546Sopenharmony_ci} 1686bf215546Sopenharmony_ci 1687bf215546Sopenharmony_cibool nir_deref_instr_has_indirect(nir_deref_instr *instr); 1688bf215546Sopenharmony_cibool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr); 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_citypedef enum { 1691bf215546Sopenharmony_ci 
nir_deref_instr_has_complex_use_allow_memcpy_src = (1 << 0), 1692bf215546Sopenharmony_ci nir_deref_instr_has_complex_use_allow_memcpy_dst = (1 << 1), 1693bf215546Sopenharmony_ci} nir_deref_instr_has_complex_use_options; 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_cibool nir_deref_instr_has_complex_use(nir_deref_instr *instr, 1696bf215546Sopenharmony_ci nir_deref_instr_has_complex_use_options opts); 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_cibool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ciunsigned nir_deref_instr_array_stride(nir_deref_instr *instr); 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_citypedef struct { 1703bf215546Sopenharmony_ci nir_instr instr; 1704bf215546Sopenharmony_ci 1705bf215546Sopenharmony_ci struct nir_function *callee; 1706bf215546Sopenharmony_ci 1707bf215546Sopenharmony_ci unsigned num_params; 1708bf215546Sopenharmony_ci nir_src params[]; 1709bf215546Sopenharmony_ci} nir_call_instr; 1710bf215546Sopenharmony_ci 1711bf215546Sopenharmony_ci#include "nir_intrinsics.h" 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci#define NIR_INTRINSIC_MAX_CONST_INDEX 7 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci/** Represents an intrinsic 1716bf215546Sopenharmony_ci * 1717bf215546Sopenharmony_ci * An intrinsic is an instruction type for handling things that are 1718bf215546Sopenharmony_ci * more-or-less regular operations but don't just consume and produce SSA 1719bf215546Sopenharmony_ci * values like ALU operations do. Intrinsics are not for things that have 1720bf215546Sopenharmony_ci * special semantic meaning such as phi nodes and parallel copies. 1721bf215546Sopenharmony_ci * Examples of intrinsics include variable load/store operations, system 1722bf215546Sopenharmony_ci * value loads, and the like. 
Even though texturing more-or-less falls 1723bf215546Sopenharmony_ci * under this category, texturing is its own instruction type because 1724bf215546Sopenharmony_ci * trying to represent texturing with intrinsics would lead to a 1725bf215546Sopenharmony_ci * combinatorial explosion of intrinsic opcodes. 1726bf215546Sopenharmony_ci * 1727bf215546Sopenharmony_ci * By having a single instruction type for handling a lot of different 1728bf215546Sopenharmony_ci * cases, optimization passes can look for intrinsics and, for the most 1729bf215546Sopenharmony_ci * part, completely ignore them. Each intrinsic type also has a few 1730bf215546Sopenharmony_ci * possible flags that govern whether or not they can be reordered or 1731bf215546Sopenharmony_ci * eliminated. That way passes like dead code elimination can still work 1732bf215546Sopenharmony_ci * on intrisics without understanding the meaning of each. 1733bf215546Sopenharmony_ci * 1734bf215546Sopenharmony_ci * Each intrinsic has some number of constant indices, some number of 1735bf215546Sopenharmony_ci * variables, and some number of sources. What these sources, variables, 1736bf215546Sopenharmony_ci * and indices mean depends on the intrinsic and is documented with the 1737bf215546Sopenharmony_ci * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture 1738bf215546Sopenharmony_ci * instructions are the only types of instruction that can operate on 1739bf215546Sopenharmony_ci * variables. 1740bf215546Sopenharmony_ci */ 1741bf215546Sopenharmony_citypedef struct { 1742bf215546Sopenharmony_ci nir_instr instr; 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci nir_intrinsic_op intrinsic; 1745bf215546Sopenharmony_ci 1746bf215546Sopenharmony_ci nir_dest dest; 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci /** number of components if this is a vectorized intrinsic 1749bf215546Sopenharmony_ci * 1750bf215546Sopenharmony_ci * Similarly to ALU operations, some intrinsics are vectorized. 
1751bf215546Sopenharmony_ci * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. 1752bf215546Sopenharmony_ci * For vectorized intrinsics, the num_components field specifies the 1753bf215546Sopenharmony_ci * number of destination components and the number of source components 1754bf215546Sopenharmony_ci * for all sources with nir_intrinsic_infos.src_components[i] == 0. 1755bf215546Sopenharmony_ci */ 1756bf215546Sopenharmony_ci uint8_t num_components; 1757bf215546Sopenharmony_ci 1758bf215546Sopenharmony_ci int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; 1759bf215546Sopenharmony_ci 1760bf215546Sopenharmony_ci nir_src src[]; 1761bf215546Sopenharmony_ci} nir_intrinsic_instr; 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_cistatic inline nir_variable * 1764bf215546Sopenharmony_cinir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) 1765bf215546Sopenharmony_ci{ 1766bf215546Sopenharmony_ci return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); 1767bf215546Sopenharmony_ci} 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_citypedef enum { 1770bf215546Sopenharmony_ci /* Memory ordering. */ 1771bf215546Sopenharmony_ci NIR_MEMORY_ACQUIRE = 1 << 0, 1772bf215546Sopenharmony_ci NIR_MEMORY_RELEASE = 1 << 1, 1773bf215546Sopenharmony_ci NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci /* Memory visibility operations. 
*/ 1776bf215546Sopenharmony_ci NIR_MEMORY_MAKE_AVAILABLE = 1 << 2, 1777bf215546Sopenharmony_ci NIR_MEMORY_MAKE_VISIBLE = 1 << 3, 1778bf215546Sopenharmony_ci} nir_memory_semantics; 1779bf215546Sopenharmony_ci 1780bf215546Sopenharmony_citypedef enum { 1781bf215546Sopenharmony_ci NIR_SCOPE_NONE, 1782bf215546Sopenharmony_ci NIR_SCOPE_INVOCATION, 1783bf215546Sopenharmony_ci NIR_SCOPE_SUBGROUP, 1784bf215546Sopenharmony_ci NIR_SCOPE_SHADER_CALL, 1785bf215546Sopenharmony_ci NIR_SCOPE_WORKGROUP, 1786bf215546Sopenharmony_ci NIR_SCOPE_QUEUE_FAMILY, 1787bf215546Sopenharmony_ci NIR_SCOPE_DEVICE, 1788bf215546Sopenharmony_ci} nir_scope; 1789bf215546Sopenharmony_ci 1790bf215546Sopenharmony_ci/** 1791bf215546Sopenharmony_ci * \name NIR intrinsics semantic flags 1792bf215546Sopenharmony_ci * 1793bf215546Sopenharmony_ci * information about what the compiler can do with the intrinsics. 1794bf215546Sopenharmony_ci * 1795bf215546Sopenharmony_ci * \sa nir_intrinsic_info::flags 1796bf215546Sopenharmony_ci */ 1797bf215546Sopenharmony_citypedef enum { 1798bf215546Sopenharmony_ci /** 1799bf215546Sopenharmony_ci * whether the intrinsic can be safely eliminated if none of its output 1800bf215546Sopenharmony_ci * value is not being used. 1801bf215546Sopenharmony_ci */ 1802bf215546Sopenharmony_ci NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_ci /** 1805bf215546Sopenharmony_ci * Whether the intrinsic can be reordered with respect to any other 1806bf215546Sopenharmony_ci * intrinsic, i.e. whether the only reordering dependencies of the 1807bf215546Sopenharmony_ci * intrinsic are due to the register reads/writes. 1808bf215546Sopenharmony_ci */ 1809bf215546Sopenharmony_ci NIR_INTRINSIC_CAN_REORDER = (1 << 1), 1810bf215546Sopenharmony_ci} nir_intrinsic_semantic_flag; 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci/** 1813bf215546Sopenharmony_ci * Maximum valid value for a nir align_mul value (in intrinsics or derefs). 
1814bf215546Sopenharmony_ci * 1815bf215546Sopenharmony_ci * Offsets can be signed, so this is the largest power of two in int32_t. 1816bf215546Sopenharmony_ci */ 1817bf215546Sopenharmony_ci#define NIR_ALIGN_MUL_MAX 0x40000000 1818bf215546Sopenharmony_ci 1819bf215546Sopenharmony_citypedef struct nir_io_semantics { 1820bf215546Sopenharmony_ci unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */ 1821bf215546Sopenharmony_ci unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */ 1822bf215546Sopenharmony_ci unsigned dual_source_blend_index:1; 1823bf215546Sopenharmony_ci unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */ 1824bf215546Sopenharmony_ci unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */ 1825bf215546Sopenharmony_ci unsigned medium_precision:1; /* GLSL mediump qualifier */ 1826bf215546Sopenharmony_ci unsigned per_view:1; 1827bf215546Sopenharmony_ci unsigned high_16bits:1; /* whether accessing low or high half of the slot */ 1828bf215546Sopenharmony_ci unsigned invariant:1; /* The variable has the invariant flag set */ 1829bf215546Sopenharmony_ci /* CLIP_DISTn, LAYER, VIEWPORT, and TESS_LEVEL_* have up to 3 uses: 1830bf215546Sopenharmony_ci * - an output consumed by the next stage 1831bf215546Sopenharmony_ci * - a system value output affecting fixed-func hardware, e.g. the clipper 1832bf215546Sopenharmony_ci * - a transform feedback output written to memory 1833bf215546Sopenharmony_ci * The following fields disable the first two. Transform feedback is disabled 1834bf215546Sopenharmony_ci * by transform feedback info. 
1835bf215546Sopenharmony_ci */ 1836bf215546Sopenharmony_ci unsigned no_varying:1; /* whether this output isn't consumed by the next stage */ 1837bf215546Sopenharmony_ci unsigned no_sysval_output:1; /* whether this system value output has no 1838bf215546Sopenharmony_ci effect due to current pipeline states */ 1839bf215546Sopenharmony_ci unsigned _pad:3; 1840bf215546Sopenharmony_ci} nir_io_semantics; 1841bf215546Sopenharmony_ci 1842bf215546Sopenharmony_ci/* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains 1843bf215546Sopenharmony_ci * this structure twice to support up to 4 outputs. The structure is limited 1844bf215546Sopenharmony_ci * to 32 bits because it's stored in nir_intrinsic_instr::const_index[]. 1845bf215546Sopenharmony_ci */ 1846bf215546Sopenharmony_citypedef struct nir_io_xfb { 1847bf215546Sopenharmony_ci struct { 1848bf215546Sopenharmony_ci /* start_component is equal to the index of out[]; add 2 for io_xfb2 */ 1849bf215546Sopenharmony_ci /* start_component is not relative to nir_intrinsic_component */ 1850bf215546Sopenharmony_ci /* get the stream index from nir_io_semantics */ 1851bf215546Sopenharmony_ci uint8_t num_components:4; /* max 4; if this is 0, xfb is disabled */ 1852bf215546Sopenharmony_ci uint8_t buffer:4; /* buffer index, max 3 */ 1853bf215546Sopenharmony_ci uint8_t offset; /* transform feedback buffer offset in dwords, 1854bf215546Sopenharmony_ci max (1K - 4) bytes */ 1855bf215546Sopenharmony_ci } out[2]; 1856bf215546Sopenharmony_ci} nir_io_xfb; 1857bf215546Sopenharmony_ci 1858bf215546Sopenharmony_ciunsigned 1859bf215546Sopenharmony_cinir_instr_xfb_write_mask(nir_intrinsic_instr *instr); 1860bf215546Sopenharmony_ci 1861bf215546Sopenharmony_ci#define NIR_INTRINSIC_MAX_INPUTS 11 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_citypedef struct { 1864bf215546Sopenharmony_ci const char *name; 1865bf215546Sopenharmony_ci 1866bf215546Sopenharmony_ci uint8_t num_srcs; /** < number of register/SSA inputs */ 
1867bf215546Sopenharmony_ci 1868bf215546Sopenharmony_ci /** number of components of each input register 1869bf215546Sopenharmony_ci * 1870bf215546Sopenharmony_ci * If this value is 0, the number of components is given by the 1871bf215546Sopenharmony_ci * num_components field of nir_intrinsic_instr. If this value is -1, the 1872bf215546Sopenharmony_ci * intrinsic consumes however many components are provided and it is not 1873bf215546Sopenharmony_ci * validated at all. 1874bf215546Sopenharmony_ci */ 1875bf215546Sopenharmony_ci int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; 1876bf215546Sopenharmony_ci 1877bf215546Sopenharmony_ci bool has_dest; 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci /** number of components of the output register 1880bf215546Sopenharmony_ci * 1881bf215546Sopenharmony_ci * If this value is 0, the number of components is given by the 1882bf215546Sopenharmony_ci * num_components field of nir_intrinsic_instr. 1883bf215546Sopenharmony_ci */ 1884bf215546Sopenharmony_ci uint8_t dest_components; 1885bf215546Sopenharmony_ci 1886bf215546Sopenharmony_ci /** bitfield of legal bit sizes */ 1887bf215546Sopenharmony_ci uint8_t dest_bit_sizes; 1888bf215546Sopenharmony_ci 1889bf215546Sopenharmony_ci /** source which the destination bit size must match 1890bf215546Sopenharmony_ci * 1891bf215546Sopenharmony_ci * Some intrinsics, such as subgroup intrinsics, are data manipulation 1892bf215546Sopenharmony_ci * intrinsics and they have similar bit-size rules to ALU ops. This enables 1893bf215546Sopenharmony_ci * validation to validate a bit more and enables auto-generated builder code 1894bf215546Sopenharmony_ci * to properly determine destination bit sizes automatically. 
1895bf215546Sopenharmony_ci */ 1896bf215546Sopenharmony_ci int8_t bit_size_src; 1897bf215546Sopenharmony_ci 1898bf215546Sopenharmony_ci /** the number of constant indices used by the intrinsic */ 1899bf215546Sopenharmony_ci uint8_t num_indices; 1900bf215546Sopenharmony_ci 1901bf215546Sopenharmony_ci /** list of indices */ 1902bf215546Sopenharmony_ci uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX]; 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_ci /** indicates the usage of intr->const_index[n] */ 1905bf215546Sopenharmony_ci uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1906bf215546Sopenharmony_ci 1907bf215546Sopenharmony_ci /** semantic flags for calls to this intrinsic */ 1908bf215546Sopenharmony_ci nir_intrinsic_semantic_flag flags; 1909bf215546Sopenharmony_ci} nir_intrinsic_info; 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ciextern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_ciunsigned 1914bf215546Sopenharmony_cinir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn); 1915bf215546Sopenharmony_ci 1916bf215546Sopenharmony_ciunsigned 1917bf215546Sopenharmony_cinir_intrinsic_dest_components(nir_intrinsic_instr *intr); 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci/** 1920bf215546Sopenharmony_ci * Helper to copy const_index[] from src to dst, without assuming they 1921bf215546Sopenharmony_ci * match in order. 
1922bf215546Sopenharmony_ci */ 1923bf215546Sopenharmony_civoid nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src); 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci#include "nir_intrinsics_indices.h" 1926bf215546Sopenharmony_ci 1927bf215546Sopenharmony_cistatic inline void 1928bf215546Sopenharmony_cinir_intrinsic_set_align(nir_intrinsic_instr *intrin, 1929bf215546Sopenharmony_ci unsigned align_mul, unsigned align_offset) 1930bf215546Sopenharmony_ci{ 1931bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(align_mul)); 1932bf215546Sopenharmony_ci assert(align_offset < align_mul); 1933bf215546Sopenharmony_ci nir_intrinsic_set_align_mul(intrin, align_mul); 1934bf215546Sopenharmony_ci nir_intrinsic_set_align_offset(intrin, align_offset); 1935bf215546Sopenharmony_ci} 1936bf215546Sopenharmony_ci 1937bf215546Sopenharmony_ci/** Returns a simple alignment for a load/store intrinsic offset 1938bf215546Sopenharmony_ci * 1939bf215546Sopenharmony_ci * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 1940bf215546Sopenharmony_ci * and ALIGN_OFFSET parameters, this helper takes both into account and 1941bf215546Sopenharmony_ci * provides a single simple alignment parameter. The offset X is guaranteed 1942bf215546Sopenharmony_ci * to satisfy X % align == 0. 1943bf215546Sopenharmony_ci */ 1944bf215546Sopenharmony_cistatic inline unsigned 1945bf215546Sopenharmony_cinir_intrinsic_align(const nir_intrinsic_instr *intrin) 1946bf215546Sopenharmony_ci{ 1947bf215546Sopenharmony_ci const unsigned align_mul = nir_intrinsic_align_mul(intrin); 1948bf215546Sopenharmony_ci const unsigned align_offset = nir_intrinsic_align_offset(intrin); 1949bf215546Sopenharmony_ci assert(align_offset < align_mul); 1950bf215546Sopenharmony_ci return align_offset ? 
1 << (ffs(align_offset) - 1) : align_mul; 1951bf215546Sopenharmony_ci} 1952bf215546Sopenharmony_ci 1953bf215546Sopenharmony_cistatic inline bool 1954bf215546Sopenharmony_cinir_intrinsic_has_align(const nir_intrinsic_instr *intrin) 1955bf215546Sopenharmony_ci{ 1956bf215546Sopenharmony_ci return nir_intrinsic_has_align_mul(intrin) && 1957bf215546Sopenharmony_ci nir_intrinsic_has_align_offset(intrin); 1958bf215546Sopenharmony_ci} 1959bf215546Sopenharmony_ci 1960bf215546Sopenharmony_ciunsigned 1961bf215546Sopenharmony_cinir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); 1962bf215546Sopenharmony_ci 1963bf215546Sopenharmony_ci/* Converts a image_deref_* intrinsic into a image_* one */ 1964bf215546Sopenharmony_civoid nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 1965bf215546Sopenharmony_ci nir_ssa_def *handle, bool bindless); 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */ 1968bf215546Sopenharmony_cistatic inline bool 1969bf215546Sopenharmony_cinir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 1970bf215546Sopenharmony_ci{ 1971bf215546Sopenharmony_ci if (nir_intrinsic_has_access(instr) && 1972bf215546Sopenharmony_ci nir_intrinsic_access(instr) & ACCESS_VOLATILE) 1973bf215546Sopenharmony_ci return false; 1974bf215546Sopenharmony_ci if (instr->intrinsic == nir_intrinsic_load_deref) { 1975bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 1976bf215546Sopenharmony_ci return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) || 1977bf215546Sopenharmony_ci (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER); 1978bf215546Sopenharmony_ci } else if (instr->intrinsic == nir_intrinsic_load_ssbo || 1979bf215546Sopenharmony_ci instr->intrinsic == nir_intrinsic_bindless_image_load || 1980bf215546Sopenharmony_ci instr->intrinsic == nir_intrinsic_image_deref_load || 1981bf215546Sopenharmony_ci instr->intrinsic == 
nir_intrinsic_image_load) { 1982bf215546Sopenharmony_ci return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; 1983bf215546Sopenharmony_ci } else { 1984bf215546Sopenharmony_ci const nir_intrinsic_info *info = 1985bf215546Sopenharmony_ci &nir_intrinsic_infos[instr->intrinsic]; 1986bf215546Sopenharmony_ci return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 1987bf215546Sopenharmony_ci (info->flags & NIR_INTRINSIC_CAN_REORDER); 1988bf215546Sopenharmony_ci } 1989bf215546Sopenharmony_ci} 1990bf215546Sopenharmony_ci 1991bf215546Sopenharmony_cibool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr); 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci/** Texture instruction source type */ 1994bf215546Sopenharmony_citypedef enum { 1995bf215546Sopenharmony_ci /** Texture coordinate 1996bf215546Sopenharmony_ci * 1997bf215546Sopenharmony_ci * Must have nir_tex_instr::coord_components components. 1998bf215546Sopenharmony_ci */ 1999bf215546Sopenharmony_ci nir_tex_src_coord, 2000bf215546Sopenharmony_ci 2001bf215546Sopenharmony_ci /** Projector 2002bf215546Sopenharmony_ci * 2003bf215546Sopenharmony_ci * The texture coordinate (except for the array component, if any) is 2004bf215546Sopenharmony_ci * divided by this value before LOD computation and sampling. 2005bf215546Sopenharmony_ci * 2006bf215546Sopenharmony_ci * Must be a float scalar. 2007bf215546Sopenharmony_ci */ 2008bf215546Sopenharmony_ci nir_tex_src_projector, 2009bf215546Sopenharmony_ci 2010bf215546Sopenharmony_ci /** Shadow comparator 2011bf215546Sopenharmony_ci * 2012bf215546Sopenharmony_ci * For shadow sampling, the fetched texel values are compared against the 2013bf215546Sopenharmony_ci * shadow comparator using the compare op specified by the sampler object 2014bf215546Sopenharmony_ci * and converted to 1.0 if the comparison succeeds and 0.0 if it fails. 
2015bf215546Sopenharmony_ci * Interpolation happens after this conversion so the actual result may be 2016bf215546Sopenharmony_ci * anywhere in the range [0.0, 1.0]. 2017bf215546Sopenharmony_ci * 2018bf215546Sopenharmony_ci * Only valid if nir_tex_instr::is_shadow and must be a float scalar. 2019bf215546Sopenharmony_ci */ 2020bf215546Sopenharmony_ci nir_tex_src_comparator, 2021bf215546Sopenharmony_ci 2022bf215546Sopenharmony_ci /** Coordinate offset 2023bf215546Sopenharmony_ci * 2024bf215546Sopenharmony_ci * An integer value that is added to the texel address before sampling. 2025bf215546Sopenharmony_ci * This is only allowed with operations that take an explicit LOD as it is 2026bf215546Sopenharmony_ci * applied in integer texel space after LOD selection and not normalized 2027bf215546Sopenharmony_ci * coordinate space. 2028bf215546Sopenharmony_ci */ 2029bf215546Sopenharmony_ci nir_tex_src_offset, 2030bf215546Sopenharmony_ci 2031bf215546Sopenharmony_ci /** LOD bias 2032bf215546Sopenharmony_ci * 2033bf215546Sopenharmony_ci * This value is added to the computed LOD before mip-mapping. 2034bf215546Sopenharmony_ci */ 2035bf215546Sopenharmony_ci nir_tex_src_bias, 2036bf215546Sopenharmony_ci 2037bf215546Sopenharmony_ci /** Explicit LOD */ 2038bf215546Sopenharmony_ci nir_tex_src_lod, 2039bf215546Sopenharmony_ci 2040bf215546Sopenharmony_ci /** Min LOD 2041bf215546Sopenharmony_ci * 2042bf215546Sopenharmony_ci * The computed LOD is clamped to be at least as large as min_lod before 2043bf215546Sopenharmony_ci * mip-mapping. 
2044bf215546Sopenharmony_ci */ 2045bf215546Sopenharmony_ci nir_tex_src_min_lod, 2046bf215546Sopenharmony_ci 2047bf215546Sopenharmony_ci /** MSAA sample index */ 2048bf215546Sopenharmony_ci nir_tex_src_ms_index, 2049bf215546Sopenharmony_ci 2050bf215546Sopenharmony_ci /** Intel-specific MSAA compression data */ 2051bf215546Sopenharmony_ci nir_tex_src_ms_mcs_intel, 2052bf215546Sopenharmony_ci 2053bf215546Sopenharmony_ci /** Explicit horizontal (X-major) coordinate derivative */ 2054bf215546Sopenharmony_ci nir_tex_src_ddx, 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci /** Explicit vertical (Y-major) coordinate derivative */ 2057bf215546Sopenharmony_ci nir_tex_src_ddy, 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci /** Texture variable dereference */ 2060bf215546Sopenharmony_ci nir_tex_src_texture_deref, 2061bf215546Sopenharmony_ci 2062bf215546Sopenharmony_ci /** Sampler variable dereference */ 2063bf215546Sopenharmony_ci nir_tex_src_sampler_deref, 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci /** Texture index offset 2066bf215546Sopenharmony_ci * 2067bf215546Sopenharmony_ci * This is added to nir_tex_instr::texture_index. Unless 2068bf215546Sopenharmony_ci * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be 2069bf215546Sopenharmony_ci * dynamically uniform. 2070bf215546Sopenharmony_ci */ 2071bf215546Sopenharmony_ci nir_tex_src_texture_offset, 2072bf215546Sopenharmony_ci 2073bf215546Sopenharmony_ci /** Dynamically uniform sampler index offset 2074bf215546Sopenharmony_ci * 2075bf215546Sopenharmony_ci * This is added to nir_tex_instr::sampler_index. Unless 2076bf215546Sopenharmony_ci * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be 2077bf215546Sopenharmony_ci * dynamically uniform. 
This should not be present until GLSL ES 3.20, GLSL 2078bf215546Sopenharmony_ci * 4.00, or ARB_gpu_shader5, because in ES 3.10 and GL 3.30 samplers said 2079bf215546Sopenharmony_ci * "When aggregated into arrays within a shader, samplers can only be indexed 2080bf215546Sopenharmony_ci * with a constant integral expression." 2081bf215546Sopenharmony_ci */ 2082bf215546Sopenharmony_ci nir_tex_src_sampler_offset, 2083bf215546Sopenharmony_ci 2084bf215546Sopenharmony_ci /** Bindless texture handle 2085bf215546Sopenharmony_ci * 2086bf215546Sopenharmony_ci * This is, unfortunately, a bit overloaded at the moment. There are 2087bf215546Sopenharmony_ci * generally two types of bindless handles: 2088bf215546Sopenharmony_ci * 2089bf215546Sopenharmony_ci * 1. For GL_ARB_bindless bindless handles. These are part of the 2090bf215546Sopenharmony_ci * GL/Gallium-level API and are always a 64-bit integer. 2091bf215546Sopenharmony_ci * 2092bf215546Sopenharmony_ci * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to 2093bf215546Sopenharmony_ci * these. Also, these are used by many Vulkan drivers to implement 2094bf215546Sopenharmony_ci * descriptor sets, especially for UPDATE_AFTER_BIND descriptors. 2095bf215546Sopenharmony_ci * The details of hardware handles (bit size, format, etc.) is 2096bf215546Sopenharmony_ci * HW-specific. 2097bf215546Sopenharmony_ci * 2098bf215546Sopenharmony_ci * Because of this overloading and the resulting ambiguity, we currently 2099bf215546Sopenharmony_ci * don't validate anything for these. 
2100bf215546Sopenharmony_ci */ 2101bf215546Sopenharmony_ci nir_tex_src_texture_handle, 2102bf215546Sopenharmony_ci 2103bf215546Sopenharmony_ci /** Bindless sampler handle 2104bf215546Sopenharmony_ci * 2105bf215546Sopenharmony_ci * See nir_tex_src_texture_handle, 2106bf215546Sopenharmony_ci */ 2107bf215546Sopenharmony_ci nir_tex_src_sampler_handle, 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_ci /** Plane index for multi-plane YCbCr textures */ 2110bf215546Sopenharmony_ci nir_tex_src_plane, 2111bf215546Sopenharmony_ci 2112bf215546Sopenharmony_ci /** 2113bf215546Sopenharmony_ci * Backend-specific vec4 tex src argument. 2114bf215546Sopenharmony_ci * 2115bf215546Sopenharmony_ci * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs) 2116bf215546Sopenharmony_ci * apply to the packing of the tex srcs. This lowering must only happen 2117bf215546Sopenharmony_ci * after nir_lower_tex(). 2118bf215546Sopenharmony_ci * 2119bf215546Sopenharmony_ci * The nir_tex_instr_src_type() of this argument is float, so no lowering 2120bf215546Sopenharmony_ci * will happen if nir_lower_int_to_float is used. 2121bf215546Sopenharmony_ci */ 2122bf215546Sopenharmony_ci nir_tex_src_backend1, 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_ci /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. 
*/ 2125bf215546Sopenharmony_ci nir_tex_src_backend2, 2126bf215546Sopenharmony_ci 2127bf215546Sopenharmony_ci nir_num_tex_src_types 2128bf215546Sopenharmony_ci} nir_tex_src_type; 2129bf215546Sopenharmony_ci 2130bf215546Sopenharmony_ci/** A texture instruction source */ 2131bf215546Sopenharmony_citypedef struct { 2132bf215546Sopenharmony_ci /** Base source */ 2133bf215546Sopenharmony_ci nir_src src; 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci /** Type of this source */ 2136bf215546Sopenharmony_ci nir_tex_src_type src_type; 2137bf215546Sopenharmony_ci} nir_tex_src; 2138bf215546Sopenharmony_ci 2139bf215546Sopenharmony_ci/** Texture instruction opcode */ 2140bf215546Sopenharmony_citypedef enum { 2141bf215546Sopenharmony_ci nir_texop_tex, /**< Regular texture look-up */ 2142bf215546Sopenharmony_ci nir_texop_txb, /**< Texture look-up with LOD bias */ 2143bf215546Sopenharmony_ci nir_texop_txl, /**< Texture look-up with explicit LOD */ 2144bf215546Sopenharmony_ci nir_texop_txd, /**< Texture look-up with partial derivatives */ 2145bf215546Sopenharmony_ci nir_texop_txf, /**< Texel fetch with explicit LOD */ 2146bf215546Sopenharmony_ci nir_texop_txf_ms, /**< Multisample texture fetch */ 2147bf215546Sopenharmony_ci nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 2148bf215546Sopenharmony_ci nir_texop_txf_ms_mcs_intel, /**< Multisample compression value fetch */ 2149bf215546Sopenharmony_ci nir_texop_txs, /**< Texture size */ 2150bf215546Sopenharmony_ci nir_texop_lod, /**< Texture lod query */ 2151bf215546Sopenharmony_ci nir_texop_tg4, /**< Texture gather */ 2152bf215546Sopenharmony_ci nir_texop_query_levels, /**< Texture levels query */ 2153bf215546Sopenharmony_ci nir_texop_texture_samples, /**< Texture samples query */ 2154bf215546Sopenharmony_ci nir_texop_samples_identical, /**< Query whether all samples are definitely 2155bf215546Sopenharmony_ci * identical. 
2156bf215546Sopenharmony_ci */ 2157bf215546Sopenharmony_ci nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ 2158bf215546Sopenharmony_ci nir_texop_fragment_fetch_amd, /**< Multisample fragment color texture fetch */ 2159bf215546Sopenharmony_ci nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */ 2160bf215546Sopenharmony_ci} nir_texop; 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci/** Represents a texture instruction */ 2163bf215546Sopenharmony_citypedef struct { 2164bf215546Sopenharmony_ci /** Base instruction */ 2165bf215546Sopenharmony_ci nir_instr instr; 2166bf215546Sopenharmony_ci 2167bf215546Sopenharmony_ci /** Dimensionality of the texture operation 2168bf215546Sopenharmony_ci * 2169bf215546Sopenharmony_ci * This will typically match the dimensionality of the texture deref type 2170bf215546Sopenharmony_ci * if a nir_tex_src_texture_deref is present. However, it may not if 2171bf215546Sopenharmony_ci * texture lowering has occurred. 2172bf215546Sopenharmony_ci */ 2173bf215546Sopenharmony_ci enum glsl_sampler_dim sampler_dim; 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci /** ALU type of the destination 2176bf215546Sopenharmony_ci * 2177bf215546Sopenharmony_ci * This is the canonical sampled type for this texture operation and may 2178bf215546Sopenharmony_ci * not exactly match the sampled type of the deref type when a 2179bf215546Sopenharmony_ci * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of 2180bf215546Sopenharmony_ci * the texture deref will be GLSL_TYPE_VOID and this is allowed to be 2181bf215546Sopenharmony_ci * anything. With SPIR-V, the signedness of integer types is allowed to 2182bf215546Sopenharmony_ci * differ. For all APIs, the bit size may differ if the driver has done 2183bf215546Sopenharmony_ci * any sort of mediump or similar lowering since texture types always have 2184bf215546Sopenharmony_ci * 32-bit sampled types. 
2185bf215546Sopenharmony_ci */ 2186bf215546Sopenharmony_ci nir_alu_type dest_type; 2187bf215546Sopenharmony_ci 2188bf215546Sopenharmony_ci /** Texture opcode */ 2189bf215546Sopenharmony_ci nir_texop op; 2190bf215546Sopenharmony_ci 2191bf215546Sopenharmony_ci /** Destination */ 2192bf215546Sopenharmony_ci nir_dest dest; 2193bf215546Sopenharmony_ci 2194bf215546Sopenharmony_ci /** Array of sources 2195bf215546Sopenharmony_ci * 2196bf215546Sopenharmony_ci * This array has nir_tex_instr::num_srcs elements 2197bf215546Sopenharmony_ci */ 2198bf215546Sopenharmony_ci nir_tex_src *src; 2199bf215546Sopenharmony_ci 2200bf215546Sopenharmony_ci /** Number of sources */ 2201bf215546Sopenharmony_ci unsigned num_srcs; 2202bf215546Sopenharmony_ci 2203bf215546Sopenharmony_ci /** Number of components in the coordinate, if any */ 2204bf215546Sopenharmony_ci unsigned coord_components; 2205bf215546Sopenharmony_ci 2206bf215546Sopenharmony_ci /** True if the texture instruction acts on an array texture */ 2207bf215546Sopenharmony_ci bool is_array; 2208bf215546Sopenharmony_ci 2209bf215546Sopenharmony_ci /** True if the texture instruction performs a shadow comparison 2210bf215546Sopenharmony_ci * 2211bf215546Sopenharmony_ci * If this is true, the texture instruction must have a 2212bf215546Sopenharmony_ci * nir_tex_src_comparator. 2213bf215546Sopenharmony_ci */ 2214bf215546Sopenharmony_ci bool is_shadow; 2215bf215546Sopenharmony_ci 2216bf215546Sopenharmony_ci /** 2217bf215546Sopenharmony_ci * If is_shadow is true, whether this is the old-style shadow that outputs 2218bf215546Sopenharmony_ci * 4 components or the new-style shadow that outputs 1 component. 2219bf215546Sopenharmony_ci */ 2220bf215546Sopenharmony_ci bool is_new_style_shadow; 2221bf215546Sopenharmony_ci 2222bf215546Sopenharmony_ci /** 2223bf215546Sopenharmony_ci * True if this texture instruction should return a sparse residency code. 2224bf215546Sopenharmony_ci * The code is in the last component of the result. 
2225bf215546Sopenharmony_ci */ 2226bf215546Sopenharmony_ci bool is_sparse; 2227bf215546Sopenharmony_ci 2228bf215546Sopenharmony_ci /** nir_texop_tg4 component selector 2229bf215546Sopenharmony_ci * 2230bf215546Sopenharmony_ci * This determines which RGBA component is gathered. 2231bf215546Sopenharmony_ci */ 2232bf215546Sopenharmony_ci unsigned component : 2; 2233bf215546Sopenharmony_ci 2234bf215546Sopenharmony_ci /** Validation needs to know this for gradient component count */ 2235bf215546Sopenharmony_ci unsigned array_is_lowered_cube : 1; 2236bf215546Sopenharmony_ci 2237bf215546Sopenharmony_ci /** Gather offsets */ 2238bf215546Sopenharmony_ci int8_t tg4_offsets[4][2]; 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci /** True if the texture index or handle is not dynamically uniform */ 2241bf215546Sopenharmony_ci bool texture_non_uniform; 2242bf215546Sopenharmony_ci 2243bf215546Sopenharmony_ci /** True if the sampler index or handle is not dynamically uniform. 2244bf215546Sopenharmony_ci * 2245bf215546Sopenharmony_ci * This may be set when VK_EXT_descriptor_indexing is supported and the 2246bf215546Sopenharmony_ci * appropriate capability is enabled. 2247bf215546Sopenharmony_ci * 2248bf215546Sopenharmony_ci * This should always be false in GLSL (GLSL ES 3.20 says "When aggregated 2249bf215546Sopenharmony_ci * into arrays within a shader, opaque types can only be indexed with a 2250bf215546Sopenharmony_ci * dynamically uniform integral expression", and GLSL 4.60 says "When 2251bf215546Sopenharmony_ci * aggregated into arrays within a shader, [texture, sampler, and 2252bf215546Sopenharmony_ci * samplerShadow] types can only be indexed with a dynamically uniform 2253bf215546Sopenharmony_ci * expression, or texture lookup will result in undefined values."). 
2254bf215546Sopenharmony_ci */ 2255bf215546Sopenharmony_ci bool sampler_non_uniform; 2256bf215546Sopenharmony_ci 2257bf215546Sopenharmony_ci /** The texture index 2258bf215546Sopenharmony_ci * 2259bf215546Sopenharmony_ci * If this texture instruction has a nir_tex_src_texture_offset source, 2260bf215546Sopenharmony_ci * then the texture index is given by texture_index + texture_offset. 2261bf215546Sopenharmony_ci */ 2262bf215546Sopenharmony_ci unsigned texture_index; 2263bf215546Sopenharmony_ci 2264bf215546Sopenharmony_ci /** The sampler index 2265bf215546Sopenharmony_ci * 2266bf215546Sopenharmony_ci * The following operations do not require a sampler and, as such, this 2267bf215546Sopenharmony_ci * field should be ignored: 2268bf215546Sopenharmony_ci * - nir_texop_txf 2269bf215546Sopenharmony_ci * - nir_texop_txf_ms 2270bf215546Sopenharmony_ci * - nir_texop_txs 2271bf215546Sopenharmony_ci * - nir_texop_query_levels 2272bf215546Sopenharmony_ci * - nir_texop_texture_samples 2273bf215546Sopenharmony_ci * - nir_texop_samples_identical 2274bf215546Sopenharmony_ci * 2275bf215546Sopenharmony_ci * If this texture instruction has a nir_tex_src_sampler_offset source, 2276bf215546Sopenharmony_ci * then the sampler index is given by sampler_index + sampler_offset. 2277bf215546Sopenharmony_ci */ 2278bf215546Sopenharmony_ci unsigned sampler_index; 2279bf215546Sopenharmony_ci} nir_tex_instr; 2280bf215546Sopenharmony_ci 2281bf215546Sopenharmony_ci/** 2282bf215546Sopenharmony_ci * Returns true if the texture operation requires a sampler as a general rule 2283bf215546Sopenharmony_ci * 2284bf215546Sopenharmony_ci * Note that the specific hw/driver backend could require to a sampler 2285bf215546Sopenharmony_ci * object/configuration packet in any case, for some other reason. 2286bf215546Sopenharmony_ci * 2287bf215546Sopenharmony_ci * @see nir_tex_instr::sampler_index. 
2288bf215546Sopenharmony_ci */ 2289bf215546Sopenharmony_cibool nir_tex_instr_need_sampler(const nir_tex_instr *instr); 2290bf215546Sopenharmony_ci 2291bf215546Sopenharmony_ci/** Returns the number of components returned by this nir_tex_instr 2292bf215546Sopenharmony_ci * 2293bf215546Sopenharmony_ci * Useful for code building texture instructions when you don't want to think 2294bf215546Sopenharmony_ci * about how many components a particular texture op returns. This does not 2295bf215546Sopenharmony_ci * include the sparse residency code. 2296bf215546Sopenharmony_ci */ 2297bf215546Sopenharmony_ciunsigned 2298bf215546Sopenharmony_cinir_tex_instr_result_size(const nir_tex_instr *instr); 2299bf215546Sopenharmony_ci 2300bf215546Sopenharmony_ci/** 2301bf215546Sopenharmony_ci * Returns the destination size of this nir_tex_instr including the sparse 2302bf215546Sopenharmony_ci * residency code, if any. 2303bf215546Sopenharmony_ci */ 2304bf215546Sopenharmony_cistatic inline unsigned 2305bf215546Sopenharmony_cinir_tex_instr_dest_size(const nir_tex_instr *instr) 2306bf215546Sopenharmony_ci{ 2307bf215546Sopenharmony_ci /* One more component is needed for the residency code. */ 2308bf215546Sopenharmony_ci return nir_tex_instr_result_size(instr) + instr->is_sparse; 2309bf215546Sopenharmony_ci} 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci/** 2312bf215546Sopenharmony_ci * Returns true if this texture operation queries something about the texture 2313bf215546Sopenharmony_ci * rather than actually sampling it. 
2314bf215546Sopenharmony_ci */ 2315bf215546Sopenharmony_cibool 2316bf215546Sopenharmony_cinir_tex_instr_is_query(const nir_tex_instr *instr); 2317bf215546Sopenharmony_ci 2318bf215546Sopenharmony_ci/** Returns true if this texture instruction does implicit derivatives 2319bf215546Sopenharmony_ci * 2320bf215546Sopenharmony_ci * This is important as there are extra control-flow rules around derivatives 2321bf215546Sopenharmony_ci * and texture instructions which perform them implicitly. 2322bf215546Sopenharmony_ci */ 2323bf215546Sopenharmony_cibool 2324bf215546Sopenharmony_cinir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr); 2325bf215546Sopenharmony_ci 2326bf215546Sopenharmony_ci/** Returns the ALU type of the given texture instruction source */ 2327bf215546Sopenharmony_cinir_alu_type 2328bf215546Sopenharmony_cinir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src); 2329bf215546Sopenharmony_ci 2330bf215546Sopenharmony_ci/** 2331bf215546Sopenharmony_ci * Returns the number of components required by the given texture instruction 2332bf215546Sopenharmony_ci * source 2333bf215546Sopenharmony_ci */ 2334bf215546Sopenharmony_ciunsigned 2335bf215546Sopenharmony_cinir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src); 2336bf215546Sopenharmony_ci 2337bf215546Sopenharmony_ci/** 2338bf215546Sopenharmony_ci * Returns the index of the texture instruction source with the given 2339bf215546Sopenharmony_ci * nir_tex_src_type or -1 if no such source exists. 
2340bf215546Sopenharmony_ci */ 2341bf215546Sopenharmony_cistatic inline int 2342bf215546Sopenharmony_cinir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type) 2343bf215546Sopenharmony_ci{ 2344bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->num_srcs; i++) 2345bf215546Sopenharmony_ci if (instr->src[i].src_type == type) 2346bf215546Sopenharmony_ci return (int) i; 2347bf215546Sopenharmony_ci 2348bf215546Sopenharmony_ci return -1; 2349bf215546Sopenharmony_ci} 2350bf215546Sopenharmony_ci 2351bf215546Sopenharmony_ci/** Adds a source to a texture instruction */ 2352bf215546Sopenharmony_civoid nir_tex_instr_add_src(nir_tex_instr *tex, 2353bf215546Sopenharmony_ci nir_tex_src_type src_type, 2354bf215546Sopenharmony_ci nir_src src); 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_ci/** Removes a source from a texture instruction */ 2357bf215546Sopenharmony_civoid nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 2358bf215546Sopenharmony_ci 2359bf215546Sopenharmony_cibool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex); 2360bf215546Sopenharmony_ci 2361bf215546Sopenharmony_citypedef struct { 2362bf215546Sopenharmony_ci nir_instr instr; 2363bf215546Sopenharmony_ci 2364bf215546Sopenharmony_ci nir_ssa_def def; 2365bf215546Sopenharmony_ci 2366bf215546Sopenharmony_ci nir_const_value value[]; 2367bf215546Sopenharmony_ci} nir_load_const_instr; 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_citypedef enum { 2370bf215546Sopenharmony_ci /** Return from a function 2371bf215546Sopenharmony_ci * 2372bf215546Sopenharmony_ci * This instruction is a classic function return. It jumps to 2373bf215546Sopenharmony_ci * nir_function_impl::end_block. No return value is provided in this 2374bf215546Sopenharmony_ci * instruction. Instead, the function is expected to write any return 2375bf215546Sopenharmony_ci * data to a deref passed in from the caller. 
2376bf215546Sopenharmony_ci */ 2377bf215546Sopenharmony_ci nir_jump_return, 2378bf215546Sopenharmony_ci 2379bf215546Sopenharmony_ci /** Immediately exit the current shader 2380bf215546Sopenharmony_ci * 2381bf215546Sopenharmony_ci * This instruction is roughly the equivalent of C's "exit()" in that it 2382bf215546Sopenharmony_ci * immediately terminates the current shader invocation. From a CFG 2383bf215546Sopenharmony_ci * perspective, it looks like a jump to nir_function_impl::end_block but 2384bf215546Sopenharmony_ci * it actually jumps to the end block of the shader entrypoint. A halt 2385bf215546Sopenharmony_ci * instruction in the shader entrypoint itself is semantically identical 2386bf215546Sopenharmony_ci * to a return. 2387bf215546Sopenharmony_ci * 2388bf215546Sopenharmony_ci * For shaders with built-in I/O, any outputs written prior to a halt 2389bf215546Sopenharmony_ci * instruction remain written and any outputs not written prior to the 2390bf215546Sopenharmony_ci * halt have undefined values. It does NOT cause an implicit discard of 2391bf215546Sopenharmony_ci * written results. If one wants discard results in a fragment shader, 2392bf215546Sopenharmony_ci * for instance, a discard or demote intrinsic is required. 2393bf215546Sopenharmony_ci */ 2394bf215546Sopenharmony_ci nir_jump_halt, 2395bf215546Sopenharmony_ci 2396bf215546Sopenharmony_ci /** Break out of the inner-most loop 2397bf215546Sopenharmony_ci * 2398bf215546Sopenharmony_ci * This has the same semantics as C's "break" statement. 2399bf215546Sopenharmony_ci */ 2400bf215546Sopenharmony_ci nir_jump_break, 2401bf215546Sopenharmony_ci 2402bf215546Sopenharmony_ci /** Jump back to the top of the inner-most loop 2403bf215546Sopenharmony_ci * 2404bf215546Sopenharmony_ci * This has the same semantics as C's "continue" statement assuming that a 2405bf215546Sopenharmony_ci * NIR loop is implemented as "while (1) { body }". 
2406bf215546Sopenharmony_ci */ 2407bf215546Sopenharmony_ci nir_jump_continue, 2408bf215546Sopenharmony_ci 2409bf215546Sopenharmony_ci /** Jumps for unstructured CFG. 2410bf215546Sopenharmony_ci * 2411bf215546Sopenharmony_ci * As within an unstructured CFG we can't rely on block ordering we need to 2412bf215546Sopenharmony_ci * place explicit jumps at the end of every block. 2413bf215546Sopenharmony_ci */ 2414bf215546Sopenharmony_ci nir_jump_goto, 2415bf215546Sopenharmony_ci nir_jump_goto_if, 2416bf215546Sopenharmony_ci} nir_jump_type; 2417bf215546Sopenharmony_ci 2418bf215546Sopenharmony_citypedef struct { 2419bf215546Sopenharmony_ci nir_instr instr; 2420bf215546Sopenharmony_ci nir_jump_type type; 2421bf215546Sopenharmony_ci nir_src condition; 2422bf215546Sopenharmony_ci struct nir_block *target; 2423bf215546Sopenharmony_ci struct nir_block *else_target; 2424bf215546Sopenharmony_ci} nir_jump_instr; 2425bf215546Sopenharmony_ci 2426bf215546Sopenharmony_ci/* creates a new SSA variable in an undefined state */ 2427bf215546Sopenharmony_ci 2428bf215546Sopenharmony_citypedef struct { 2429bf215546Sopenharmony_ci nir_instr instr; 2430bf215546Sopenharmony_ci nir_ssa_def def; 2431bf215546Sopenharmony_ci} nir_ssa_undef_instr; 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_citypedef struct { 2434bf215546Sopenharmony_ci struct exec_node node; 2435bf215546Sopenharmony_ci 2436bf215546Sopenharmony_ci /* The predecessor block corresponding to this source */ 2437bf215546Sopenharmony_ci struct nir_block *pred; 2438bf215546Sopenharmony_ci 2439bf215546Sopenharmony_ci nir_src src; 2440bf215546Sopenharmony_ci} nir_phi_src; 2441bf215546Sopenharmony_ci 2442bf215546Sopenharmony_ci#define nir_foreach_phi_src(phi_src, phi) \ 2443bf215546Sopenharmony_ci foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 2444bf215546Sopenharmony_ci#define nir_foreach_phi_src_safe(phi_src, phi) \ 2445bf215546Sopenharmony_ci foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 
2446bf215546Sopenharmony_ci 2447bf215546Sopenharmony_citypedef struct { 2448bf215546Sopenharmony_ci nir_instr instr; 2449bf215546Sopenharmony_ci 2450bf215546Sopenharmony_ci struct exec_list srcs; /** < list of nir_phi_src */ 2451bf215546Sopenharmony_ci 2452bf215546Sopenharmony_ci nir_dest dest; 2453bf215546Sopenharmony_ci} nir_phi_instr; 2454bf215546Sopenharmony_ci 2455bf215546Sopenharmony_cistatic inline nir_phi_src * 2456bf215546Sopenharmony_cinir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block) 2457bf215546Sopenharmony_ci{ 2458bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 2459bf215546Sopenharmony_ci if (src->pred == block) 2460bf215546Sopenharmony_ci return src; 2461bf215546Sopenharmony_ci } 2462bf215546Sopenharmony_ci 2463bf215546Sopenharmony_ci assert(!"Block is not a predecessor of phi."); 2464bf215546Sopenharmony_ci return NULL; 2465bf215546Sopenharmony_ci} 2466bf215546Sopenharmony_ci 2467bf215546Sopenharmony_citypedef struct { 2468bf215546Sopenharmony_ci struct exec_node node; 2469bf215546Sopenharmony_ci nir_src src; 2470bf215546Sopenharmony_ci nir_dest dest; 2471bf215546Sopenharmony_ci} nir_parallel_copy_entry; 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci#define nir_foreach_parallel_copy_entry(entry, pcopy) \ 2474bf215546Sopenharmony_ci foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 2475bf215546Sopenharmony_ci 2476bf215546Sopenharmony_citypedef struct { 2477bf215546Sopenharmony_ci nir_instr instr; 2478bf215546Sopenharmony_ci 2479bf215546Sopenharmony_ci /* A list of nir_parallel_copy_entrys. The sources of all of the 2480bf215546Sopenharmony_ci * entries are copied to the corresponding destinations "in parallel". 2481bf215546Sopenharmony_ci * In other words, if we have two entries: a -> b and b -> a, the values 2482bf215546Sopenharmony_ci * get swapped. 
2483bf215546Sopenharmony_ci */ 2484bf215546Sopenharmony_ci struct exec_list entries; 2485bf215546Sopenharmony_ci} nir_parallel_copy_instr; 2486bf215546Sopenharmony_ci 2487bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 2488bf215546Sopenharmony_ci type, nir_instr_type_alu) 2489bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 2490bf215546Sopenharmony_ci type, nir_instr_type_deref) 2491bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 2492bf215546Sopenharmony_ci type, nir_instr_type_call) 2493bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 2494bf215546Sopenharmony_ci type, nir_instr_type_jump) 2495bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 2496bf215546Sopenharmony_ci type, nir_instr_type_tex) 2497bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 2498bf215546Sopenharmony_ci type, nir_instr_type_intrinsic) 2499bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 2500bf215546Sopenharmony_ci type, nir_instr_type_load_const) 2501bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 2502bf215546Sopenharmony_ci type, nir_instr_type_ssa_undef) 2503bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 2504bf215546Sopenharmony_ci type, nir_instr_type_phi) 2505bf215546Sopenharmony_ciNIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 2506bf215546Sopenharmony_ci nir_parallel_copy_instr, instr, 2507bf215546Sopenharmony_ci type, nir_instr_type_parallel_copy) 2508bf215546Sopenharmony_ci 2509bf215546Sopenharmony_ci 2510bf215546Sopenharmony_ci#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \ 2511bf215546Sopenharmony_cistatic inline type \ 
2512bf215546Sopenharmony_cinir_src_comp_as_##suffix(nir_src src, unsigned comp) \ 2513bf215546Sopenharmony_ci{ \ 2514bf215546Sopenharmony_ci assert(nir_src_is_const(src)); \ 2515bf215546Sopenharmony_ci nir_load_const_instr *load = \ 2516bf215546Sopenharmony_ci nir_instr_as_load_const(src.ssa->parent_instr); \ 2517bf215546Sopenharmony_ci assert(comp < load->def.num_components); \ 2518bf215546Sopenharmony_ci return nir_const_value_as_##suffix(load->value[comp], \ 2519bf215546Sopenharmony_ci load->def.bit_size); \ 2520bf215546Sopenharmony_ci} \ 2521bf215546Sopenharmony_ci \ 2522bf215546Sopenharmony_cistatic inline type \ 2523bf215546Sopenharmony_cinir_src_as_##suffix(nir_src src) \ 2524bf215546Sopenharmony_ci{ \ 2525bf215546Sopenharmony_ci assert(nir_src_num_components(src) == 1); \ 2526bf215546Sopenharmony_ci return nir_src_comp_as_##suffix(src, 0); \ 2527bf215546Sopenharmony_ci} 2528bf215546Sopenharmony_ci 2529bf215546Sopenharmony_ciNIR_DEFINE_SRC_AS_CONST(int64_t, int) 2530bf215546Sopenharmony_ciNIR_DEFINE_SRC_AS_CONST(uint64_t, uint) 2531bf215546Sopenharmony_ciNIR_DEFINE_SRC_AS_CONST(bool, bool) 2532bf215546Sopenharmony_ciNIR_DEFINE_SRC_AS_CONST(double, float) 2533bf215546Sopenharmony_ci 2534bf215546Sopenharmony_ci#undef NIR_DEFINE_SRC_AS_CONST 2535bf215546Sopenharmony_ci 2536bf215546Sopenharmony_ci 2537bf215546Sopenharmony_citypedef struct { 2538bf215546Sopenharmony_ci nir_ssa_def *def; 2539bf215546Sopenharmony_ci unsigned comp; 2540bf215546Sopenharmony_ci} nir_ssa_scalar; 2541bf215546Sopenharmony_ci 2542bf215546Sopenharmony_cistatic inline bool 2543bf215546Sopenharmony_cinir_ssa_scalar_is_const(nir_ssa_scalar s) 2544bf215546Sopenharmony_ci{ 2545bf215546Sopenharmony_ci return s.def->parent_instr->type == nir_instr_type_load_const; 2546bf215546Sopenharmony_ci} 2547bf215546Sopenharmony_ci 2548bf215546Sopenharmony_cistatic inline nir_const_value 2549bf215546Sopenharmony_cinir_ssa_scalar_as_const_value(nir_ssa_scalar s) 2550bf215546Sopenharmony_ci{ 
2551bf215546Sopenharmony_ci assert(s.comp < s.def->num_components); 2552bf215546Sopenharmony_ci nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 2553bf215546Sopenharmony_ci return load->value[s.comp]; 2554bf215546Sopenharmony_ci} 2555bf215546Sopenharmony_ci 2556bf215546Sopenharmony_ci#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 2557bf215546Sopenharmony_cistatic inline type \ 2558bf215546Sopenharmony_cinir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ 2559bf215546Sopenharmony_ci{ \ 2560bf215546Sopenharmony_ci return nir_const_value_as_##suffix( \ 2561bf215546Sopenharmony_ci nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ 2562bf215546Sopenharmony_ci} 2563bf215546Sopenharmony_ci 2564bf215546Sopenharmony_ciNIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 2565bf215546Sopenharmony_ciNIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 2566bf215546Sopenharmony_ciNIR_DEFINE_SCALAR_AS_CONST(bool, bool) 2567bf215546Sopenharmony_ciNIR_DEFINE_SCALAR_AS_CONST(double, float) 2568bf215546Sopenharmony_ci 2569bf215546Sopenharmony_ci#undef NIR_DEFINE_SCALAR_AS_CONST 2570bf215546Sopenharmony_ci 2571bf215546Sopenharmony_cistatic inline bool 2572bf215546Sopenharmony_cinir_ssa_scalar_is_alu(nir_ssa_scalar s) 2573bf215546Sopenharmony_ci{ 2574bf215546Sopenharmony_ci return s.def->parent_instr->type == nir_instr_type_alu; 2575bf215546Sopenharmony_ci} 2576bf215546Sopenharmony_ci 2577bf215546Sopenharmony_cistatic inline nir_op 2578bf215546Sopenharmony_cinir_ssa_scalar_alu_op(nir_ssa_scalar s) 2579bf215546Sopenharmony_ci{ 2580bf215546Sopenharmony_ci return nir_instr_as_alu(s.def->parent_instr)->op; 2581bf215546Sopenharmony_ci} 2582bf215546Sopenharmony_ci 2583bf215546Sopenharmony_cistatic inline nir_ssa_scalar 2584bf215546Sopenharmony_cinir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 2585bf215546Sopenharmony_ci{ 2586bf215546Sopenharmony_ci nir_ssa_scalar out = { NULL, 0 }; 2587bf215546Sopenharmony_ci 2588bf215546Sopenharmony_ci nir_alu_instr *alu = 
nir_instr_as_alu(s.def->parent_instr); 2589bf215546Sopenharmony_ci assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 2590bf215546Sopenharmony_ci 2591bf215546Sopenharmony_ci /* Our component must be written */ 2592bf215546Sopenharmony_ci assert(s.comp < s.def->num_components); 2593bf215546Sopenharmony_ci assert(alu->dest.write_mask & (1u << s.comp)); 2594bf215546Sopenharmony_ci 2595bf215546Sopenharmony_ci assert(alu->src[alu_src_idx].src.is_ssa); 2596bf215546Sopenharmony_ci out.def = alu->src[alu_src_idx].src.ssa; 2597bf215546Sopenharmony_ci 2598bf215546Sopenharmony_ci if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 2599bf215546Sopenharmony_ci /* The ALU src is unsized so the source component follows the 2600bf215546Sopenharmony_ci * destination component. 2601bf215546Sopenharmony_ci */ 2602bf215546Sopenharmony_ci out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2603bf215546Sopenharmony_ci } else { 2604bf215546Sopenharmony_ci /* This is a sized source so all source components work together to 2605bf215546Sopenharmony_ci * produce all the destination components. Since we need to return a 2606bf215546Sopenharmony_ci * scalar, this only works if the source is a scalar. 
2607bf215546Sopenharmony_ci */ 2608bf215546Sopenharmony_ci assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2609bf215546Sopenharmony_ci out.comp = alu->src[alu_src_idx].swizzle[0]; 2610bf215546Sopenharmony_ci } 2611bf215546Sopenharmony_ci assert(out.comp < out.def->num_components); 2612bf215546Sopenharmony_ci 2613bf215546Sopenharmony_ci return out; 2614bf215546Sopenharmony_ci} 2615bf215546Sopenharmony_ci 2616bf215546Sopenharmony_cinir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s); 2617bf215546Sopenharmony_ci 2618bf215546Sopenharmony_cistatic inline nir_ssa_scalar 2619bf215546Sopenharmony_cinir_get_ssa_scalar(nir_ssa_def *def, unsigned channel) 2620bf215546Sopenharmony_ci{ 2621bf215546Sopenharmony_ci nir_ssa_scalar s = { def, channel }; 2622bf215546Sopenharmony_ci return s; 2623bf215546Sopenharmony_ci} 2624bf215546Sopenharmony_ci 2625bf215546Sopenharmony_ci/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */ 2626bf215546Sopenharmony_cistatic inline nir_ssa_scalar 2627bf215546Sopenharmony_cinir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel) 2628bf215546Sopenharmony_ci{ 2629bf215546Sopenharmony_ci return nir_ssa_scalar_chase_movs(nir_get_ssa_scalar(def, channel)); 2630bf215546Sopenharmony_ci} 2631bf215546Sopenharmony_ci 2632bf215546Sopenharmony_ci 2633bf215546Sopenharmony_citypedef struct { 2634bf215546Sopenharmony_ci bool success; 2635bf215546Sopenharmony_ci 2636bf215546Sopenharmony_ci nir_variable *var; 2637bf215546Sopenharmony_ci unsigned desc_set; 2638bf215546Sopenharmony_ci unsigned binding; 2639bf215546Sopenharmony_ci unsigned num_indices; 2640bf215546Sopenharmony_ci nir_src indices[4]; 2641bf215546Sopenharmony_ci bool read_first_invocation; 2642bf215546Sopenharmony_ci} nir_binding; 2643bf215546Sopenharmony_ci 2644bf215546Sopenharmony_cinir_binding nir_chase_binding(nir_src rsrc); 2645bf215546Sopenharmony_cinir_variable *nir_get_binding_variable(struct nir_shader *shader, 
nir_binding binding); 2646bf215546Sopenharmony_ci 2647bf215546Sopenharmony_ci 2648bf215546Sopenharmony_ci/* 2649bf215546Sopenharmony_ci * Control flow 2650bf215546Sopenharmony_ci * 2651bf215546Sopenharmony_ci * Control flow consists of a tree of control flow nodes, which include 2652bf215546Sopenharmony_ci * if-statements and loops. The leaves of the tree are basic blocks, lists of 2653bf215546Sopenharmony_ci * instructions that always run start-to-finish. Each basic block also keeps 2654bf215546Sopenharmony_ci * track of its successors (blocks which may run immediately after the current 2655bf215546Sopenharmony_ci * block) and predecessors (blocks which could have run immediately before the 2656bf215546Sopenharmony_ci * current block). Each function also has a start block and an end block which 2657bf215546Sopenharmony_ci * all return statements point to (which is always empty). Together, all the 2658bf215546Sopenharmony_ci * blocks with their predecessors and successors make up the control flow 2659bf215546Sopenharmony_ci * graph (CFG) of the function. There are helpers that modify the tree of 2660bf215546Sopenharmony_ci * control flow nodes while modifying the CFG appropriately; these should be 2661bf215546Sopenharmony_ci * used instead of modifying the tree directly. 
2662bf215546Sopenharmony_ci */ 2663bf215546Sopenharmony_ci 2664bf215546Sopenharmony_citypedef enum { 2665bf215546Sopenharmony_ci nir_cf_node_block, 2666bf215546Sopenharmony_ci nir_cf_node_if, 2667bf215546Sopenharmony_ci nir_cf_node_loop, 2668bf215546Sopenharmony_ci nir_cf_node_function 2669bf215546Sopenharmony_ci} nir_cf_node_type; 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_citypedef struct nir_cf_node { 2672bf215546Sopenharmony_ci struct exec_node node; 2673bf215546Sopenharmony_ci nir_cf_node_type type; 2674bf215546Sopenharmony_ci struct nir_cf_node *parent; 2675bf215546Sopenharmony_ci} nir_cf_node; 2676bf215546Sopenharmony_ci 2677bf215546Sopenharmony_citypedef struct nir_block { 2678bf215546Sopenharmony_ci nir_cf_node cf_node; 2679bf215546Sopenharmony_ci 2680bf215546Sopenharmony_ci struct exec_list instr_list; /** < list of nir_instr */ 2681bf215546Sopenharmony_ci 2682bf215546Sopenharmony_ci /** generic block index; generated by nir_index_blocks */ 2683bf215546Sopenharmony_ci unsigned index; 2684bf215546Sopenharmony_ci 2685bf215546Sopenharmony_ci /* 2686bf215546Sopenharmony_ci * Each block can only have up to 2 successors, so we put them in a simple 2687bf215546Sopenharmony_ci * array - no need for anything more complicated. 2688bf215546Sopenharmony_ci */ 2689bf215546Sopenharmony_ci struct nir_block *successors[2]; 2690bf215546Sopenharmony_ci 2691bf215546Sopenharmony_ci /* Set of nir_block predecessors in the CFG */ 2692bf215546Sopenharmony_ci struct set *predecessors; 2693bf215546Sopenharmony_ci 2694bf215546Sopenharmony_ci /* 2695bf215546Sopenharmony_ci * this node's immediate dominator in the dominance tree - set to NULL for 2696bf215546Sopenharmony_ci * the start block. 
2697bf215546Sopenharmony_ci */ 2698bf215546Sopenharmony_ci struct nir_block *imm_dom; 2699bf215546Sopenharmony_ci 2700bf215546Sopenharmony_ci /* This node's children in the dominance tree */ 2701bf215546Sopenharmony_ci unsigned num_dom_children; 2702bf215546Sopenharmony_ci struct nir_block **dom_children; 2703bf215546Sopenharmony_ci 2704bf215546Sopenharmony_ci /* Set of nir_blocks on the dominance frontier of this block */ 2705bf215546Sopenharmony_ci struct set *dom_frontier; 2706bf215546Sopenharmony_ci 2707bf215546Sopenharmony_ci /* 2708bf215546Sopenharmony_ci * These two indices have the property that dom_{pre,post}_index for each 2709bf215546Sopenharmony_ci * child of this block in the dominance tree will always be between 2710bf215546Sopenharmony_ci * dom_pre_index and dom_post_index for this block, which makes testing if 2711bf215546Sopenharmony_ci * a given block is dominated by another block an O(1) operation. 2712bf215546Sopenharmony_ci */ 2713bf215546Sopenharmony_ci uint32_t dom_pre_index, dom_post_index; 2714bf215546Sopenharmony_ci 2715bf215546Sopenharmony_ci /** 2716bf215546Sopenharmony_ci * Value just before the first nir_instr->index in the block, but after 2717bf215546Sopenharmony_ci * end_ip that of any predecessor block. 2718bf215546Sopenharmony_ci */ 2719bf215546Sopenharmony_ci uint32_t start_ip; 2720bf215546Sopenharmony_ci /** 2721bf215546Sopenharmony_ci * Value just after the last nir_instr->index in the block, but before the 2722bf215546Sopenharmony_ci * start_ip of any successor block. 2723bf215546Sopenharmony_ci */ 2724bf215546Sopenharmony_ci uint32_t end_ip; 2725bf215546Sopenharmony_ci 2726bf215546Sopenharmony_ci /* SSA def live in and out for this block; used for liveness analysis. 
2727bf215546Sopenharmony_ci * Indexed by ssa_def->index 2728bf215546Sopenharmony_ci */ 2729bf215546Sopenharmony_ci BITSET_WORD *live_in; 2730bf215546Sopenharmony_ci BITSET_WORD *live_out; 2731bf215546Sopenharmony_ci} nir_block; 2732bf215546Sopenharmony_ci 2733bf215546Sopenharmony_cistatic inline bool 2734bf215546Sopenharmony_cinir_block_is_reachable(nir_block *b) 2735bf215546Sopenharmony_ci{ 2736bf215546Sopenharmony_ci /* See also nir_block_dominates */ 2737bf215546Sopenharmony_ci return b->dom_post_index != 0; 2738bf215546Sopenharmony_ci} 2739bf215546Sopenharmony_ci 2740bf215546Sopenharmony_cistatic inline nir_instr * 2741bf215546Sopenharmony_cinir_block_first_instr(nir_block *block) 2742bf215546Sopenharmony_ci{ 2743bf215546Sopenharmony_ci struct exec_node *head = exec_list_get_head(&block->instr_list); 2744bf215546Sopenharmony_ci return exec_node_data(nir_instr, head, node); 2745bf215546Sopenharmony_ci} 2746bf215546Sopenharmony_ci 2747bf215546Sopenharmony_cistatic inline nir_instr * 2748bf215546Sopenharmony_cinir_block_last_instr(nir_block *block) 2749bf215546Sopenharmony_ci{ 2750bf215546Sopenharmony_ci struct exec_node *tail = exec_list_get_tail(&block->instr_list); 2751bf215546Sopenharmony_ci return exec_node_data(nir_instr, tail, node); 2752bf215546Sopenharmony_ci} 2753bf215546Sopenharmony_ci 2754bf215546Sopenharmony_cistatic inline bool 2755bf215546Sopenharmony_cinir_block_ends_in_jump(nir_block *block) 2756bf215546Sopenharmony_ci{ 2757bf215546Sopenharmony_ci return !exec_list_is_empty(&block->instr_list) && 2758bf215546Sopenharmony_ci nir_block_last_instr(block)->type == nir_instr_type_jump; 2759bf215546Sopenharmony_ci} 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_cistatic inline bool 2762bf215546Sopenharmony_cinir_block_ends_in_return_or_halt(nir_block *block) 2763bf215546Sopenharmony_ci{ 2764bf215546Sopenharmony_ci if (exec_list_is_empty(&block->instr_list)) 2765bf215546Sopenharmony_ci return false; 2766bf215546Sopenharmony_ci 
2767bf215546Sopenharmony_ci nir_instr *instr = nir_block_last_instr(block); 2768bf215546Sopenharmony_ci if (instr->type != nir_instr_type_jump) 2769bf215546Sopenharmony_ci return false; 2770bf215546Sopenharmony_ci 2771bf215546Sopenharmony_ci nir_jump_instr *jump_instr = nir_instr_as_jump(instr); 2772bf215546Sopenharmony_ci return jump_instr->type == nir_jump_return || 2773bf215546Sopenharmony_ci jump_instr->type == nir_jump_halt; 2774bf215546Sopenharmony_ci} 2775bf215546Sopenharmony_ci 2776bf215546Sopenharmony_cistatic inline bool 2777bf215546Sopenharmony_cinir_block_ends_in_break(nir_block *block) 2778bf215546Sopenharmony_ci{ 2779bf215546Sopenharmony_ci if (exec_list_is_empty(&block->instr_list)) 2780bf215546Sopenharmony_ci return false; 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci nir_instr *instr = nir_block_last_instr(block); 2783bf215546Sopenharmony_ci return instr->type == nir_instr_type_jump && 2784bf215546Sopenharmony_ci nir_instr_as_jump(instr)->type == nir_jump_break; 2785bf215546Sopenharmony_ci} 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_ci#define nir_foreach_instr(instr, block) \ 2788bf215546Sopenharmony_ci foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 2789bf215546Sopenharmony_ci#define nir_foreach_instr_reverse(instr, block) \ 2790bf215546Sopenharmony_ci foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 2791bf215546Sopenharmony_ci#define nir_foreach_instr_safe(instr, block) \ 2792bf215546Sopenharmony_ci foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 2793bf215546Sopenharmony_ci#define nir_foreach_instr_reverse_safe(instr, block) \ 2794bf215546Sopenharmony_ci foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 2795bf215546Sopenharmony_ci 2796bf215546Sopenharmony_cistatic inline nir_phi_instr * 2797bf215546Sopenharmony_cinir_block_last_phi_instr(nir_block *block) 2798bf215546Sopenharmony_ci{ 2799bf215546Sopenharmony_ci nir_phi_instr *last_phi = 
NULL; 2800bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 2801bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) 2802bf215546Sopenharmony_ci last_phi = nir_instr_as_phi(instr); 2803bf215546Sopenharmony_ci else 2804bf215546Sopenharmony_ci return last_phi; 2805bf215546Sopenharmony_ci } 2806bf215546Sopenharmony_ci return last_phi; 2807bf215546Sopenharmony_ci} 2808bf215546Sopenharmony_ci 2809bf215546Sopenharmony_citypedef enum { 2810bf215546Sopenharmony_ci nir_selection_control_none = 0x0, 2811bf215546Sopenharmony_ci nir_selection_control_flatten = 0x1, 2812bf215546Sopenharmony_ci nir_selection_control_dont_flatten = 0x2, 2813bf215546Sopenharmony_ci} nir_selection_control; 2814bf215546Sopenharmony_ci 2815bf215546Sopenharmony_citypedef struct nir_if { 2816bf215546Sopenharmony_ci nir_cf_node cf_node; 2817bf215546Sopenharmony_ci nir_src condition; 2818bf215546Sopenharmony_ci nir_selection_control control; 2819bf215546Sopenharmony_ci 2820bf215546Sopenharmony_ci struct exec_list then_list; /** < list of nir_cf_node */ 2821bf215546Sopenharmony_ci struct exec_list else_list; /** < list of nir_cf_node */ 2822bf215546Sopenharmony_ci} nir_if; 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_citypedef struct { 2825bf215546Sopenharmony_ci nir_if *nif; 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci /** Instruction that generates nif::condition. */ 2828bf215546Sopenharmony_ci nir_instr *conditional_instr; 2829bf215546Sopenharmony_ci 2830bf215546Sopenharmony_ci /** Block within ::nif that has the break instruction. */ 2831bf215546Sopenharmony_ci nir_block *break_block; 2832bf215546Sopenharmony_ci 2833bf215546Sopenharmony_ci /** Last block for the then- or else-path that does not contain the break. */ 2834bf215546Sopenharmony_ci nir_block *continue_from_block; 2835bf215546Sopenharmony_ci 2836bf215546Sopenharmony_ci /** True when ::break_block is in the else-path of ::nif. 
*/ 2837bf215546Sopenharmony_ci bool continue_from_then; 2838bf215546Sopenharmony_ci bool induction_rhs; 2839bf215546Sopenharmony_ci 2840bf215546Sopenharmony_ci /* This is true if the terminators exact trip count is unknown. For 2841bf215546Sopenharmony_ci * example: 2842bf215546Sopenharmony_ci * 2843bf215546Sopenharmony_ci * for (int i = 0; i < imin(x, 4); i++) 2844bf215546Sopenharmony_ci * ... 2845bf215546Sopenharmony_ci * 2846bf215546Sopenharmony_ci * Here loop analysis would have set a max_trip_count of 4 however we dont 2847bf215546Sopenharmony_ci * know for sure that this is the exact trip count. 2848bf215546Sopenharmony_ci */ 2849bf215546Sopenharmony_ci bool exact_trip_count_unknown; 2850bf215546Sopenharmony_ci 2851bf215546Sopenharmony_ci struct list_head loop_terminator_link; 2852bf215546Sopenharmony_ci} nir_loop_terminator; 2853bf215546Sopenharmony_ci 2854bf215546Sopenharmony_citypedef struct { 2855bf215546Sopenharmony_ci /* Induction variable. */ 2856bf215546Sopenharmony_ci nir_ssa_def *def; 2857bf215546Sopenharmony_ci 2858bf215546Sopenharmony_ci /* Init statement with only uniform. */ 2859bf215546Sopenharmony_ci nir_src *init_src; 2860bf215546Sopenharmony_ci 2861bf215546Sopenharmony_ci /* Update statement with only uniform. 
*/ 2862bf215546Sopenharmony_ci nir_alu_src *update_src; 2863bf215546Sopenharmony_ci} nir_loop_induction_variable; 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_citypedef struct { 2866bf215546Sopenharmony_ci /* Estimated cost (in number of instructions) of the loop */ 2867bf215546Sopenharmony_ci unsigned instr_cost; 2868bf215546Sopenharmony_ci 2869bf215546Sopenharmony_ci /* Guessed trip count based on array indexing */ 2870bf215546Sopenharmony_ci unsigned guessed_trip_count; 2871bf215546Sopenharmony_ci 2872bf215546Sopenharmony_ci /* Maximum number of times the loop is run (if known) */ 2873bf215546Sopenharmony_ci unsigned max_trip_count; 2874bf215546Sopenharmony_ci 2875bf215546Sopenharmony_ci /* Do we know the exact number of times the loop will be run */ 2876bf215546Sopenharmony_ci bool exact_trip_count_known; 2877bf215546Sopenharmony_ci 2878bf215546Sopenharmony_ci /* Unroll the loop regardless of its size */ 2879bf215546Sopenharmony_ci bool force_unroll; 2880bf215546Sopenharmony_ci 2881bf215546Sopenharmony_ci /* Does the loop contain complex loop terminators, continues or other 2882bf215546Sopenharmony_ci * complex behaviours? If this is true we can't rely on 2883bf215546Sopenharmony_ci * loop_terminator_list to be complete or accurate. 2884bf215546Sopenharmony_ci */ 2885bf215546Sopenharmony_ci bool complex_loop; 2886bf215546Sopenharmony_ci 2887bf215546Sopenharmony_ci nir_loop_terminator *limiting_terminator; 2888bf215546Sopenharmony_ci 2889bf215546Sopenharmony_ci /* A list of loop_terminators terminating this loop. 
*/ 2890bf215546Sopenharmony_ci struct list_head loop_terminator_list; 2891bf215546Sopenharmony_ci 2892bf215546Sopenharmony_ci /* array of induction variables for this loop */ 2893bf215546Sopenharmony_ci nir_loop_induction_variable *induction_vars; 2894bf215546Sopenharmony_ci unsigned num_induction_vars; 2895bf215546Sopenharmony_ci} nir_loop_info; 2896bf215546Sopenharmony_ci 2897bf215546Sopenharmony_citypedef enum { 2898bf215546Sopenharmony_ci nir_loop_control_none = 0x0, 2899bf215546Sopenharmony_ci nir_loop_control_unroll = 0x1, 2900bf215546Sopenharmony_ci nir_loop_control_dont_unroll = 0x2, 2901bf215546Sopenharmony_ci} nir_loop_control; 2902bf215546Sopenharmony_ci 2903bf215546Sopenharmony_citypedef struct { 2904bf215546Sopenharmony_ci nir_cf_node cf_node; 2905bf215546Sopenharmony_ci 2906bf215546Sopenharmony_ci struct exec_list body; /** < list of nir_cf_node */ 2907bf215546Sopenharmony_ci 2908bf215546Sopenharmony_ci nir_loop_info *info; 2909bf215546Sopenharmony_ci nir_loop_control control; 2910bf215546Sopenharmony_ci bool partially_unrolled; 2911bf215546Sopenharmony_ci bool divergent; 2912bf215546Sopenharmony_ci} nir_loop; 2913bf215546Sopenharmony_ci 2914bf215546Sopenharmony_ci/** 2915bf215546Sopenharmony_ci * Various bits of metadata that can may be created or required by 2916bf215546Sopenharmony_ci * optimization and analysis passes 2917bf215546Sopenharmony_ci */ 2918bf215546Sopenharmony_citypedef enum { 2919bf215546Sopenharmony_ci nir_metadata_none = 0x0, 2920bf215546Sopenharmony_ci 2921bf215546Sopenharmony_ci /** Indicates that nir_block::index values are valid. 2922bf215546Sopenharmony_ci * 2923bf215546Sopenharmony_ci * The start block has index 0 and they increase through a natural walk of 2924bf215546Sopenharmony_ci * the CFG. nir_function_impl::num_blocks is the number of blocks and 2925bf215546Sopenharmony_ci * every block index is in the range [0, nir_function_impl::num_blocks]. 
2926bf215546Sopenharmony_ci * 2927bf215546Sopenharmony_ci * A pass can preserve this metadata type if it doesn't touch the CFG. 2928bf215546Sopenharmony_ci */ 2929bf215546Sopenharmony_ci nir_metadata_block_index = 0x1, 2930bf215546Sopenharmony_ci 2931bf215546Sopenharmony_ci /** Indicates that block dominance information is valid 2932bf215546Sopenharmony_ci * 2933bf215546Sopenharmony_ci * This includes: 2934bf215546Sopenharmony_ci * 2935bf215546Sopenharmony_ci * - nir_block::num_dom_children 2936bf215546Sopenharmony_ci * - nir_block::dom_children 2937bf215546Sopenharmony_ci * - nir_block::dom_frontier 2938bf215546Sopenharmony_ci * - nir_block::dom_pre_index 2939bf215546Sopenharmony_ci * - nir_block::dom_post_index 2940bf215546Sopenharmony_ci * 2941bf215546Sopenharmony_ci * A pass can preserve this metadata type if it doesn't touch the CFG. 2942bf215546Sopenharmony_ci */ 2943bf215546Sopenharmony_ci nir_metadata_dominance = 0x2, 2944bf215546Sopenharmony_ci 2945bf215546Sopenharmony_ci /** Indicates that SSA def data-flow liveness information is valid 2946bf215546Sopenharmony_ci * 2947bf215546Sopenharmony_ci * This includes: 2948bf215546Sopenharmony_ci * 2949bf215546Sopenharmony_ci * - nir_block::live_in 2950bf215546Sopenharmony_ci * - nir_block::live_out 2951bf215546Sopenharmony_ci * 2952bf215546Sopenharmony_ci * A pass can preserve this metadata type if it never adds or removes any 2953bf215546Sopenharmony_ci * SSA defs or uses of SSA defs (most passes shouldn't preserve this 2954bf215546Sopenharmony_ci * metadata type). 2955bf215546Sopenharmony_ci */ 2956bf215546Sopenharmony_ci nir_metadata_live_ssa_defs = 0x4, 2957bf215546Sopenharmony_ci 2958bf215546Sopenharmony_ci /** A dummy metadata value to track when a pass forgot to call 2959bf215546Sopenharmony_ci * nir_metadata_preserve. 
2960bf215546Sopenharmony_ci * 2961bf215546Sopenharmony_ci * A pass should always clear this value even if it doesn't make any 2962bf215546Sopenharmony_ci * progress to indicate that it thought about preserving metadata. 2963bf215546Sopenharmony_ci */ 2964bf215546Sopenharmony_ci nir_metadata_not_properly_reset = 0x8, 2965bf215546Sopenharmony_ci 2966bf215546Sopenharmony_ci /** Indicates that loop analysis information is valid. 2967bf215546Sopenharmony_ci * 2968bf215546Sopenharmony_ci * This includes everything pointed to by nir_loop::info. 2969bf215546Sopenharmony_ci * 2970bf215546Sopenharmony_ci * A pass can preserve this metadata type if it is guaranteed to not affect 2971bf215546Sopenharmony_ci * any loop metadata. However, since loop metadata includes things like 2972bf215546Sopenharmony_ci * loop counts which depend on arithmetic in the loop, this is very hard to 2973bf215546Sopenharmony_ci * determine. Most passes shouldn't preserve this metadata type. 2974bf215546Sopenharmony_ci */ 2975bf215546Sopenharmony_ci nir_metadata_loop_analysis = 0x10, 2976bf215546Sopenharmony_ci 2977bf215546Sopenharmony_ci /** Indicates that nir_instr::index values are valid. 2978bf215546Sopenharmony_ci * 2979bf215546Sopenharmony_ci * The start instruction has index 0 and they increase through a natural 2980bf215546Sopenharmony_ci * walk of instructions in blocks in the CFG. The indices my have holes 2981bf215546Sopenharmony_ci * after passes such as DCE. 2982bf215546Sopenharmony_ci * 2983bf215546Sopenharmony_ci * A pass can preserve this metadata type if it never adds or moves any 2984bf215546Sopenharmony_ci * instructions (most passes shouldn't preserve this metadata type), but 2985bf215546Sopenharmony_ci * can preserve it if it only removes instructions. 
2986bf215546Sopenharmony_ci */ 2987bf215546Sopenharmony_ci nir_metadata_instr_index = 0x20, 2988bf215546Sopenharmony_ci 2989bf215546Sopenharmony_ci /** All metadata 2990bf215546Sopenharmony_ci * 2991bf215546Sopenharmony_ci * This includes all nir_metadata flags except not_properly_reset. Passes 2992bf215546Sopenharmony_ci * which do not change the shader in any way should call 2993bf215546Sopenharmony_ci * 2994bf215546Sopenharmony_ci * nir_metadata_preserve(impl, nir_metadata_all); 2995bf215546Sopenharmony_ci */ 2996bf215546Sopenharmony_ci nir_metadata_all = ~nir_metadata_not_properly_reset, 2997bf215546Sopenharmony_ci} nir_metadata; 2998bf215546Sopenharmony_ciMESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata) 2999bf215546Sopenharmony_ci

/**
 * The implementation (body) of a nir_function.
 *
 * It embeds a nir_cf_node so that nir_cf_node_as_function() can cast a
 * control-flow node back to the implementation that owns it.
 */
typedef struct {
   /** Embedded control-flow node; see nir_cf_node_as_function(). */
   nir_cf_node cf_node;

   /** The function that this body implements. */
   struct nir_function *function;

   /**
    * Entrypoints only: the function which runs ahead of this one, once per
    * draw or dispatch, and communicates with it through the store_preamble
    * and load_preamble intrinsics.  NULL means there is no preamble.
    */
   struct nir_function *preamble;

   /** The body, as a list of nir_cf_node. */
   struct exec_list body;

   nir_block *end_block;

   /** Every local variable declared in the function. */
   struct exec_list locals;

   /** The function's local registers. */
   struct exec_list registers;

   /** Index that the next local register will receive. */
   unsigned reg_alloc;

   /** Index that the next SSA value will receive. */
   unsigned ssa_alloc;

   /* Number of basic blocks in total; only valid when
    * block_index_dirty = false.
    */
   unsigned num_blocks;

   /**
    * Set when this implementation uses structured control-flow.
    *
    * Validation rules differ for structured implementations.
    */
   bool structured;

   nir_metadata valid_metadata;
} nir_function_impl;

/** Visit each nir_variable on the "locals" list of \p impl. */
#define nir_foreach_function_temp_variable(var, impl) \
   foreach_list_typed(nir_variable, var, node, &(impl)->locals)

/** Same walk as above, built on the _safe flavour of the list iterator. */
#define nir_foreach_function_temp_variable_safe(var, impl) \
   foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)

/**
 * Returns the node that follows the head sentinel of impl->body, viewed as a
 * nir_block.
 *
 * NOTE(review): the plain pointer cast presumes the list node sits at offset
 * zero of nir_block; that layout is not visible from here -- confirm.
 */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_start_block(nir_function_impl *impl)
{
   struct exec_node *first = impl->body.head_sentinel.next;

   return (nir_block *) first;
}

/**
 * Returns the node that precedes the tail sentinel of impl->body, viewed as
 * a nir_block (same layout assumption as nir_start_block()).
 */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_impl_last_block(nir_function_impl *impl)
{
   struct exec_node *last = impl->body.tail_sentinel.prev;

   return (nir_block *) last;
}

/**
 * Returns the control-flow node that follows \p node in its list, or NULL
 * when \p node is the last real entry.
 */
static inline nir_cf_node *
nir_cf_node_next(nir_cf_node *node)
{
   struct exec_node *succ = exec_node_get_next(&node->node);

   if (!exec_node_is_tail_sentinel(succ))
      return exec_node_data(nir_cf_node, succ, node);

   return NULL;
}

/**
 * Returns the control-flow node that precedes \p node in its list, or NULL
 * when \p node is the first real entry.
 */
static inline nir_cf_node *
nir_cf_node_prev(nir_cf_node *node)
{
   struct exec_node *pred = exec_node_get_prev(&node->node);

   if (!exec_node_is_head_sentinel(pred))
      return exec_node_data(nir_cf_node, pred, node);

   return NULL;
}

/** True when the entry before \p node is the list's head sentinel. */
static inline bool
nir_cf_node_is_first(const nir_cf_node *node)
{
   struct exec_node *before = node->node.prev;

   return exec_node_is_head_sentinel(before);
}

/** True when the entry after \p node is the list's tail sentinel. */
static inline bool
nir_cf_node_is_last(const nir_cf_node *node)
{
   struct exec_node *after = node->node.next;

   return exec_node_is_tail_sentinel(after);
}

/* Casts from a generic nir_cf_node to the concrete node kinds. */
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
                type, nir_cf_node_block)
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
                type, nir_cf_node_if)
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
                type, nir_cf_node_loop)
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
                nir_function_impl, cf_node, type, nir_cf_node_function)

/** Head of the if's then_list, as a block. */
static inline nir_block *
nir_if_first_then_block(nir_if *if_stmt)
{
   struct exec_node *first = exec_list_get_head(&if_stmt->then_list);
   nir_cf_node *cf = exec_node_data(nir_cf_node, first, node);

   return nir_cf_node_as_block(cf);
}

/** Tail of the if's then_list, as a block. */
static inline nir_block *
nir_if_last_then_block(nir_if *if_stmt)
{
   struct exec_node *last = exec_list_get_tail(&if_stmt->then_list);
   nir_cf_node *cf = exec_node_data(nir_cf_node, last, node);

   return nir_cf_node_as_block(cf);
}

/** Head of the if's else_list, as a block. */
static inline nir_block *
nir_if_first_else_block(nir_if *if_stmt)
{
   struct exec_node *first = exec_list_get_head(&if_stmt->else_list);
   nir_cf_node *cf = exec_node_data(nir_cf_node, first, node);

   return nir_cf_node_as_block(cf);
}

/** Tail of the if's else_list, as a block. */
static inline nir_block *
nir_if_last_else_block(nir_if *if_stmt)
{
   struct exec_node *last = exec_list_get_tail(&if_stmt->else_list);
   nir_cf_node *cf = exec_node_data(nir_cf_node, last, node);

   return nir_cf_node_as_block(cf);
}

/** Head of the loop's body list, as a block. */
static inline nir_block *
nir_loop_first_block(nir_loop *loop)
{
   struct exec_node *first = exec_list_get_head(&loop->body);
   nir_cf_node *cf = exec_node_data(nir_cf_node, first, node);

   return nir_cf_node_as_block(cf);
}

/** Tail of the loop's body list, as a block. */
static inline nir_block *
nir_loop_last_block(nir_loop *loop)
{
   struct exec_node *last = exec_list_get_tail(&loop->body);
   nir_cf_node *cf = exec_node_data(nir_cf_node, last, node);

   return nir_cf_node_as_block(cf);
}

/**
 * Tells whether a list of cf_nodes consists of exactly one block and that
 * block holds no instructions.
 */
static inline bool
nir_cf_list_is_empty_block(struct exec_list *cf_list)
{
   if (!exec_list_is_singular(cf_list))
      return false;

   struct exec_node *only = exec_list_get_head(cf_list);
   nir_cf_node *cf = exec_node_data(nir_cf_node, only, node);
   nir_block *block = nir_cf_node_as_block(cf);

   return exec_list_is_empty(&block->instr_list);
}

/** One function parameter: a component count plus a bit size. */
typedef struct {
   uint8_t num_components;
   uint8_t bit_size;
} nir_parameter;

/**
 * Description of one printf: an argument count with a per-argument size
 * array, and a string buffer with its size.
 */
typedef struct nir_printf_info {
   unsigned num_args;
   unsigned *arg_sizes;
   unsigned string_size;
   char *strings;
} nir_printf_info;

/** A function: its name, owning shader, parameters and optional body. */
typedef struct nir_function {
   struct exec_node node;

   const char *name;
   struct nir_shader *shader;

   unsigned num_params;
   nir_parameter *params;

   /**
    * The body of this function.
    *
    * NULL when the function is only declared and has no implementation.
    */
   nir_function_impl *impl;

   bool is_entrypoint;
   bool is_preamble;
} nir_function;

/**
 * Bit flags, one per kind of 64-bit integer operation.
 *
 * NOTE(review): presumably these pick which operations a lowering pass
 * rewrites -- the consumer is not visible here, confirm there.
 */
typedef enum {
   nir_lower_imul64                = (1 << 0),
   nir_lower_isign64               = (1 << 1),
   /** Lower every int64 modulus and division opcode */
   nir_lower_divmod64              = (1 << 2),
   /** Lower every 64-bit umul_high and imul_high opcode */
   nir_lower_imul_high64           = (1 << 3),
   nir_lower_mov64                 = (1 << 4),
   nir_lower_icmp64                = (1 << 5),
   nir_lower_iadd64                = (1 << 6),
   nir_lower_iabs64                = (1 << 7),
   nir_lower_ineg64                = (1 << 8),
   nir_lower_logic64               = (1 << 9),
   nir_lower_minmax64              = (1 << 10),
   nir_lower_shift64               = (1 << 11),
   nir_lower_imul_2x32_64          = (1 << 12),
   nir_lower_extract64             = (1 << 13),
   nir_lower_ufind_msb64           = (1 << 14),
   nir_lower_bit_count64           = (1 << 15),
   nir_lower_subgroup_shuffle64    = (1 << 16),
   nir_lower_scan_reduce_bitwise64 = (1 << 17),
   nir_lower_scan_reduce_iadd64    = (1 << 18),
   nir_lower_vote_ieq64            = (1 << 19),
   nir_lower_usub_sat64            = (1 << 20),
   nir_lower_iadd_sat64            = (1 << 21),
} nir_lower_int64_options;

/** Bit flags, one per kind of double-precision operation. */
typedef enum {
   nir_lower_drcp               = (1 << 0),
   nir_lower_dsqrt              = (1 << 1),
   nir_lower_drsq               = (1 << 2),
   nir_lower_dtrunc             = (1 << 3),
   nir_lower_dfloor             = (1 << 4),
   nir_lower_dceil              = (1 << 5),
   nir_lower_dfract             = (1 << 6),
   nir_lower_dround_even        = (1 << 7),
   nir_lower_dmod               = (1 << 8),
   nir_lower_dsub               = (1 << 9),
   nir_lower_ddiv               = (1 << 10),
   nir_lower_fp64_full_software = (1 << 11),
} nir_lower_doubles_options;

/** Bit flags consumed as divergence-analysis options. */
typedef enum {
   nir_divergence_single_prim_per_subgroup                = (1 << 0),
   nir_divergence_single_patch_per_tcs_subgroup           = (1 << 1),
   nir_divergence_single_patch_per_tes_subgroup           = (1 << 2),
   nir_divergence_view_index_uniform                      = (1 << 3),
   nir_divergence_single_frag_shading_rate_per_subgroup   = (1 << 4),
   nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
} nir_divergence_options;

/**
 * Bit flags for varying packing: interpolation modes occupy bits 0-3 and
 * interpolation locations occupy bits 16-18.
 */
typedef enum {
   nir_pack_varying_interp_mode_none          = (1 << 0),
   nir_pack_varying_interp_mode_smooth        = (1 << 1),
   nir_pack_varying_interp_mode_flat          = (1 << 2),
   nir_pack_varying_interp_mode_noperspective = (1 << 3),
   nir_pack_varying_interp_loc_sample         = (1 << 16),
   nir_pack_varying_interp_loc_centroid       = (1 << 17),
   nir_pack_varying_interp_loc_center         = (1 << 18),
} nir_pack_varying_options;
3248bf215546Sopenharmony_ci 3249bf215546Sopenharmony_ci/** An instruction filtering callback 3250bf215546Sopenharmony_ci * 3251bf215546Sopenharmony_ci * Returns true if the instruction should be processed and false otherwise. 3252bf215546Sopenharmony_ci */ 3253bf215546Sopenharmony_citypedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); 3254bf215546Sopenharmony_ci 3255bf215546Sopenharmony_ci/** A vectorization width callback 3256bf215546Sopenharmony_ci * 3257bf215546Sopenharmony_ci * Returns the maximum vectorization width per instruction. 3258bf215546Sopenharmony_ci * 0, if the instruction must not be modified. 3259bf215546Sopenharmony_ci * 3260bf215546Sopenharmony_ci * The vectorization width must be a power of 2. 3261bf215546Sopenharmony_ci */ 3262bf215546Sopenharmony_citypedef uint8_t (*nir_vectorize_cb)(const nir_instr *, const void *); 3263bf215546Sopenharmony_ci 3264bf215546Sopenharmony_citypedef struct nir_shader_compiler_options { 3265bf215546Sopenharmony_ci bool lower_fdiv; 3266bf215546Sopenharmony_ci bool lower_ffma16; 3267bf215546Sopenharmony_ci bool lower_ffma32; 3268bf215546Sopenharmony_ci bool lower_ffma64; 3269bf215546Sopenharmony_ci bool fuse_ffma16; 3270bf215546Sopenharmony_ci bool fuse_ffma32; 3271bf215546Sopenharmony_ci bool fuse_ffma64; 3272bf215546Sopenharmony_ci bool lower_flrp16; 3273bf215546Sopenharmony_ci bool lower_flrp32; 3274bf215546Sopenharmony_ci /** Lowers flrp when it does not support doubles */ 3275bf215546Sopenharmony_ci bool lower_flrp64; 3276bf215546Sopenharmony_ci bool lower_fpow; 3277bf215546Sopenharmony_ci bool lower_fsat; 3278bf215546Sopenharmony_ci bool lower_fsqrt; 3279bf215546Sopenharmony_ci bool lower_sincos; 3280bf215546Sopenharmony_ci bool lower_fmod; 3281bf215546Sopenharmony_ci /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */ 3282bf215546Sopenharmony_ci bool lower_bitfield_extract; 3283bf215546Sopenharmony_ci /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. 
*/ 3284bf215546Sopenharmony_ci bool lower_bitfield_extract_to_shifts; 3285bf215546Sopenharmony_ci /** Lowers bitfield_insert to bfi/bfm */ 3286bf215546Sopenharmony_ci bool lower_bitfield_insert; 3287bf215546Sopenharmony_ci /** Lowers bitfield_insert to compares, and shifts. */ 3288bf215546Sopenharmony_ci bool lower_bitfield_insert_to_shifts; 3289bf215546Sopenharmony_ci /** Lowers bitfield_insert to bfm/bitfield_select. */ 3290bf215546Sopenharmony_ci bool lower_bitfield_insert_to_bitfield_select; 3291bf215546Sopenharmony_ci /** Lowers bitfield_reverse to shifts. */ 3292bf215546Sopenharmony_ci bool lower_bitfield_reverse; 3293bf215546Sopenharmony_ci /** Lowers bit_count to shifts. */ 3294bf215546Sopenharmony_ci bool lower_bit_count; 3295bf215546Sopenharmony_ci /** Lowers ifind_msb to compare and ufind_msb */ 3296bf215546Sopenharmony_ci bool lower_ifind_msb; 3297bf215546Sopenharmony_ci /** Lowers ifind_msb and ufind_msb to reverse variants */ 3298bf215546Sopenharmony_ci bool lower_find_msb_to_reverse; 3299bf215546Sopenharmony_ci /** Lowers find_lsb to ufind_msb and logic ops */ 3300bf215546Sopenharmony_ci bool lower_find_lsb; 3301bf215546Sopenharmony_ci bool lower_uadd_carry; 3302bf215546Sopenharmony_ci bool lower_usub_borrow; 3303bf215546Sopenharmony_ci /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 3304bf215546Sopenharmony_ci bool lower_mul_high; 3305bf215546Sopenharmony_ci /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */ 3306bf215546Sopenharmony_ci bool lower_fneg; 3307bf215546Sopenharmony_ci /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */ 3308bf215546Sopenharmony_ci bool lower_ineg; 3309bf215546Sopenharmony_ci /** lowers fisnormal to alu ops. 
*/ 3310bf215546Sopenharmony_ci bool lower_fisnormal; 3311bf215546Sopenharmony_ci 3312bf215546Sopenharmony_ci /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ 3313bf215546Sopenharmony_ci bool lower_scmp; 3314bf215546Sopenharmony_ci 3315bf215546Sopenharmony_ci /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ 3316bf215546Sopenharmony_ci bool lower_vector_cmp; 3317bf215546Sopenharmony_ci 3318bf215546Sopenharmony_ci /** enable rules to avoid bit ops */ 3319bf215546Sopenharmony_ci bool lower_bitops; 3320bf215546Sopenharmony_ci 3321bf215546Sopenharmony_ci /** enables rules to lower isign to imin+imax */ 3322bf215546Sopenharmony_ci bool lower_isign; 3323bf215546Sopenharmony_ci 3324bf215546Sopenharmony_ci /** enables rules to lower fsign to fsub and flt */ 3325bf215546Sopenharmony_ci bool lower_fsign; 3326bf215546Sopenharmony_ci 3327bf215546Sopenharmony_ci /** enables rules to lower iabs to ineg+imax */ 3328bf215546Sopenharmony_ci bool lower_iabs; 3329bf215546Sopenharmony_ci 3330bf215546Sopenharmony_ci /** enable rules that avoid generating umax from signed integer ops */ 3331bf215546Sopenharmony_ci bool lower_umax; 3332bf215546Sopenharmony_ci 3333bf215546Sopenharmony_ci /** enable rules that avoid generating umin from signed integer ops */ 3334bf215546Sopenharmony_ci bool lower_umin; 3335bf215546Sopenharmony_ci 3336bf215546Sopenharmony_ci /* lower fdph to fdot4 */ 3337bf215546Sopenharmony_ci bool lower_fdph; 3338bf215546Sopenharmony_ci 3339bf215546Sopenharmony_ci /** lower fdot to fmul and fsum/fadd. */ 3340bf215546Sopenharmony_ci bool lower_fdot; 3341bf215546Sopenharmony_ci 3342bf215546Sopenharmony_ci /* Does the native fdot instruction replicate its result for four 3343bf215546Sopenharmony_ci * components? If so, then opt_algebraic_late will turn all fdotN 3344bf215546Sopenharmony_ci * instructions into fdotN_replicated instructions. 
3345bf215546Sopenharmony_ci */ 3346bf215546Sopenharmony_ci bool fdot_replicates; 3347bf215546Sopenharmony_ci 3348bf215546Sopenharmony_ci /** lowers ffloor to fsub+ffract: */ 3349bf215546Sopenharmony_ci bool lower_ffloor; 3350bf215546Sopenharmony_ci 3351bf215546Sopenharmony_ci /** lowers ffract to fsub+ffloor: */ 3352bf215546Sopenharmony_ci bool lower_ffract; 3353bf215546Sopenharmony_ci 3354bf215546Sopenharmony_ci /** lowers fceil to fneg+ffloor+fneg: */ 3355bf215546Sopenharmony_ci bool lower_fceil; 3356bf215546Sopenharmony_ci 3357bf215546Sopenharmony_ci bool lower_ftrunc; 3358bf215546Sopenharmony_ci 3359bf215546Sopenharmony_ci /** Lowers fround_even to ffract+feq+csel. 3360bf215546Sopenharmony_ci * 3361bf215546Sopenharmony_ci * Not correct in that it doesn't correctly handle the "_even" part of the 3362bf215546Sopenharmony_ci * rounding, but good enough for DX9 array indexing handling on DX9-class 3363bf215546Sopenharmony_ci * hardware. 3364bf215546Sopenharmony_ci */ 3365bf215546Sopenharmony_ci bool lower_fround_even; 3366bf215546Sopenharmony_ci 3367bf215546Sopenharmony_ci bool lower_ldexp; 3368bf215546Sopenharmony_ci 3369bf215546Sopenharmony_ci bool lower_pack_half_2x16; 3370bf215546Sopenharmony_ci bool lower_pack_unorm_2x16; 3371bf215546Sopenharmony_ci bool lower_pack_snorm_2x16; 3372bf215546Sopenharmony_ci bool lower_pack_unorm_4x8; 3373bf215546Sopenharmony_ci bool lower_pack_snorm_4x8; 3374bf215546Sopenharmony_ci bool lower_pack_64_2x32; 3375bf215546Sopenharmony_ci bool lower_pack_64_4x16; 3376bf215546Sopenharmony_ci bool lower_pack_32_2x16; 3377bf215546Sopenharmony_ci bool lower_pack_64_2x32_split; 3378bf215546Sopenharmony_ci bool lower_pack_32_2x16_split; 3379bf215546Sopenharmony_ci bool lower_unpack_half_2x16; 3380bf215546Sopenharmony_ci bool lower_unpack_unorm_2x16; 3381bf215546Sopenharmony_ci bool lower_unpack_snorm_2x16; 3382bf215546Sopenharmony_ci bool lower_unpack_unorm_4x8; 3383bf215546Sopenharmony_ci bool lower_unpack_snorm_4x8; 
3384bf215546Sopenharmony_ci bool lower_unpack_64_2x32_split; 3385bf215546Sopenharmony_ci bool lower_unpack_32_2x16_split; 3386bf215546Sopenharmony_ci 3387bf215546Sopenharmony_ci bool lower_pack_split; 3388bf215546Sopenharmony_ci 3389bf215546Sopenharmony_ci bool lower_extract_byte; 3390bf215546Sopenharmony_ci bool lower_extract_word; 3391bf215546Sopenharmony_ci bool lower_insert_byte; 3392bf215546Sopenharmony_ci bool lower_insert_word; 3393bf215546Sopenharmony_ci 3394bf215546Sopenharmony_ci bool lower_all_io_to_temps; 3395bf215546Sopenharmony_ci bool lower_all_io_to_elements; 3396bf215546Sopenharmony_ci 3397bf215546Sopenharmony_ci /* Indicates that the driver only has zero-based vertex id */ 3398bf215546Sopenharmony_ci bool vertex_id_zero_based; 3399bf215546Sopenharmony_ci 3400bf215546Sopenharmony_ci /** 3401bf215546Sopenharmony_ci * If enabled, gl_BaseVertex will be lowered as: 3402bf215546Sopenharmony_ci * is_indexed_draw (~0/0) & firstvertex 3403bf215546Sopenharmony_ci */ 3404bf215546Sopenharmony_ci bool lower_base_vertex; 3405bf215546Sopenharmony_ci 3406bf215546Sopenharmony_ci /** 3407bf215546Sopenharmony_ci * If enabled, gl_HelperInvocation will be lowered as: 3408bf215546Sopenharmony_ci * 3409bf215546Sopenharmony_ci * !((1 << sample_id) & sample_mask_in)) 3410bf215546Sopenharmony_ci * 3411bf215546Sopenharmony_ci * This depends on some possibly hw implementation details, which may 3412bf215546Sopenharmony_ci * not be true for all hw. In particular that the FS is only executed 3413bf215546Sopenharmony_ci * for covered samples or for helper invocations. So, do not blindly 3414bf215546Sopenharmony_ci * enable this option. 
3415bf215546Sopenharmony_ci * 3416bf215546Sopenharmony_ci * Note: See also issue #22 in ARB_shader_image_load_store 3417bf215546Sopenharmony_ci */ 3418bf215546Sopenharmony_ci bool lower_helper_invocation; 3419bf215546Sopenharmony_ci 3420bf215546Sopenharmony_ci /** 3421bf215546Sopenharmony_ci * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 3422bf215546Sopenharmony_ci * 3423bf215546Sopenharmony_ci * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 3424bf215546Sopenharmony_ci * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 3425bf215546Sopenharmony_ci */ 3426bf215546Sopenharmony_ci bool optimize_sample_mask_in; 3427bf215546Sopenharmony_ci 3428bf215546Sopenharmony_ci bool lower_cs_local_index_to_id; 3429bf215546Sopenharmony_ci bool lower_cs_local_id_to_index; 3430bf215546Sopenharmony_ci 3431bf215546Sopenharmony_ci /* Prevents lowering global_invocation_id to be in terms of workgroup_id */ 3432bf215546Sopenharmony_ci bool has_cs_global_id; 3433bf215546Sopenharmony_ci 3434bf215546Sopenharmony_ci bool lower_device_index_to_zero; 3435bf215546Sopenharmony_ci 3436bf215546Sopenharmony_ci /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord. 3437bf215546Sopenharmony_ci * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN 3438bf215546Sopenharmony_ci * is GL_LOWER_LEFT. 3439bf215546Sopenharmony_ci */ 3440bf215546Sopenharmony_ci bool lower_wpos_pntc; 3441bf215546Sopenharmony_ci 3442bf215546Sopenharmony_ci /** 3443bf215546Sopenharmony_ci * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be 3444bf215546Sopenharmony_ci * lowered to simple arithmetic. 3445bf215546Sopenharmony_ci * 3446bf215546Sopenharmony_ci * If this flag is set, the lowering will be applied to all bit-sizes of 3447bf215546Sopenharmony_ci * these instructions. 
3448bf215546Sopenharmony_ci * 3449bf215546Sopenharmony_ci * \sa ::lower_hadd64 3450bf215546Sopenharmony_ci */ 3451bf215546Sopenharmony_ci bool lower_hadd; 3452bf215546Sopenharmony_ci 3453bf215546Sopenharmony_ci /** 3454bf215546Sopenharmony_ci * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions 3455bf215546Sopenharmony_ci * should be lowered to simple arithmetic. 3456bf215546Sopenharmony_ci * 3457bf215546Sopenharmony_ci * If this flag is set, the lowering will be applied to only 64-bit 3458bf215546Sopenharmony_ci * versions of these instructions. 3459bf215546Sopenharmony_ci * 3460bf215546Sopenharmony_ci * \sa ::lower_hadd 3461bf215546Sopenharmony_ci */ 3462bf215546Sopenharmony_ci bool lower_hadd64; 3463bf215546Sopenharmony_ci 3464bf215546Sopenharmony_ci /** 3465bf215546Sopenharmony_ci * Set if nir_op_uadd_sat should be lowered to simple arithmetic. 3466bf215546Sopenharmony_ci * 3467bf215546Sopenharmony_ci * If this flag is set, the lowering will be applied to all bit-sizes of 3468bf215546Sopenharmony_ci * these instructions. 3469bf215546Sopenharmony_ci */ 3470bf215546Sopenharmony_ci bool lower_uadd_sat; 3471bf215546Sopenharmony_ci 3472bf215546Sopenharmony_ci /** 3473bf215546Sopenharmony_ci * Set if nir_op_usub_sat should be lowered to simple arithmetic. 3474bf215546Sopenharmony_ci * 3475bf215546Sopenharmony_ci * If this flag is set, the lowering will be applied to all bit-sizes of 3476bf215546Sopenharmony_ci * these instructions. 3477bf215546Sopenharmony_ci */ 3478bf215546Sopenharmony_ci bool lower_usub_sat; 3479bf215546Sopenharmony_ci 3480bf215546Sopenharmony_ci /** 3481bf215546Sopenharmony_ci * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple 3482bf215546Sopenharmony_ci * arithmetic. 3483bf215546Sopenharmony_ci * 3484bf215546Sopenharmony_ci * If this flag is set, the lowering will be applied to all bit-sizes of 3485bf215546Sopenharmony_ci * these instructions. 
3486bf215546Sopenharmony_ci */ 3487bf215546Sopenharmony_ci bool lower_iadd_sat; 3488bf215546Sopenharmony_ci 3489bf215546Sopenharmony_ci /** 3490bf215546Sopenharmony_ci * Set if imul_32x16 and umul_32x16 should be lowered to simple 3491bf215546Sopenharmony_ci * arithmetic. 3492bf215546Sopenharmony_ci */ 3493bf215546Sopenharmony_ci bool lower_mul_32x16; 3494bf215546Sopenharmony_ci 3495bf215546Sopenharmony_ci /** 3496bf215546Sopenharmony_ci * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's 3497bf215546Sopenharmony_ci * for IO purposes and would prefer loads/stores be vectorized. 3498bf215546Sopenharmony_ci */ 3499bf215546Sopenharmony_ci bool vectorize_io; 3500bf215546Sopenharmony_ci bool lower_to_scalar; 3501bf215546Sopenharmony_ci nir_instr_filter_cb lower_to_scalar_filter; 3502bf215546Sopenharmony_ci 3503bf215546Sopenharmony_ci /** 3504bf215546Sopenharmony_ci * Disables potentially harmful algebraic transformations for architectures 3505bf215546Sopenharmony_ci * with SIMD-within-a-register semantics. 3506bf215546Sopenharmony_ci * 3507bf215546Sopenharmony_ci * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize() 3508bf215546Sopenharmony_ci * with a suitable callback function. 3509bf215546Sopenharmony_ci */ 3510bf215546Sopenharmony_ci bool vectorize_vec2_16bit; 3511bf215546Sopenharmony_ci 3512bf215546Sopenharmony_ci /** 3513bf215546Sopenharmony_ci * Should the linker unify inputs_read/outputs_written between adjacent 3514bf215546Sopenharmony_ci * shader stages which are linked into a single program? 3515bf215546Sopenharmony_ci */ 3516bf215546Sopenharmony_ci bool unify_interfaces; 3517bf215546Sopenharmony_ci 3518bf215546Sopenharmony_ci /** 3519bf215546Sopenharmony_ci * Should nir_lower_io() create load_interpolated_input intrinsics? 
3520bf215546Sopenharmony_ci * 3521bf215546Sopenharmony_ci * If not, it generates regular load_input intrinsics and interpolation 3522bf215546Sopenharmony_ci * information must be inferred from the list of input nir_variables. 3523bf215546Sopenharmony_ci */ 3524bf215546Sopenharmony_ci bool use_interpolated_input_intrinsics; 3525bf215546Sopenharmony_ci 3526bf215546Sopenharmony_ci 3527bf215546Sopenharmony_ci /** 3528bf215546Sopenharmony_ci * Whether nir_lower_io() will lower interpolateAt functions to 3529bf215546Sopenharmony_ci * load_interpolated_input intrinsics. 3530bf215546Sopenharmony_ci * 3531bf215546Sopenharmony_ci * Unlike use_interpolated_input_intrinsics this will only lower these 3532bf215546Sopenharmony_ci * functions and leave input load intrinsics untouched. 3533bf215546Sopenharmony_ci */ 3534bf215546Sopenharmony_ci bool lower_interpolate_at; 3535bf215546Sopenharmony_ci 3536bf215546Sopenharmony_ci /* Lowers when 32x32->64 bit multiplication is not supported */ 3537bf215546Sopenharmony_ci bool lower_mul_2x32_64; 3538bf215546Sopenharmony_ci 3539bf215546Sopenharmony_ci /* Lowers when rotate instruction is not supported */ 3540bf215546Sopenharmony_ci bool lower_rotate; 3541bf215546Sopenharmony_ci 3542bf215546Sopenharmony_ci /** Backend supports ternary addition */ 3543bf215546Sopenharmony_ci bool has_iadd3; 3544bf215546Sopenharmony_ci 3545bf215546Sopenharmony_ci /** 3546bf215546Sopenharmony_ci * Backend supports imul24, and would like to use it (when possible) 3547bf215546Sopenharmony_ci * for address/offset calculation. If true, driver should call 3548bf215546Sopenharmony_ci * nir_lower_amul(). (If not set, amul will automatically be lowered 3549bf215546Sopenharmony_ci * to imul.) 
3550bf215546Sopenharmony_ci */ 3551bf215546Sopenharmony_ci bool has_imul24; 3552bf215546Sopenharmony_ci 3553bf215546Sopenharmony_ci /** Backend supports umul24, if not set umul24 will automatically be lowered 3554bf215546Sopenharmony_ci * to imul with masked inputs */ 3555bf215546Sopenharmony_ci bool has_umul24; 3556bf215546Sopenharmony_ci 3557bf215546Sopenharmony_ci /** Backend supports umad24, if not set umad24 will automatically be lowered 3558bf215546Sopenharmony_ci * to imul with masked inputs and iadd */ 3559bf215546Sopenharmony_ci bool has_umad24; 3560bf215546Sopenharmony_ci 3561bf215546Sopenharmony_ci /* Backend supports fused compare against zero and csel */ 3562bf215546Sopenharmony_ci bool has_fused_comp_and_csel; 3563bf215546Sopenharmony_ci 3564bf215546Sopenharmony_ci /** Backend supports fsub, if not set fsub will automatically be lowered to 3565bf215546Sopenharmony_ci * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3566bf215546Sopenharmony_ci bool has_fsub; 3567bf215546Sopenharmony_ci 3568bf215546Sopenharmony_ci /** Backend supports isub, if not set isub will automatically be lowered to 3569bf215546Sopenharmony_ci * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3570bf215546Sopenharmony_ci bool has_isub; 3571bf215546Sopenharmony_ci 3572bf215546Sopenharmony_ci /** Backend supports pack_32_4x8 or pack_32_4x8_split. */ 3573bf215546Sopenharmony_ci bool has_pack_32_4x8; 3574bf215546Sopenharmony_ci 3575bf215546Sopenharmony_ci /** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants 3576bf215546Sopenharmony_ci * for rect texture lowering. */ 3577bf215546Sopenharmony_ci bool has_txs; 3578bf215546Sopenharmony_ci 3579bf215546Sopenharmony_ci /** Backend supports sdot_4x8 opcodes. */ 3580bf215546Sopenharmony_ci bool has_sdot_4x8; 3581bf215546Sopenharmony_ci 3582bf215546Sopenharmony_ci /** Backend supports udot_4x8 opcodes.
*/ 3583bf215546Sopenharmony_ci bool has_udot_4x8; 3584bf215546Sopenharmony_ci 3585bf215546Sopenharmony_ci /** Backend supports sudot_4x8 opcodes. */ 3586bf215546Sopenharmony_ci bool has_sudot_4x8; 3587bf215546Sopenharmony_ci 3588bf215546Sopenharmony_ci /** Backend supports sdot_2x16 and udot_2x16 opcodes. */ 3589bf215546Sopenharmony_ci bool has_dot_2x16; 3590bf215546Sopenharmony_ci 3591bf215546Sopenharmony_ci /* Whether to generate only scoped_barrier intrinsics instead of the set of 3592bf215546Sopenharmony_ci * memory and control barrier intrinsics based on GLSL. 3593bf215546Sopenharmony_ci */ 3594bf215546Sopenharmony_ci bool use_scoped_barrier; 3595bf215546Sopenharmony_ci 3596bf215546Sopenharmony_ci /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ 3597bf215546Sopenharmony_ci bool has_fmulz; 3598bf215546Sopenharmony_ci 3599bf215546Sopenharmony_ci /** 3600bf215546Sopenharmony_ci * Is this the Intel vec4 backend? 3601bf215546Sopenharmony_ci * 3602bf215546Sopenharmony_ci * Used to inhibit algebraic optimizations that are known to be harmful on 3603bf215546Sopenharmony_ci * the Intel vec4 backend. This is generally applicable to any 3604bf215546Sopenharmony_ci * optimization that might cause more immediate values to be used in 3605bf215546Sopenharmony_ci * 3-source (e.g., ffma and flrp) instructions. 3606bf215546Sopenharmony_ci */ 3607bf215546Sopenharmony_ci bool intel_vec4; 3608bf215546Sopenharmony_ci 3609bf215546Sopenharmony_ci /** 3610bf215546Sopenharmony_ci * For most Intel GPUs, all ternary operations such as FMA and BFE cannot 3611bf215546Sopenharmony_ci * have immediates, so two to three instructions may eventually be needed. 3612bf215546Sopenharmony_ci */ 3613bf215546Sopenharmony_ci bool avoid_ternary_with_two_constants; 3614bf215546Sopenharmony_ci 3615bf215546Sopenharmony_ci /** Whether 8-bit ALU is supported. 
*/ 3616bf215546Sopenharmony_ci bool support_8bit_alu; 3617bf215546Sopenharmony_ci 3618bf215546Sopenharmony_ci /** Whether 16-bit ALU is supported. */ 3619bf215546Sopenharmony_ci bool support_16bit_alu; 3620bf215546Sopenharmony_ci 3621bf215546Sopenharmony_ci unsigned max_unroll_iterations; 3622bf215546Sopenharmony_ci unsigned max_unroll_iterations_aggressive; 3623bf215546Sopenharmony_ci 3624bf215546Sopenharmony_ci bool lower_uniforms_to_ubo; 3625bf215546Sopenharmony_ci 3626bf215546Sopenharmony_ci /* If the precision is ignored, backends that don't handle 3627bf215546Sopenharmony_ci * different precisions when passing data between stages and use 3628bf215546Sopenharmony_ci * vectorized IO can pack more varyings when linking. */ 3629bf215546Sopenharmony_ci bool linker_ignore_precision; 3630bf215546Sopenharmony_ci 3631bf215546Sopenharmony_ci /* Specifies if indirect sampler array access will trigger forced loop 3632bf215546Sopenharmony_ci * unrolling. 3633bf215546Sopenharmony_ci */ 3634bf215546Sopenharmony_ci bool force_indirect_unrolling_sampler; 3635bf215546Sopenharmony_ci 3636bf215546Sopenharmony_ci /* Some older drivers don't support GLSL versions with the concept of flat 3637bf215546Sopenharmony_ci * varyings and also don't support integers. This setting helps us avoid 3638bf215546Sopenharmony_ci * marking varyings as flat and potentially having them changed to ints via 3639bf215546Sopenharmony_ci * varying packing. 3640bf215546Sopenharmony_ci */ 3641bf215546Sopenharmony_ci bool no_integers; 3642bf215546Sopenharmony_ci 3643bf215546Sopenharmony_ci /** 3644bf215546Sopenharmony_ci * Specifies which type of indirectly accessed variables should force 3645bf215546Sopenharmony_ci * loop unrolling. 
3646bf215546Sopenharmony_ci */ 3647bf215546Sopenharmony_ci nir_variable_mode force_indirect_unrolling; 3648bf215546Sopenharmony_ci 3649bf215546Sopenharmony_ci nir_lower_int64_options lower_int64_options; 3650bf215546Sopenharmony_ci nir_lower_doubles_options lower_doubles_options; 3651bf215546Sopenharmony_ci nir_divergence_options divergence_analysis_options; 3652bf215546Sopenharmony_ci 3653bf215546Sopenharmony_ci /** 3654bf215546Sopenharmony_ci * Support pack varyings with different interpolation location 3655bf215546Sopenharmony_ci * (center, centroid, sample) and mode (flat, noperspective, smooth) 3656bf215546Sopenharmony_ci * into same slot. 3657bf215546Sopenharmony_ci */ 3658bf215546Sopenharmony_ci nir_pack_varying_options pack_varying_options; 3659bf215546Sopenharmony_ci 3660bf215546Sopenharmony_ci /** 3661bf215546Sopenharmony_ci * Lower load_deref/store_deref of inputs and outputs into 3662bf215546Sopenharmony_ci * load_input/store_input intrinsics. This is used by nir_lower_io_passes. 3663bf215546Sopenharmony_ci */ 3664bf215546Sopenharmony_ci bool lower_io_variables; 3665bf215546Sopenharmony_ci 3666bf215546Sopenharmony_ci /** 3667bf215546Sopenharmony_ci * Lower color inputs to load_colorN that are kind of like system values 3668bf215546Sopenharmony_ci * if lower_io_variables is also set. shader_info will contain 3669bf215546Sopenharmony_ci * the interpolation settings. This is used by nir_lower_io_passes. 3670bf215546Sopenharmony_ci */ 3671bf215546Sopenharmony_ci bool lower_fs_color_inputs; 3672bf215546Sopenharmony_ci 3673bf215546Sopenharmony_ci /** 3674bf215546Sopenharmony_ci * The masks of shader stages that support indirect indexing with 3675bf215546Sopenharmony_ci * load_input and store_output intrinsics. It's used when 3676bf215546Sopenharmony_ci * lower_io_variables is true. This is used by nir_lower_io_passes. 
3677bf215546Sopenharmony_ci */ 3678bf215546Sopenharmony_ci uint8_t support_indirect_inputs; 3679bf215546Sopenharmony_ci uint8_t support_indirect_outputs; 3680bf215546Sopenharmony_ci 3681bf215546Sopenharmony_ci /** 3682bf215546Sopenharmony_ci * Remove varying loaded from uniform, let fragment shader load the 3683bf215546Sopenharmony_ci * uniform directly. GPU passing varying by memory can benifit from it 3684bf215546Sopenharmony_ci * for sure; but GPU passing varying by on chip resource may not. 3685bf215546Sopenharmony_ci * Because it saves on chip resource but may increase memory pressure when 3686bf215546Sopenharmony_ci * fragment task is far more than vertex one, so better left it disabled. 3687bf215546Sopenharmony_ci */ 3688bf215546Sopenharmony_ci bool lower_varying_from_uniform; 3689bf215546Sopenharmony_ci} nir_shader_compiler_options; 3690bf215546Sopenharmony_ci 3691bf215546Sopenharmony_citypedef struct nir_shader { 3692bf215546Sopenharmony_ci /** list of uniforms (nir_variable) */ 3693bf215546Sopenharmony_ci struct exec_list variables; 3694bf215546Sopenharmony_ci 3695bf215546Sopenharmony_ci /** Set of driver-specific options for the shader. 3696bf215546Sopenharmony_ci * 3697bf215546Sopenharmony_ci * The memory for the options is expected to be kept in a single static 3698bf215546Sopenharmony_ci * copy by the driver. 3699bf215546Sopenharmony_ci */ 3700bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options; 3701bf215546Sopenharmony_ci 3702bf215546Sopenharmony_ci /** Various bits of compile-time information about a given shader */ 3703bf215546Sopenharmony_ci struct shader_info info; 3704bf215546Sopenharmony_ci 3705bf215546Sopenharmony_ci struct exec_list functions; /** < list of nir_function */ 3706bf215546Sopenharmony_ci 3707bf215546Sopenharmony_ci struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. 
*/ 3708bf215546Sopenharmony_ci 3709bf215546Sopenharmony_ci /** 3710bf215546Sopenharmony_ci * The size of the variable space for load_input_*, load_uniform_*, etc. 3711bf215546Sopenharmony_ci * intrinsics. This is in back-end specific units which is likely one of 3712bf215546Sopenharmony_ci * bytes, dwords, or vec4s depending on context and back-end. 3713bf215546Sopenharmony_ci */ 3714bf215546Sopenharmony_ci unsigned num_inputs, num_uniforms, num_outputs; 3715bf215546Sopenharmony_ci 3716bf215546Sopenharmony_ci /** Size in bytes of required implicitly bound global memory */ 3717bf215546Sopenharmony_ci unsigned global_mem_size; 3718bf215546Sopenharmony_ci 3719bf215546Sopenharmony_ci /** Size in bytes of required scratch space */ 3720bf215546Sopenharmony_ci unsigned scratch_size; 3721bf215546Sopenharmony_ci 3722bf215546Sopenharmony_ci /** Constant data associated with this shader. 3723bf215546Sopenharmony_ci * 3724bf215546Sopenharmony_ci * Constant data is loaded through load_constant intrinsics (as compared to 3725bf215546Sopenharmony_ci * the NIR load_const instructions which have the constant value inlined 3726bf215546Sopenharmony_ci * into them). This is usually generated by nir_opt_large_constants (so 3727bf215546Sopenharmony_ci * shaders don't have to load_const into a temporary array when they want 3728bf215546Sopenharmony_ci * to indirect on a const array). 
3729bf215546Sopenharmony_ci */ 3730bf215546Sopenharmony_ci void *constant_data; 3731bf215546Sopenharmony_ci /** Size of the constant data associated with the shader, in bytes */ 3732bf215546Sopenharmony_ci unsigned constant_data_size; 3733bf215546Sopenharmony_ci 3734bf215546Sopenharmony_ci struct nir_xfb_info *xfb_info; 3735bf215546Sopenharmony_ci 3736bf215546Sopenharmony_ci unsigned printf_info_count; 3737bf215546Sopenharmony_ci nir_printf_info *printf_info; 3738bf215546Sopenharmony_ci} nir_shader; 3739bf215546Sopenharmony_ci 3740bf215546Sopenharmony_ci#define nir_foreach_function(func, shader) \ 3741bf215546Sopenharmony_ci foreach_list_typed(nir_function, func, node, &(shader)->functions) 3742bf215546Sopenharmony_ci 3743bf215546Sopenharmony_cistatic inline nir_function_impl * 3744bf215546Sopenharmony_cinir_shader_get_entrypoint(const nir_shader *shader) 3745bf215546Sopenharmony_ci{ 3746bf215546Sopenharmony_ci nir_function *func = NULL; 3747bf215546Sopenharmony_ci 3748bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 3749bf215546Sopenharmony_ci assert(func == NULL); 3750bf215546Sopenharmony_ci if (function->is_entrypoint) { 3751bf215546Sopenharmony_ci func = function; 3752bf215546Sopenharmony_ci#ifndef NDEBUG 3753bf215546Sopenharmony_ci break; 3754bf215546Sopenharmony_ci#endif 3755bf215546Sopenharmony_ci } 3756bf215546Sopenharmony_ci } 3757bf215546Sopenharmony_ci 3758bf215546Sopenharmony_ci if (!func) 3759bf215546Sopenharmony_ci return NULL; 3760bf215546Sopenharmony_ci 3761bf215546Sopenharmony_ci assert(func->num_params == 0); 3762bf215546Sopenharmony_ci assert(func->impl); 3763bf215546Sopenharmony_ci return func->impl; 3764bf215546Sopenharmony_ci} 3765bf215546Sopenharmony_ci 3766bf215546Sopenharmony_civoid nir_remove_non_entrypoints(nir_shader *shader); 3767bf215546Sopenharmony_ci 3768bf215546Sopenharmony_cinir_shader *nir_shader_create(void *mem_ctx, 3769bf215546Sopenharmony_ci gl_shader_stage stage, 3770bf215546Sopenharmony_ci const 
nir_shader_compiler_options *options, 3771bf215546Sopenharmony_ci shader_info *si); 3772bf215546Sopenharmony_ci 3773bf215546Sopenharmony_cinir_register *nir_local_reg_create(nir_function_impl *impl); 3774bf215546Sopenharmony_ci 3775bf215546Sopenharmony_civoid nir_reg_remove(nir_register *reg); 3776bf215546Sopenharmony_ci 3777bf215546Sopenharmony_ci/** Adds a variable to the appropriate list in nir_shader */ 3778bf215546Sopenharmony_civoid nir_shader_add_variable(nir_shader *shader, nir_variable *var); 3779bf215546Sopenharmony_ci 3780bf215546Sopenharmony_cistatic inline void 3781bf215546Sopenharmony_cinir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 3782bf215546Sopenharmony_ci{ 3783bf215546Sopenharmony_ci assert(var->data.mode == nir_var_function_temp); 3784bf215546Sopenharmony_ci exec_list_push_tail(&impl->locals, &var->node); 3785bf215546Sopenharmony_ci} 3786bf215546Sopenharmony_ci 3787bf215546Sopenharmony_ci/** creates a variable, sets a few defaults, and adds it to the list */ 3788bf215546Sopenharmony_cinir_variable *nir_variable_create(nir_shader *shader, 3789bf215546Sopenharmony_ci nir_variable_mode mode, 3790bf215546Sopenharmony_ci const struct glsl_type *type, 3791bf215546Sopenharmony_ci const char *name); 3792bf215546Sopenharmony_ci/** creates a local variable and adds it to the list */ 3793bf215546Sopenharmony_cinir_variable *nir_local_variable_create(nir_function_impl *impl, 3794bf215546Sopenharmony_ci const struct glsl_type *type, 3795bf215546Sopenharmony_ci const char *name); 3796bf215546Sopenharmony_ci 3797bf215546Sopenharmony_cinir_variable *nir_find_variable_with_location(nir_shader *shader, 3798bf215546Sopenharmony_ci nir_variable_mode mode, 3799bf215546Sopenharmony_ci unsigned location); 3800bf215546Sopenharmony_ci 3801bf215546Sopenharmony_cinir_variable *nir_find_variable_with_driver_location(nir_shader *shader, 3802bf215546Sopenharmony_ci nir_variable_mode mode, 3803bf215546Sopenharmony_ci unsigned location); 
3804bf215546Sopenharmony_ci 3805bf215546Sopenharmony_civoid nir_sort_variables_with_modes(nir_shader *shader, 3806bf215546Sopenharmony_ci int (*compar)(const nir_variable *, 3807bf215546Sopenharmony_ci const nir_variable *), 3808bf215546Sopenharmony_ci nir_variable_mode modes); 3809bf215546Sopenharmony_ci 3810bf215546Sopenharmony_ci/** creates a function and adds it to the shader's list of functions */ 3811bf215546Sopenharmony_cinir_function *nir_function_create(nir_shader *shader, const char *name); 3812bf215546Sopenharmony_ci 3813bf215546Sopenharmony_cinir_function_impl *nir_function_impl_create(nir_function *func); 3814bf215546Sopenharmony_ci/** creates a function_impl that isn't tied to any particular function */ 3815bf215546Sopenharmony_cinir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 3816bf215546Sopenharmony_ci 3817bf215546Sopenharmony_cinir_block *nir_block_create(nir_shader *shader); 3818bf215546Sopenharmony_cinir_if *nir_if_create(nir_shader *shader); 3819bf215546Sopenharmony_cinir_loop *nir_loop_create(nir_shader *shader); 3820bf215546Sopenharmony_ci 3821bf215546Sopenharmony_cinir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 3822bf215546Sopenharmony_ci 3823bf215546Sopenharmony_ci/** requests that the given pieces of metadata be generated */ 3824bf215546Sopenharmony_civoid nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 3825bf215546Sopenharmony_ci/** dirties all but the preserved metadata */ 3826bf215546Sopenharmony_civoid nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 3827bf215546Sopenharmony_ci/** Preserves all metadata for the given shader */ 3828bf215546Sopenharmony_civoid nir_shader_preserve_all_metadata(nir_shader *shader); 3829bf215546Sopenharmony_ci 3830bf215546Sopenharmony_ci/** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ 3831bf215546Sopenharmony_cinir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 3832bf215546Sopenharmony_ci 3833bf215546Sopenharmony_cinir_deref_instr *nir_deref_instr_create(nir_shader *shader, 3834bf215546Sopenharmony_ci nir_deref_type deref_type); 3835bf215546Sopenharmony_ci 3836bf215546Sopenharmony_cinir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 3837bf215546Sopenharmony_ci 3838bf215546Sopenharmony_cinir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 3839bf215546Sopenharmony_ci unsigned num_components, 3840bf215546Sopenharmony_ci unsigned bit_size); 3841bf215546Sopenharmony_ci 3842bf215546Sopenharmony_cinir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 3843bf215546Sopenharmony_ci nir_intrinsic_op op); 3844bf215546Sopenharmony_ci 3845bf215546Sopenharmony_cinir_call_instr *nir_call_instr_create(nir_shader *shader, 3846bf215546Sopenharmony_ci nir_function *callee); 3847bf215546Sopenharmony_ci 3848bf215546Sopenharmony_ci/** Creates a NIR texture instruction */ 3849bf215546Sopenharmony_cinir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 3850bf215546Sopenharmony_ci 3851bf215546Sopenharmony_cinir_phi_instr *nir_phi_instr_create(nir_shader *shader); 3852bf215546Sopenharmony_cinir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src); 3853bf215546Sopenharmony_ci 3854bf215546Sopenharmony_cinir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 3855bf215546Sopenharmony_ci 3856bf215546Sopenharmony_cinir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 3857bf215546Sopenharmony_ci unsigned num_components, 3858bf215546Sopenharmony_ci unsigned bit_size); 3859bf215546Sopenharmony_ci 3860bf215546Sopenharmony_cinir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 3861bf215546Sopenharmony_ci 3862bf215546Sopenharmony_ci/** 3863bf215546Sopenharmony_ci * NIR Cursors 
and Instruction Insertion API 3864bf215546Sopenharmony_ci * @{ 3865bf215546Sopenharmony_ci * 3866bf215546Sopenharmony_ci * A tiny struct representing a point to insert/extract instructions or 3867bf215546Sopenharmony_ci * control flow nodes. Helps reduce the combinatorial explosion of possible 3868bf215546Sopenharmony_ci * points to insert/extract. 3869bf215546Sopenharmony_ci * 3870bf215546Sopenharmony_ci * \sa nir_control_flow.h 3871bf215546Sopenharmony_ci */ 3872bf215546Sopenharmony_citypedef enum { 3873bf215546Sopenharmony_ci nir_cursor_before_block, 3874bf215546Sopenharmony_ci nir_cursor_after_block, 3875bf215546Sopenharmony_ci nir_cursor_before_instr, 3876bf215546Sopenharmony_ci nir_cursor_after_instr, 3877bf215546Sopenharmony_ci} nir_cursor_option; 3878bf215546Sopenharmony_ci 3879bf215546Sopenharmony_citypedef struct { 3880bf215546Sopenharmony_ci nir_cursor_option option; 3881bf215546Sopenharmony_ci union { 3882bf215546Sopenharmony_ci nir_block *block; 3883bf215546Sopenharmony_ci nir_instr *instr; 3884bf215546Sopenharmony_ci }; 3885bf215546Sopenharmony_ci} nir_cursor; 3886bf215546Sopenharmony_ci 3887bf215546Sopenharmony_cistatic inline nir_block * 3888bf215546Sopenharmony_cinir_cursor_current_block(nir_cursor cursor) 3889bf215546Sopenharmony_ci{ 3890bf215546Sopenharmony_ci if (cursor.option == nir_cursor_before_instr || 3891bf215546Sopenharmony_ci cursor.option == nir_cursor_after_instr) { 3892bf215546Sopenharmony_ci return cursor.instr->block; 3893bf215546Sopenharmony_ci } else { 3894bf215546Sopenharmony_ci return cursor.block; 3895bf215546Sopenharmony_ci } 3896bf215546Sopenharmony_ci} 3897bf215546Sopenharmony_ci 3898bf215546Sopenharmony_cibool nir_cursors_equal(nir_cursor a, nir_cursor b); 3899bf215546Sopenharmony_ci 3900bf215546Sopenharmony_cistatic inline nir_cursor 3901bf215546Sopenharmony_cinir_before_block(nir_block *block) 3902bf215546Sopenharmony_ci{ 3903bf215546Sopenharmony_ci nir_cursor cursor; 3904bf215546Sopenharmony_ci cursor.option = 
nir_cursor_before_block; 3905bf215546Sopenharmony_ci cursor.block = block; 3906bf215546Sopenharmony_ci return cursor; 3907bf215546Sopenharmony_ci} 3908bf215546Sopenharmony_ci 3909bf215546Sopenharmony_cistatic inline nir_cursor 3910bf215546Sopenharmony_cinir_after_block(nir_block *block) 3911bf215546Sopenharmony_ci{ 3912bf215546Sopenharmony_ci nir_cursor cursor; 3913bf215546Sopenharmony_ci cursor.option = nir_cursor_after_block; 3914bf215546Sopenharmony_ci cursor.block = block; 3915bf215546Sopenharmony_ci return cursor; 3916bf215546Sopenharmony_ci} 3917bf215546Sopenharmony_ci 3918bf215546Sopenharmony_cistatic inline nir_cursor 3919bf215546Sopenharmony_cinir_before_instr(nir_instr *instr) 3920bf215546Sopenharmony_ci{ 3921bf215546Sopenharmony_ci nir_cursor cursor; 3922bf215546Sopenharmony_ci cursor.option = nir_cursor_before_instr; 3923bf215546Sopenharmony_ci cursor.instr = instr; 3924bf215546Sopenharmony_ci return cursor; 3925bf215546Sopenharmony_ci} 3926bf215546Sopenharmony_ci 3927bf215546Sopenharmony_cistatic inline nir_cursor 3928bf215546Sopenharmony_cinir_after_instr(nir_instr *instr) 3929bf215546Sopenharmony_ci{ 3930bf215546Sopenharmony_ci nir_cursor cursor; 3931bf215546Sopenharmony_ci cursor.option = nir_cursor_after_instr; 3932bf215546Sopenharmony_ci cursor.instr = instr; 3933bf215546Sopenharmony_ci return cursor; 3934bf215546Sopenharmony_ci} 3935bf215546Sopenharmony_ci 3936bf215546Sopenharmony_cistatic inline nir_cursor 3937bf215546Sopenharmony_cinir_before_block_after_phis(nir_block *block) 3938bf215546Sopenharmony_ci{ 3939bf215546Sopenharmony_ci nir_phi_instr *last_phi = nir_block_last_phi_instr(block); 3940bf215546Sopenharmony_ci if (last_phi) 3941bf215546Sopenharmony_ci return nir_after_instr(&last_phi->instr); 3942bf215546Sopenharmony_ci else 3943bf215546Sopenharmony_ci return nir_before_block(block); 3944bf215546Sopenharmony_ci} 3945bf215546Sopenharmony_ci 3946bf215546Sopenharmony_cistatic inline nir_cursor 
3947bf215546Sopenharmony_cinir_after_block_before_jump(nir_block *block) 3948bf215546Sopenharmony_ci{ 3949bf215546Sopenharmony_ci nir_instr *last_instr = nir_block_last_instr(block); 3950bf215546Sopenharmony_ci if (last_instr && last_instr->type == nir_instr_type_jump) { 3951bf215546Sopenharmony_ci return nir_before_instr(last_instr); 3952bf215546Sopenharmony_ci } else { 3953bf215546Sopenharmony_ci return nir_after_block(block); 3954bf215546Sopenharmony_ci } 3955bf215546Sopenharmony_ci} 3956bf215546Sopenharmony_ci 3957bf215546Sopenharmony_cistatic inline nir_cursor 3958bf215546Sopenharmony_cinir_before_src(nir_src *src, bool is_if_condition) 3959bf215546Sopenharmony_ci{ 3960bf215546Sopenharmony_ci if (is_if_condition) { 3961bf215546Sopenharmony_ci nir_block *prev_block = 3962bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node)); 3963bf215546Sopenharmony_ci assert(!nir_block_ends_in_jump(prev_block)); 3964bf215546Sopenharmony_ci return nir_after_block(prev_block); 3965bf215546Sopenharmony_ci } else if (src->parent_instr->type == nir_instr_type_phi) { 3966bf215546Sopenharmony_ci#ifndef NDEBUG 3967bf215546Sopenharmony_ci nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr); 3968bf215546Sopenharmony_ci bool found = false; 3969bf215546Sopenharmony_ci nir_foreach_phi_src(phi_src, cond_phi) { 3970bf215546Sopenharmony_ci if (phi_src->src.ssa == src->ssa) { 3971bf215546Sopenharmony_ci found = true; 3972bf215546Sopenharmony_ci break; 3973bf215546Sopenharmony_ci } 3974bf215546Sopenharmony_ci } 3975bf215546Sopenharmony_ci assert(found); 3976bf215546Sopenharmony_ci#endif 3977bf215546Sopenharmony_ci /* The list_entry() macro is a generic container-of macro, it just happens 3978bf215546Sopenharmony_ci * to have a more specific name. 
3979bf215546Sopenharmony_ci */ 3980bf215546Sopenharmony_ci nir_phi_src *phi_src = list_entry(src, nir_phi_src, src); 3981bf215546Sopenharmony_ci return nir_after_block_before_jump(phi_src->pred); 3982bf215546Sopenharmony_ci } else { 3983bf215546Sopenharmony_ci return nir_before_instr(src->parent_instr); 3984bf215546Sopenharmony_ci } 3985bf215546Sopenharmony_ci} 3986bf215546Sopenharmony_ci 3987bf215546Sopenharmony_cistatic inline nir_cursor 3988bf215546Sopenharmony_cinir_before_cf_node(nir_cf_node *node) 3989bf215546Sopenharmony_ci{ 3990bf215546Sopenharmony_ci if (node->type == nir_cf_node_block) 3991bf215546Sopenharmony_ci return nir_before_block(nir_cf_node_as_block(node)); 3992bf215546Sopenharmony_ci 3993bf215546Sopenharmony_ci return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 3994bf215546Sopenharmony_ci} 3995bf215546Sopenharmony_ci 3996bf215546Sopenharmony_cistatic inline nir_cursor 3997bf215546Sopenharmony_cinir_after_cf_node(nir_cf_node *node) 3998bf215546Sopenharmony_ci{ 3999bf215546Sopenharmony_ci if (node->type == nir_cf_node_block) 4000bf215546Sopenharmony_ci return nir_after_block(nir_cf_node_as_block(node)); 4001bf215546Sopenharmony_ci 4002bf215546Sopenharmony_ci return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 4003bf215546Sopenharmony_ci} 4004bf215546Sopenharmony_ci 4005bf215546Sopenharmony_cistatic inline nir_cursor 4006bf215546Sopenharmony_cinir_after_phis(nir_block *block) 4007bf215546Sopenharmony_ci{ 4008bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 4009bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 4010bf215546Sopenharmony_ci return nir_before_instr(instr); 4011bf215546Sopenharmony_ci } 4012bf215546Sopenharmony_ci return nir_after_block(block); 4013bf215546Sopenharmony_ci} 4014bf215546Sopenharmony_ci 4015bf215546Sopenharmony_cistatic inline nir_cursor 4016bf215546Sopenharmony_cinir_after_instr_and_phis(nir_instr *instr) 4017bf215546Sopenharmony_ci{ 4018bf215546Sopenharmony_ci if 
(instr->type == nir_instr_type_phi) 4019bf215546Sopenharmony_ci return nir_after_phis(instr->block); 4020bf215546Sopenharmony_ci else 4021bf215546Sopenharmony_ci return nir_after_instr(instr); 4022bf215546Sopenharmony_ci} 4023bf215546Sopenharmony_ci 4024bf215546Sopenharmony_cistatic inline nir_cursor 4025bf215546Sopenharmony_cinir_after_cf_node_and_phis(nir_cf_node *node) 4026bf215546Sopenharmony_ci{ 4027bf215546Sopenharmony_ci if (node->type == nir_cf_node_block) 4028bf215546Sopenharmony_ci return nir_after_block(nir_cf_node_as_block(node)); 4029bf215546Sopenharmony_ci 4030bf215546Sopenharmony_ci nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 4031bf215546Sopenharmony_ci 4032bf215546Sopenharmony_ci return nir_after_phis(block); 4033bf215546Sopenharmony_ci} 4034bf215546Sopenharmony_ci 4035bf215546Sopenharmony_cistatic inline nir_cursor 4036bf215546Sopenharmony_cinir_before_cf_list(struct exec_list *cf_list) 4037bf215546Sopenharmony_ci{ 4038bf215546Sopenharmony_ci nir_cf_node *first_node = exec_node_data(nir_cf_node, 4039bf215546Sopenharmony_ci exec_list_get_head(cf_list), node); 4040bf215546Sopenharmony_ci return nir_before_cf_node(first_node); 4041bf215546Sopenharmony_ci} 4042bf215546Sopenharmony_ci 4043bf215546Sopenharmony_cistatic inline nir_cursor 4044bf215546Sopenharmony_cinir_after_cf_list(struct exec_list *cf_list) 4045bf215546Sopenharmony_ci{ 4046bf215546Sopenharmony_ci nir_cf_node *last_node = exec_node_data(nir_cf_node, 4047bf215546Sopenharmony_ci exec_list_get_tail(cf_list), node); 4048bf215546Sopenharmony_ci return nir_after_cf_node(last_node); 4049bf215546Sopenharmony_ci} 4050bf215546Sopenharmony_ci 4051bf215546Sopenharmony_ci/** 4052bf215546Sopenharmony_ci * Insert a NIR instruction at the given cursor. 4053bf215546Sopenharmony_ci * 4054bf215546Sopenharmony_ci * Note: This does not update the cursor. 
4055bf215546Sopenharmony_ci */ 4056bf215546Sopenharmony_civoid nir_instr_insert(nir_cursor cursor, nir_instr *instr); 4057bf215546Sopenharmony_ci 4058bf215546Sopenharmony_cibool nir_instr_move(nir_cursor cursor, nir_instr *instr); 4059bf215546Sopenharmony_ci 4060bf215546Sopenharmony_cistatic inline void 4061bf215546Sopenharmony_cinir_instr_insert_before(nir_instr *instr, nir_instr *before) 4062bf215546Sopenharmony_ci{ 4063bf215546Sopenharmony_ci nir_instr_insert(nir_before_instr(instr), before); 4064bf215546Sopenharmony_ci} 4065bf215546Sopenharmony_ci 4066bf215546Sopenharmony_cistatic inline void 4067bf215546Sopenharmony_cinir_instr_insert_after(nir_instr *instr, nir_instr *after) 4068bf215546Sopenharmony_ci{ 4069bf215546Sopenharmony_ci nir_instr_insert(nir_after_instr(instr), after); 4070bf215546Sopenharmony_ci} 4071bf215546Sopenharmony_ci 4072bf215546Sopenharmony_cistatic inline void 4073bf215546Sopenharmony_cinir_instr_insert_before_block(nir_block *block, nir_instr *before) 4074bf215546Sopenharmony_ci{ 4075bf215546Sopenharmony_ci nir_instr_insert(nir_before_block(block), before); 4076bf215546Sopenharmony_ci} 4077bf215546Sopenharmony_ci 4078bf215546Sopenharmony_cistatic inline void 4079bf215546Sopenharmony_cinir_instr_insert_after_block(nir_block *block, nir_instr *after) 4080bf215546Sopenharmony_ci{ 4081bf215546Sopenharmony_ci nir_instr_insert(nir_after_block(block), after); 4082bf215546Sopenharmony_ci} 4083bf215546Sopenharmony_ci 4084bf215546Sopenharmony_cistatic inline void 4085bf215546Sopenharmony_cinir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 4086bf215546Sopenharmony_ci{ 4087bf215546Sopenharmony_ci nir_instr_insert(nir_before_cf_node(node), before); 4088bf215546Sopenharmony_ci} 4089bf215546Sopenharmony_ci 4090bf215546Sopenharmony_cistatic inline void 4091bf215546Sopenharmony_cinir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 4092bf215546Sopenharmony_ci{ 4093bf215546Sopenharmony_ci nir_instr_insert(nir_after_cf_node(node), 
after); 4094bf215546Sopenharmony_ci} 4095bf215546Sopenharmony_ci 4096bf215546Sopenharmony_cistatic inline void 4097bf215546Sopenharmony_cinir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 4098bf215546Sopenharmony_ci{ 4099bf215546Sopenharmony_ci nir_instr_insert(nir_before_cf_list(list), before); 4100bf215546Sopenharmony_ci} 4101bf215546Sopenharmony_ci 4102bf215546Sopenharmony_cistatic inline void 4103bf215546Sopenharmony_cinir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 4104bf215546Sopenharmony_ci{ 4105bf215546Sopenharmony_ci nir_instr_insert(nir_after_cf_list(list), after); 4106bf215546Sopenharmony_ci} 4107bf215546Sopenharmony_ci 4108bf215546Sopenharmony_civoid nir_instr_remove_v(nir_instr *instr); 4109bf215546Sopenharmony_civoid nir_instr_free(nir_instr *instr); 4110bf215546Sopenharmony_civoid nir_instr_free_list(struct exec_list *list); 4111bf215546Sopenharmony_ci 4112bf215546Sopenharmony_cistatic inline nir_cursor 4113bf215546Sopenharmony_cinir_instr_remove(nir_instr *instr) 4114bf215546Sopenharmony_ci{ 4115bf215546Sopenharmony_ci nir_cursor cursor; 4116bf215546Sopenharmony_ci nir_instr *prev = nir_instr_prev(instr); 4117bf215546Sopenharmony_ci if (prev) { 4118bf215546Sopenharmony_ci cursor = nir_after_instr(prev); 4119bf215546Sopenharmony_ci } else { 4120bf215546Sopenharmony_ci cursor = nir_before_block(instr->block); 4121bf215546Sopenharmony_ci } 4122bf215546Sopenharmony_ci nir_instr_remove_v(instr); 4123bf215546Sopenharmony_ci return cursor; 4124bf215546Sopenharmony_ci} 4125bf215546Sopenharmony_ci 4126bf215546Sopenharmony_cinir_cursor nir_instr_free_and_dce(nir_instr *instr); 4127bf215546Sopenharmony_ci 4128bf215546Sopenharmony_ci/** @} */ 4129bf215546Sopenharmony_ci 4130bf215546Sopenharmony_cinir_ssa_def *nir_instr_ssa_def(nir_instr *instr); 4131bf215546Sopenharmony_cibool nir_instr_def_is_register(nir_instr *instr); 4132bf215546Sopenharmony_ci 4133bf215546Sopenharmony_citypedef bool 
(*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); 4134bf215546Sopenharmony_citypedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); 4135bf215546Sopenharmony_citypedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 4136bf215546Sopenharmony_cibool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, 4137bf215546Sopenharmony_ci void *state); 4138bf215546Sopenharmony_cistatic inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); 4139bf215546Sopenharmony_cistatic inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 4140bf215546Sopenharmony_cibool nir_foreach_phi_src_leaving_block(nir_block *instr, 4141bf215546Sopenharmony_ci nir_foreach_src_cb cb, 4142bf215546Sopenharmony_ci void *state); 4143bf215546Sopenharmony_ci 4144bf215546Sopenharmony_cinir_const_value *nir_src_as_const_value(nir_src src); 4145bf215546Sopenharmony_ci 4146bf215546Sopenharmony_ci#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \ 4147bf215546Sopenharmony_cistatic inline c_type * \ 4148bf215546Sopenharmony_cinir_src_as_ ## name (nir_src src) \ 4149bf215546Sopenharmony_ci{ \ 4150bf215546Sopenharmony_ci return src.is_ssa && src.ssa->parent_instr->type == type_enum \ 4151bf215546Sopenharmony_ci ? 
cast_macro(src.ssa->parent_instr) : NULL; \ 4152bf215546Sopenharmony_ci} 4153bf215546Sopenharmony_ci 4154bf215546Sopenharmony_ciNIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 4155bf215546Sopenharmony_ciNIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 4156bf215546Sopenharmony_ci nir_instr_type_intrinsic, nir_instr_as_intrinsic) 4157bf215546Sopenharmony_ciNIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 4158bf215546Sopenharmony_ci 4159bf215546Sopenharmony_cibool nir_src_is_always_uniform(nir_src src); 4160bf215546Sopenharmony_cibool nir_srcs_equal(nir_src src1, nir_src src2); 4161bf215546Sopenharmony_cibool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 4162bf215546Sopenharmony_ci 4163bf215546Sopenharmony_cistatic inline void 4164bf215546Sopenharmony_cinir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr, 4165bf215546Sopenharmony_ci nir_src *src, nir_ssa_def *new_ssa) 4166bf215546Sopenharmony_ci{ 4167bf215546Sopenharmony_ci assert(src->parent_instr == instr); 4168bf215546Sopenharmony_ci assert(src->is_ssa && src->ssa); 4169bf215546Sopenharmony_ci list_del(&src->use_link); 4170bf215546Sopenharmony_ci src->ssa = new_ssa; 4171bf215546Sopenharmony_ci list_addtail(&src->use_link, &new_ssa->uses); 4172bf215546Sopenharmony_ci} 4173bf215546Sopenharmony_ci 4174bf215546Sopenharmony_civoid nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 4175bf215546Sopenharmony_civoid nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 4176bf215546Sopenharmony_ci 4177bf215546Sopenharmony_cistatic inline void 4178bf215546Sopenharmony_cinir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt, 4179bf215546Sopenharmony_ci nir_src *src, nir_ssa_def *new_ssa) 4180bf215546Sopenharmony_ci{ 4181bf215546Sopenharmony_ci assert(src->parent_if == if_stmt); 4182bf215546Sopenharmony_ci assert(src->is_ssa && src->ssa); 4183bf215546Sopenharmony_ci list_del(&src->use_link); 
4184bf215546Sopenharmony_ci src->ssa = new_ssa; 4185bf215546Sopenharmony_ci list_addtail(&src->use_link, &new_ssa->if_uses); 4186bf215546Sopenharmony_ci} 4187bf215546Sopenharmony_ci 4188bf215546Sopenharmony_civoid nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 4189bf215546Sopenharmony_civoid nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 4190bf215546Sopenharmony_ci nir_dest new_dest); 4191bf215546Sopenharmony_ci 4192bf215546Sopenharmony_civoid nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 4193bf215546Sopenharmony_ci unsigned num_components, unsigned bit_size, 4194bf215546Sopenharmony_ci const char *name); 4195bf215546Sopenharmony_civoid nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 4196bf215546Sopenharmony_ci unsigned num_components, unsigned bit_size); 4197bf215546Sopenharmony_cistatic inline void 4198bf215546Sopenharmony_cinir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 4199bf215546Sopenharmony_ci const struct glsl_type *type, 4200bf215546Sopenharmony_ci const char *name) 4201bf215546Sopenharmony_ci{ 4202bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(type)); 4203bf215546Sopenharmony_ci nir_ssa_dest_init(instr, dest, glsl_get_components(type), 4204bf215546Sopenharmony_ci glsl_get_bit_size(type), name); 4205bf215546Sopenharmony_ci} 4206bf215546Sopenharmony_civoid nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa); 4207bf215546Sopenharmony_civoid nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src); 4208bf215546Sopenharmony_civoid nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa, 4209bf215546Sopenharmony_ci nir_instr *after_me); 4210bf215546Sopenharmony_ci 4211bf215546Sopenharmony_cinir_component_mask_t nir_src_components_read(const nir_src *src); 4212bf215546Sopenharmony_cinir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 4213bf215546Sopenharmony_ci 4214bf215546Sopenharmony_cistatic inline bool 
4215bf215546Sopenharmony_cinir_ssa_def_is_unused(nir_ssa_def *ssa) 4216bf215546Sopenharmony_ci{ 4217bf215546Sopenharmony_ci return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses); 4218bf215546Sopenharmony_ci} 4219bf215546Sopenharmony_ci 4220bf215546Sopenharmony_ci 4221bf215546Sopenharmony_ci/** Returns the next block, disregarding structure 4222bf215546Sopenharmony_ci * 4223bf215546Sopenharmony_ci * The ordering is deterministic but has no guarantees beyond that. In 4224bf215546Sopenharmony_ci * particular, it is not guaranteed to be dominance-preserving. 4225bf215546Sopenharmony_ci */ 4226bf215546Sopenharmony_cinir_block *nir_block_unstructured_next(nir_block *block); 4227bf215546Sopenharmony_cinir_block *nir_unstructured_start_block(nir_function_impl *impl); 4228bf215546Sopenharmony_ci 4229bf215546Sopenharmony_ci#define nir_foreach_block_unstructured(block, impl) \ 4230bf215546Sopenharmony_ci for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ 4231bf215546Sopenharmony_ci block = nir_block_unstructured_next(block)) 4232bf215546Sopenharmony_ci 4233bf215546Sopenharmony_ci#define nir_foreach_block_unstructured_safe(block, impl) \ 4234bf215546Sopenharmony_ci for (nir_block *block = nir_unstructured_start_block(impl), \ 4235bf215546Sopenharmony_ci *next = nir_block_unstructured_next(block); \ 4236bf215546Sopenharmony_ci block != NULL; \ 4237bf215546Sopenharmony_ci block = next, next = nir_block_unstructured_next(block)) 4238bf215546Sopenharmony_ci 4239bf215546Sopenharmony_ci/* 4240bf215546Sopenharmony_ci * finds the next basic block in source-code order, returns NULL if there is 4241bf215546Sopenharmony_ci * none 4242bf215546Sopenharmony_ci */ 4243bf215546Sopenharmony_ci 4244bf215546Sopenharmony_cinir_block *nir_block_cf_tree_next(nir_block *block); 4245bf215546Sopenharmony_ci 4246bf215546Sopenharmony_ci/* Performs the opposite of nir_block_cf_tree_next() */ 4247bf215546Sopenharmony_ci 4248bf215546Sopenharmony_cinir_block 
*nir_block_cf_tree_prev(nir_block *block); 4249bf215546Sopenharmony_ci 4250bf215546Sopenharmony_ci/* Gets the first block in a CF node in source-code order */ 4251bf215546Sopenharmony_ci 4252bf215546Sopenharmony_cinir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 4253bf215546Sopenharmony_ci 4254bf215546Sopenharmony_ci/* Gets the last block in a CF node in source-code order */ 4255bf215546Sopenharmony_ci 4256bf215546Sopenharmony_cinir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 4257bf215546Sopenharmony_ci 4258bf215546Sopenharmony_ci/* Gets the next block after a CF node in source-code order */ 4259bf215546Sopenharmony_ci 4260bf215546Sopenharmony_cinir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 4261bf215546Sopenharmony_ci 4262bf215546Sopenharmony_ci/* Macros for loops that visit blocks in source-code order */ 4263bf215546Sopenharmony_ci 4264bf215546Sopenharmony_ci#define nir_foreach_block(block, impl) \ 4265bf215546Sopenharmony_ci for (nir_block *block = nir_start_block(impl); block != NULL; \ 4266bf215546Sopenharmony_ci block = nir_block_cf_tree_next(block)) 4267bf215546Sopenharmony_ci 4268bf215546Sopenharmony_ci#define nir_foreach_block_safe(block, impl) \ 4269bf215546Sopenharmony_ci for (nir_block *block = nir_start_block(impl), \ 4270bf215546Sopenharmony_ci *next = nir_block_cf_tree_next(block); \ 4271bf215546Sopenharmony_ci block != NULL; \ 4272bf215546Sopenharmony_ci block = next, next = nir_block_cf_tree_next(block)) 4273bf215546Sopenharmony_ci 4274bf215546Sopenharmony_ci#define nir_foreach_block_reverse(block, impl) \ 4275bf215546Sopenharmony_ci for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 4276bf215546Sopenharmony_ci block = nir_block_cf_tree_prev(block)) 4277bf215546Sopenharmony_ci 4278bf215546Sopenharmony_ci#define nir_foreach_block_reverse_safe(block, impl) \ 4279bf215546Sopenharmony_ci for (nir_block *block = nir_impl_last_block(impl), \ 4280bf215546Sopenharmony_ci *prev = nir_block_cf_tree_prev(block); \ 
4281bf215546Sopenharmony_ci block != NULL; \ 4282bf215546Sopenharmony_ci block = prev, prev = nir_block_cf_tree_prev(block)) 4283bf215546Sopenharmony_ci 4284bf215546Sopenharmony_ci#define nir_foreach_block_in_cf_node(block, node) \ 4285bf215546Sopenharmony_ci for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 4286bf215546Sopenharmony_ci block != nir_cf_node_cf_tree_next(node); \ 4287bf215546Sopenharmony_ci block = nir_block_cf_tree_next(block)) 4288bf215546Sopenharmony_ci 4289bf215546Sopenharmony_ci/* If the following CF node is an if, this function returns that if. 4290bf215546Sopenharmony_ci * Otherwise, it returns NULL. 4291bf215546Sopenharmony_ci */ 4292bf215546Sopenharmony_cinir_if *nir_block_get_following_if(nir_block *block); 4293bf215546Sopenharmony_ci 4294bf215546Sopenharmony_cinir_loop *nir_block_get_following_loop(nir_block *block); 4295bf215546Sopenharmony_ci 4296bf215546Sopenharmony_cinir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx); 4297bf215546Sopenharmony_ci 4298bf215546Sopenharmony_civoid nir_index_local_regs(nir_function_impl *impl); 4299bf215546Sopenharmony_civoid nir_index_ssa_defs(nir_function_impl *impl); 4300bf215546Sopenharmony_ciunsigned nir_index_instrs(nir_function_impl *impl); 4301bf215546Sopenharmony_ci 4302bf215546Sopenharmony_civoid nir_index_blocks(nir_function_impl *impl); 4303bf215546Sopenharmony_ci 4304bf215546Sopenharmony_ciunsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); 4305bf215546Sopenharmony_ciunsigned nir_function_impl_index_vars(nir_function_impl *impl); 4306bf215546Sopenharmony_ci 4307bf215546Sopenharmony_civoid nir_print_shader(nir_shader *shader, FILE *fp); 4308bf215546Sopenharmony_civoid nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 4309bf215546Sopenharmony_civoid nir_print_instr(const nir_instr *instr, FILE *fp); 4310bf215546Sopenharmony_civoid nir_print_deref(const nir_deref_instr *deref, FILE *fp); 
4311bf215546Sopenharmony_civoid nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations); 4312bf215546Sopenharmony_ci#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL) 4313bf215546Sopenharmony_ci#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL) 4314bf215546Sopenharmony_ci#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL) 4315bf215546Sopenharmony_ci#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations) 4316bf215546Sopenharmony_ci 4317bf215546Sopenharmony_cichar *nir_shader_as_str(nir_shader *nir, void *mem_ctx); 4318bf215546Sopenharmony_cichar *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx); 4319bf215546Sopenharmony_ci 4320bf215546Sopenharmony_ci/** Shallow clone of a single instruction. */ 4321bf215546Sopenharmony_cinir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 4322bf215546Sopenharmony_ci 4323bf215546Sopenharmony_ci/** Clone a single instruction, including a remap table to rewrite sources. */ 4324bf215546Sopenharmony_cinir_instr *nir_instr_clone_deep(nir_shader *s, const nir_instr *orig, 4325bf215546Sopenharmony_ci struct hash_table *remap_table); 4326bf215546Sopenharmony_ci 4327bf215546Sopenharmony_ci/** Shallow clone of a single ALU instruction. 
*/ 4328bf215546Sopenharmony_cinir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig); 4329bf215546Sopenharmony_ci 4330bf215546Sopenharmony_cinir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 4331bf215546Sopenharmony_cinir_function_impl *nir_function_impl_clone(nir_shader *shader, 4332bf215546Sopenharmony_ci const nir_function_impl *fi); 4333bf215546Sopenharmony_cinir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 4334bf215546Sopenharmony_cinir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 4335bf215546Sopenharmony_ci 4336bf215546Sopenharmony_civoid nir_shader_replace(nir_shader *dest, nir_shader *src); 4337bf215546Sopenharmony_ci 4338bf215546Sopenharmony_civoid nir_shader_serialize_deserialize(nir_shader *s); 4339bf215546Sopenharmony_ci 4340bf215546Sopenharmony_ci#ifndef NDEBUG 4341bf215546Sopenharmony_civoid nir_validate_shader(nir_shader *shader, const char *when); 4342bf215546Sopenharmony_civoid nir_validate_ssa_dominance(nir_shader *shader, const char *when); 4343bf215546Sopenharmony_civoid nir_metadata_set_validation_flag(nir_shader *shader); 4344bf215546Sopenharmony_civoid nir_metadata_check_validation_flag(nir_shader *shader); 4345bf215546Sopenharmony_ci 4346bf215546Sopenharmony_cistatic inline bool 4347bf215546Sopenharmony_cishould_skip_nir(const char *name) 4348bf215546Sopenharmony_ci{ 4349bf215546Sopenharmony_ci static const char *list = NULL; 4350bf215546Sopenharmony_ci if (!list) { 4351bf215546Sopenharmony_ci /* Comma separated list of names to skip. 
*/ 4352bf215546Sopenharmony_ci list = getenv("NIR_SKIP"); 4353bf215546Sopenharmony_ci if (!list) 4354bf215546Sopenharmony_ci list = ""; 4355bf215546Sopenharmony_ci } 4356bf215546Sopenharmony_ci 4357bf215546Sopenharmony_ci if (!list[0]) 4358bf215546Sopenharmony_ci return false; 4359bf215546Sopenharmony_ci 4360bf215546Sopenharmony_ci return comma_separated_list_contains(list, name); 4361bf215546Sopenharmony_ci} 4362bf215546Sopenharmony_ci 4363bf215546Sopenharmony_cistatic inline bool 4364bf215546Sopenharmony_cishould_print_nir(nir_shader *shader) 4365bf215546Sopenharmony_ci{ 4366bf215546Sopenharmony_ci if (shader->info.internal || 4367bf215546Sopenharmony_ci shader->info.stage < 0 || 4368bf215546Sopenharmony_ci shader->info.stage > MESA_SHADER_KERNEL) 4369bf215546Sopenharmony_ci return false; 4370bf215546Sopenharmony_ci 4371bf215546Sopenharmony_ci return unlikely(nir_debug_print_shader[shader->info.stage]); 4372bf215546Sopenharmony_ci} 4373bf215546Sopenharmony_ci#else 4374bf215546Sopenharmony_cistatic inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4375bf215546Sopenharmony_cistatic inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4376bf215546Sopenharmony_cistatic inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 4377bf215546Sopenharmony_cistatic inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 4378bf215546Sopenharmony_cistatic inline bool should_skip_nir(UNUSED const char *pass_name) { return false; } 4379bf215546Sopenharmony_cistatic inline bool should_print_nir(UNUSED nir_shader *shader) { return false; } 4380bf215546Sopenharmony_ci#endif /* NDEBUG */ 4381bf215546Sopenharmony_ci 4382bf215546Sopenharmony_ci#define _PASS(pass, nir, do_pass) do { \ 4383bf215546Sopenharmony_ci if (should_skip_nir(#pass)) { \ 4384bf215546Sopenharmony_ci printf("skipping %s\n", #pass); \ 
4385bf215546Sopenharmony_ci break; \ 4386bf215546Sopenharmony_ci } \ 4387bf215546Sopenharmony_ci do_pass \ 4388bf215546Sopenharmony_ci if (NIR_DEBUG(CLONE)) { \ 4389bf215546Sopenharmony_ci nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 4390bf215546Sopenharmony_ci nir_shader_replace(nir, clone); \ 4391bf215546Sopenharmony_ci } \ 4392bf215546Sopenharmony_ci if (NIR_DEBUG(SERIALIZE)) { \ 4393bf215546Sopenharmony_ci nir_shader_serialize_deserialize(nir); \ 4394bf215546Sopenharmony_ci } \ 4395bf215546Sopenharmony_ci} while (0) 4396bf215546Sopenharmony_ci 4397bf215546Sopenharmony_ci#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \ 4398bf215546Sopenharmony_ci nir_metadata_set_validation_flag(nir); \ 4399bf215546Sopenharmony_ci if (should_print_nir(nir)) \ 4400bf215546Sopenharmony_ci printf("%s\n", #pass); \ 4401bf215546Sopenharmony_ci if (pass(nir, ##__VA_ARGS__)) { \ 4402bf215546Sopenharmony_ci nir_validate_shader(nir, "after " #pass " in " __FILE__); \ 4403bf215546Sopenharmony_ci UNUSED bool _; \ 4404bf215546Sopenharmony_ci progress = true; \ 4405bf215546Sopenharmony_ci if (should_print_nir(nir)) \ 4406bf215546Sopenharmony_ci nir_print_shader(nir, stdout); \ 4407bf215546Sopenharmony_ci nir_metadata_check_validation_flag(nir); \ 4408bf215546Sopenharmony_ci } \ 4409bf215546Sopenharmony_ci) 4410bf215546Sopenharmony_ci 4411bf215546Sopenharmony_ci#define NIR_PASS_V(nir, pass, ...) 
_PASS(pass, nir, \ 4412bf215546Sopenharmony_ci if (should_print_nir(nir)) \ 4413bf215546Sopenharmony_ci printf("%s\n", #pass); \ 4414bf215546Sopenharmony_ci pass(nir, ##__VA_ARGS__); \ 4415bf215546Sopenharmony_ci nir_validate_shader(nir, "after " #pass " in " __FILE__); \ 4416bf215546Sopenharmony_ci if (should_print_nir(nir)) \ 4417bf215546Sopenharmony_ci nir_print_shader(nir, stdout); \ 4418bf215546Sopenharmony_ci) 4419bf215546Sopenharmony_ci 4420bf215546Sopenharmony_ci#define NIR_SKIP(name) should_skip_nir(#name) 4421bf215546Sopenharmony_ci 4422bf215546Sopenharmony_ci/** An instruction filtering callback with writemask 4423bf215546Sopenharmony_ci * 4424bf215546Sopenharmony_ci * Returns true if the instruction should be processed with the associated 4425bf215546Sopenharmony_ci * writemask and false otherwise. 4426bf215546Sopenharmony_ci */ 4427bf215546Sopenharmony_citypedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *, 4428bf215546Sopenharmony_ci unsigned writemask, const void *); 4429bf215546Sopenharmony_ci 4430bf215546Sopenharmony_ci/** A simple instruction lowering callback 4431bf215546Sopenharmony_ci * 4432bf215546Sopenharmony_ci * Many instruction lowering passes can be written as a simple function which 4433bf215546Sopenharmony_ci * takes an instruction as its input and returns a sequence of instructions 4434bf215546Sopenharmony_ci * that implement the consumed instruction. This function type represents 4435bf215546Sopenharmony_ci * such a lowering function. When called, a function with this prototype 4436bf215546Sopenharmony_ci * should either return NULL indicating that no lowering needs to be done or 4437bf215546Sopenharmony_ci * emit a sequence of instructions using the provided builder (whose cursor 4438bf215546Sopenharmony_ci * will already be placed after the instruction to be lowered) and return the 4439bf215546Sopenharmony_ci * resulting nir_ssa_def. 
4440bf215546Sopenharmony_ci */ 4441bf215546Sopenharmony_citypedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *, 4442bf215546Sopenharmony_ci nir_instr *, void *); 4443bf215546Sopenharmony_ci 4444bf215546Sopenharmony_ci/** 4445bf215546Sopenharmony_ci * Special return value for nir_lower_instr_cb when some progress occurred 4446bf215546Sopenharmony_ci * (like changing an input to the instr) that didn't result in a replacement 4447bf215546Sopenharmony_ci * SSA def being generated. 4448bf215546Sopenharmony_ci */ 4449bf215546Sopenharmony_ci#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1) 4450bf215546Sopenharmony_ci 4451bf215546Sopenharmony_ci/** 4452bf215546Sopenharmony_ci * Special return value for nir_lower_instr_cb when some progress occurred 4453bf215546Sopenharmony_ci * that should remove the current instruction that doesn't create an output 4454bf215546Sopenharmony_ci * (like a store) 4455bf215546Sopenharmony_ci */ 4456bf215546Sopenharmony_ci 4457bf215546Sopenharmony_ci#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2) 4458bf215546Sopenharmony_ci 4459bf215546Sopenharmony_ci/** Iterate over all the instructions in a nir_function_impl and lower them 4460bf215546Sopenharmony_ci * using the provided callbacks 4461bf215546Sopenharmony_ci * 4462bf215546Sopenharmony_ci * This function implements the guts of a standard lowering pass for you. It 4463bf215546Sopenharmony_ci * iterates over all of the instructions in a nir_function_impl and calls the 4464bf215546Sopenharmony_ci * filter callback on each one. If the filter callback returns true, it then 4465bf215546Sopenharmony_ci * calls the lowering call back on the instruction. (Splitting it this way 4466bf215546Sopenharmony_ci * allows us to avoid some save/restore work for instructions we know won't be 4467bf215546Sopenharmony_ci * lowered.) If the instruction is dead after the lowering is complete, it 4468bf215546Sopenharmony_ci * will be removed. 
If new instructions are added, the lowering callback will 4469bf215546Sopenharmony_ci * also be called on them in case multiple lowerings are required. 4470bf215546Sopenharmony_ci * 4471bf215546Sopenharmony_ci * If the callback indicates that the original instruction is replaced (either 4472bf215546Sopenharmony_ci * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the 4473bf215546Sopenharmony_ci * instruction is removed along with any now-dead SSA defs it used. 4474bf215546Sopenharmony_ci * 4475bf215546Sopenharmony_ci * The metadata for the nir_function_impl will also be updated. If any blocks 4476bf215546Sopenharmony_ci * are added (they cannot be removed), dominance and block indices will be 4477bf215546Sopenharmony_ci * invalidated. 4478bf215546Sopenharmony_ci */ 4479bf215546Sopenharmony_cibool nir_function_impl_lower_instructions(nir_function_impl *impl, 4480bf215546Sopenharmony_ci nir_instr_filter_cb filter, 4481bf215546Sopenharmony_ci nir_lower_instr_cb lower, 4482bf215546Sopenharmony_ci void *cb_data); 4483bf215546Sopenharmony_cibool nir_shader_lower_instructions(nir_shader *shader, 4484bf215546Sopenharmony_ci nir_instr_filter_cb filter, 4485bf215546Sopenharmony_ci nir_lower_instr_cb lower, 4486bf215546Sopenharmony_ci void *cb_data); 4487bf215546Sopenharmony_ci 4488bf215546Sopenharmony_civoid nir_calc_dominance_impl(nir_function_impl *impl); 4489bf215546Sopenharmony_civoid nir_calc_dominance(nir_shader *shader); 4490bf215546Sopenharmony_ci 4491bf215546Sopenharmony_cinir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 4492bf215546Sopenharmony_cibool nir_block_dominates(nir_block *parent, nir_block *child); 4493bf215546Sopenharmony_cibool nir_block_is_unreachable(nir_block *block); 4494bf215546Sopenharmony_ci 4495bf215546Sopenharmony_civoid nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 4496bf215546Sopenharmony_civoid nir_dump_dom_tree(nir_shader *shader, FILE *fp); 4497bf215546Sopenharmony_ci 4498bf215546Sopenharmony_civoid 
nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 4499bf215546Sopenharmony_civoid nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 4500bf215546Sopenharmony_ci 4501bf215546Sopenharmony_civoid nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 4502bf215546Sopenharmony_civoid nir_dump_cfg(nir_shader *shader, FILE *fp); 4503bf215546Sopenharmony_ci 4504bf215546Sopenharmony_civoid nir_gs_count_vertices_and_primitives(const nir_shader *shader, 4505bf215546Sopenharmony_ci int *out_vtxcnt, 4506bf215546Sopenharmony_ci int *out_prmcnt, 4507bf215546Sopenharmony_ci unsigned num_streams); 4508bf215546Sopenharmony_ci 4509bf215546Sopenharmony_citypedef enum { 4510bf215546Sopenharmony_ci nir_group_all, 4511bf215546Sopenharmony_ci nir_group_same_resource_only, 4512bf215546Sopenharmony_ci} nir_load_grouping; 4513bf215546Sopenharmony_ci 4514bf215546Sopenharmony_civoid nir_group_loads(nir_shader *shader, nir_load_grouping grouping, 4515bf215546Sopenharmony_ci unsigned max_distance); 4516bf215546Sopenharmony_ci 4517bf215546Sopenharmony_cibool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 4518bf215546Sopenharmony_cibool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 4519bf215546Sopenharmony_cibool nir_split_var_copies(nir_shader *shader); 4520bf215546Sopenharmony_cibool nir_split_per_member_structs(nir_shader *shader); 4521bf215546Sopenharmony_cibool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 4522bf215546Sopenharmony_ci 4523bf215546Sopenharmony_cibool nir_lower_returns_impl(nir_function_impl *impl); 4524bf215546Sopenharmony_cibool nir_lower_returns(nir_shader *shader); 4525bf215546Sopenharmony_ci 4526bf215546Sopenharmony_civoid nir_inline_function_impl(struct nir_builder *b, 4527bf215546Sopenharmony_ci const nir_function_impl *impl, 4528bf215546Sopenharmony_ci nir_ssa_def **params, 4529bf215546Sopenharmony_ci struct hash_table *shader_var_remap); 4530bf215546Sopenharmony_cibool 
nir_inline_functions(nir_shader *shader); 4531bf215546Sopenharmony_ci 4532bf215546Sopenharmony_civoid nir_find_inlinable_uniforms(nir_shader *shader); 4533bf215546Sopenharmony_civoid nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms, 4534bf215546Sopenharmony_ci const uint32_t *uniform_values, 4535bf215546Sopenharmony_ci const uint16_t *uniform_dw_offsets); 4536bf215546Sopenharmony_ci 4537bf215546Sopenharmony_cibool nir_propagate_invariant(nir_shader *shader, bool invariant_prim); 4538bf215546Sopenharmony_ci 4539bf215546Sopenharmony_civoid nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 4540bf215546Sopenharmony_civoid nir_lower_deref_copy_instr(struct nir_builder *b, 4541bf215546Sopenharmony_ci nir_intrinsic_instr *copy); 4542bf215546Sopenharmony_cibool nir_lower_var_copies(nir_shader *shader); 4543bf215546Sopenharmony_ci 4544bf215546Sopenharmony_cibool nir_opt_memcpy(nir_shader *shader); 4545bf215546Sopenharmony_cibool nir_lower_memcpy(nir_shader *shader); 4546bf215546Sopenharmony_ci 4547bf215546Sopenharmony_civoid nir_fixup_deref_modes(nir_shader *shader); 4548bf215546Sopenharmony_ci 4549bf215546Sopenharmony_cibool nir_lower_global_vars_to_local(nir_shader *shader); 4550bf215546Sopenharmony_ci 4551bf215546Sopenharmony_citypedef enum { 4552bf215546Sopenharmony_ci nir_lower_direct_array_deref_of_vec_load = (1 << 0), 4553bf215546Sopenharmony_ci nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 4554bf215546Sopenharmony_ci nir_lower_direct_array_deref_of_vec_store = (1 << 2), 4555bf215546Sopenharmony_ci nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 4556bf215546Sopenharmony_ci} nir_lower_array_deref_of_vec_options; 4557bf215546Sopenharmony_ci 4558bf215546Sopenharmony_cibool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 4559bf215546Sopenharmony_ci nir_lower_array_deref_of_vec_options options); 4560bf215546Sopenharmony_ci 4561bf215546Sopenharmony_cibool nir_lower_indirect_derefs(nir_shader 
*shader, nir_variable_mode modes, 4562bf215546Sopenharmony_ci uint32_t max_lower_array_len); 4563bf215546Sopenharmony_ci 4564bf215546Sopenharmony_cibool nir_lower_indirect_var_derefs(nir_shader *shader, 4565bf215546Sopenharmony_ci const struct set *vars); 4566bf215546Sopenharmony_ci 4567bf215546Sopenharmony_cibool nir_lower_locals_to_regs(nir_shader *shader); 4568bf215546Sopenharmony_ci 4569bf215546Sopenharmony_civoid nir_lower_io_to_temporaries(nir_shader *shader, 4570bf215546Sopenharmony_ci nir_function_impl *entrypoint, 4571bf215546Sopenharmony_ci bool outputs, bool inputs); 4572bf215546Sopenharmony_ci 4573bf215546Sopenharmony_cibool nir_lower_vars_to_scratch(nir_shader *shader, 4574bf215546Sopenharmony_ci nir_variable_mode modes, 4575bf215546Sopenharmony_ci int size_threshold, 4576bf215546Sopenharmony_ci glsl_type_size_align_func size_align); 4577bf215546Sopenharmony_ci 4578bf215546Sopenharmony_civoid nir_lower_clip_halfz(nir_shader *shader); 4579bf215546Sopenharmony_ci 4580bf215546Sopenharmony_civoid nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 4581bf215546Sopenharmony_ci 4582bf215546Sopenharmony_civoid nir_gather_ssa_types(nir_function_impl *impl, 4583bf215546Sopenharmony_ci BITSET_WORD *float_types, 4584bf215546Sopenharmony_ci BITSET_WORD *int_types); 4585bf215546Sopenharmony_ci 4586bf215546Sopenharmony_civoid nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 4587bf215546Sopenharmony_ci unsigned *size, 4588bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool)); 4589bf215546Sopenharmony_ci 4590bf215546Sopenharmony_ci/* Some helpers to do very simple linking */ 4591bf215546Sopenharmony_cibool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 4592bf215546Sopenharmony_cibool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, 4593bf215546Sopenharmony_ci uint64_t *used_by_other_stage, 4594bf215546Sopenharmony_ci uint64_t *used_by_other_stage_patches); 
4595bf215546Sopenharmony_civoid nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 4596bf215546Sopenharmony_ci bool default_to_smooth_interp); 4597bf215546Sopenharmony_civoid nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 4598bf215546Sopenharmony_cibool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 4599bf215546Sopenharmony_civoid nir_link_varying_precision(nir_shader *producer, nir_shader *consumer); 4600bf215546Sopenharmony_ci 4601bf215546Sopenharmony_cibool nir_slot_is_sysval_output(gl_varying_slot slot); 4602bf215546Sopenharmony_cibool nir_slot_is_varying(gl_varying_slot slot); 4603bf215546Sopenharmony_cibool nir_slot_is_sysval_output_and_varying(gl_varying_slot slot); 4604bf215546Sopenharmony_civoid nir_remove_varying(nir_intrinsic_instr *intr); 4605bf215546Sopenharmony_civoid nir_remove_sysval_output(nir_intrinsic_instr *intr); 4606bf215546Sopenharmony_ci 4607bf215546Sopenharmony_cibool nir_lower_amul(nir_shader *shader, 4608bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool)); 4609bf215546Sopenharmony_ci 4610bf215546Sopenharmony_cibool nir_lower_ubo_vec4(nir_shader *shader); 4611bf215546Sopenharmony_ci 4612bf215546Sopenharmony_civoid nir_assign_io_var_locations(nir_shader *shader, 4613bf215546Sopenharmony_ci nir_variable_mode mode, 4614bf215546Sopenharmony_ci unsigned *size, 4615bf215546Sopenharmony_ci gl_shader_stage stage); 4616bf215546Sopenharmony_ci 4617bf215546Sopenharmony_citypedef struct { 4618bf215546Sopenharmony_ci uint8_t num_linked_io_vars; 4619bf215546Sopenharmony_ci uint8_t num_linked_patch_io_vars; 4620bf215546Sopenharmony_ci} nir_linked_io_var_info; 4621bf215546Sopenharmony_ci 4622bf215546Sopenharmony_cinir_linked_io_var_info 4623bf215546Sopenharmony_cinir_assign_linked_io_var_locations(nir_shader *producer, 4624bf215546Sopenharmony_ci nir_shader *consumer); 4625bf215546Sopenharmony_ci 4626bf215546Sopenharmony_citypedef enum { 4627bf215546Sopenharmony_ci /* If set, this 
causes all 64-bit IO operations to be lowered on-the-fly 4628bf215546Sopenharmony_ci * to 32-bit operations. This is only valid for nir_var_shader_in/out 4629bf215546Sopenharmony_ci * modes. 4630bf215546Sopenharmony_ci */ 4631bf215546Sopenharmony_ci nir_lower_io_lower_64bit_to_32 = (1 << 0), 4632bf215546Sopenharmony_ci 4633bf215546Sopenharmony_ci /* If set, this forces all non-flat fragment shader inputs to be 4634bf215546Sopenharmony_ci * interpolated as if with the "sample" qualifier. This requires 4635bf215546Sopenharmony_ci * nir_shader_compiler_options::use_interpolated_input_intrinsics. 4636bf215546Sopenharmony_ci */ 4637bf215546Sopenharmony_ci nir_lower_io_force_sample_interpolation = (1 << 1), 4638bf215546Sopenharmony_ci} nir_lower_io_options; 4639bf215546Sopenharmony_cibool nir_lower_io(nir_shader *shader, 4640bf215546Sopenharmony_ci nir_variable_mode modes, 4641bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool), 4642bf215546Sopenharmony_ci nir_lower_io_options); 4643bf215546Sopenharmony_ci 4644bf215546Sopenharmony_cibool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes); 4645bf215546Sopenharmony_ci 4646bf215546Sopenharmony_civoid 4647bf215546Sopenharmony_cinir_lower_io_passes(nir_shader *nir); 4648bf215546Sopenharmony_ci 4649bf215546Sopenharmony_cibool nir_io_add_intrinsic_xfb_info(nir_shader *nir); 4650bf215546Sopenharmony_ci 4651bf215546Sopenharmony_cibool 4652bf215546Sopenharmony_cinir_lower_vars_to_explicit_types(nir_shader *shader, 4653bf215546Sopenharmony_ci nir_variable_mode modes, 4654bf215546Sopenharmony_ci glsl_type_size_align_func type_info); 4655bf215546Sopenharmony_civoid 4656bf215546Sopenharmony_cinir_gather_explicit_io_initializers(nir_shader *shader, 4657bf215546Sopenharmony_ci void *dst, size_t dst_size, 4658bf215546Sopenharmony_ci nir_variable_mode mode); 4659bf215546Sopenharmony_ci 4660bf215546Sopenharmony_cibool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes); 
4661bf215546Sopenharmony_ci 4662bf215546Sopenharmony_citypedef enum { 4663bf215546Sopenharmony_ci /** 4664bf215546Sopenharmony_ci * An address format which is a simple 32-bit global GPU address. 4665bf215546Sopenharmony_ci */ 4666bf215546Sopenharmony_ci nir_address_format_32bit_global, 4667bf215546Sopenharmony_ci 4668bf215546Sopenharmony_ci /** 4669bf215546Sopenharmony_ci * An address format which is a simple 64-bit global GPU address. 4670bf215546Sopenharmony_ci */ 4671bf215546Sopenharmony_ci nir_address_format_64bit_global, 4672bf215546Sopenharmony_ci 4673bf215546Sopenharmony_ci /** 4674bf215546Sopenharmony_ci * An address format which is a 64-bit global GPU address encoded as a 4675bf215546Sopenharmony_ci * 2x32-bit vector. 4676bf215546Sopenharmony_ci */ 4677bf215546Sopenharmony_ci nir_address_format_2x32bit_global, 4678bf215546Sopenharmony_ci 4679bf215546Sopenharmony_ci /** 4680bf215546Sopenharmony_ci * An address format which is a 64-bit global base address and a 32-bit 4681bf215546Sopenharmony_ci * offset. 4682bf215546Sopenharmony_ci * 4683bf215546Sopenharmony_ci * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4684bf215546Sopenharmony_ci * address stored with the low bits in .x and high bits in .y, .z is 4685bf215546Sopenharmony_ci * undefined, and .w is an offset. This is intended to match 4686bf215546Sopenharmony_ci * 64bit_bounded_global but without the bounds checking. 4687bf215546Sopenharmony_ci */ 4688bf215546Sopenharmony_ci nir_address_format_64bit_global_32bit_offset, 4689bf215546Sopenharmony_ci 4690bf215546Sopenharmony_ci /** 4691bf215546Sopenharmony_ci * An address format which is a bounds-checked 64-bit global GPU address. 4692bf215546Sopenharmony_ci * 4693bf215546Sopenharmony_ci * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4694bf215546Sopenharmony_ci * address stored with the low bits in .x and high bits in .y, .z is a 4695bf215546Sopenharmony_ci * size, and .w is an offset. 
When the final I/O operation is lowered, .w 4696bf215546Sopenharmony_ci * is checked against .z and the operation is predicated on the result. 4697bf215546Sopenharmony_ci */ 4698bf215546Sopenharmony_ci nir_address_format_64bit_bounded_global, 4699bf215546Sopenharmony_ci 4700bf215546Sopenharmony_ci /** 4701bf215546Sopenharmony_ci * An address format which is comprised of a vec2 where the first 4702bf215546Sopenharmony_ci * component is a buffer index and the second is an offset. 4703bf215546Sopenharmony_ci */ 4704bf215546Sopenharmony_ci nir_address_format_32bit_index_offset, 4705bf215546Sopenharmony_ci 4706bf215546Sopenharmony_ci /** 4707bf215546Sopenharmony_ci * An address format which is a 64-bit value, where the high 32 bits 4708bf215546Sopenharmony_ci * are a buffer index, and the low 32 bits are an offset. 4709bf215546Sopenharmony_ci */ 4710bf215546Sopenharmony_ci nir_address_format_32bit_index_offset_pack64, 4711bf215546Sopenharmony_ci 4712bf215546Sopenharmony_ci /** 4713bf215546Sopenharmony_ci * An address format which is comprised of a vec3 where the first two 4714bf215546Sopenharmony_ci * components specify the buffer and the third is an offset. 4715bf215546Sopenharmony_ci */ 4716bf215546Sopenharmony_ci nir_address_format_vec2_index_32bit_offset, 4717bf215546Sopenharmony_ci 4718bf215546Sopenharmony_ci /** 4719bf215546Sopenharmony_ci * An address format which represents generic pointers with a 62-bit 4720bf215546Sopenharmony_ci * pointer and a 2-bit enum in the top two bits. The top two bits have 4721bf215546Sopenharmony_ci * the following meanings: 4722bf215546Sopenharmony_ci * 4723bf215546Sopenharmony_ci * - 0x0: Global memory 4724bf215546Sopenharmony_ci * - 0x1: Shared memory 4725bf215546Sopenharmony_ci * - 0x2: Scratch memory 4726bf215546Sopenharmony_ci * - 0x3: Global memory 4727bf215546Sopenharmony_ci * 4728bf215546Sopenharmony_ci * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of 4729bf215546Sopenharmony_ci * addresses. 
Valid global memory addresses may naturally have either 0 or 4730bf215546Sopenharmony_ci * ~0 as their high bits. 4731bf215546Sopenharmony_ci * 4732bf215546Sopenharmony_ci * Shared and scratch pointers are represented as 32-bit offsets with the 4733bf215546Sopenharmony_ci * top 32 bits only being used for the enum. This allows us to avoid 4734bf215546Sopenharmony_ci * 64-bit address calculations in a bunch of cases. 4735bf215546Sopenharmony_ci */ 4736bf215546Sopenharmony_ci nir_address_format_62bit_generic, 4737bf215546Sopenharmony_ci 4738bf215546Sopenharmony_ci /** 4739bf215546Sopenharmony_ci * An address format which is a simple 32-bit offset. 4740bf215546Sopenharmony_ci */ 4741bf215546Sopenharmony_ci nir_address_format_32bit_offset, 4742bf215546Sopenharmony_ci 4743bf215546Sopenharmony_ci /** 4744bf215546Sopenharmony_ci * An address format which is a simple 32-bit offset cast to 64-bit. 4745bf215546Sopenharmony_ci */ 4746bf215546Sopenharmony_ci nir_address_format_32bit_offset_as_64bit, 4747bf215546Sopenharmony_ci 4748bf215546Sopenharmony_ci /** 4749bf215546Sopenharmony_ci * An address format representing a purely logical addressing model. In 4750bf215546Sopenharmony_ci * this model, all deref chains must be complete from the dereference 4751bf215546Sopenharmony_ci * operation to the variable. Cast derefs are not allowed. These 4752bf215546Sopenharmony_ci * addresses will be 32-bit scalars but the format is immaterial because 4753bf215546Sopenharmony_ci * you can always chase the chain. 
4754bf215546Sopenharmony_ci */ 4755bf215546Sopenharmony_ci nir_address_format_logical, 4756bf215546Sopenharmony_ci} nir_address_format; 4757bf215546Sopenharmony_ci 4758bf215546Sopenharmony_ciunsigned 4759bf215546Sopenharmony_cinir_address_format_bit_size(nir_address_format addr_format); 4760bf215546Sopenharmony_ci 4761bf215546Sopenharmony_ciunsigned 4762bf215546Sopenharmony_cinir_address_format_num_components(nir_address_format addr_format); 4763bf215546Sopenharmony_ci 4764bf215546Sopenharmony_cistatic inline const struct glsl_type * 4765bf215546Sopenharmony_cinir_address_format_to_glsl_type(nir_address_format addr_format) 4766bf215546Sopenharmony_ci{ 4767bf215546Sopenharmony_ci unsigned bit_size = nir_address_format_bit_size(addr_format); 4768bf215546Sopenharmony_ci assert(bit_size == 32 || bit_size == 64); 4769bf215546Sopenharmony_ci return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 4770bf215546Sopenharmony_ci nir_address_format_num_components(addr_format)); 4771bf215546Sopenharmony_ci} 4772bf215546Sopenharmony_ci 4773bf215546Sopenharmony_ciconst nir_const_value *nir_address_format_null_value(nir_address_format addr_format); 4774bf215546Sopenharmony_ci 4775bf215546Sopenharmony_cinir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4776bf215546Sopenharmony_ci nir_address_format addr_format); 4777bf215546Sopenharmony_ci 4778bf215546Sopenharmony_cinir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4779bf215546Sopenharmony_ci nir_address_format addr_format); 4780bf215546Sopenharmony_ci 4781bf215546Sopenharmony_cinir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 4782bf215546Sopenharmony_ci nir_deref_instr *deref, 4783bf215546Sopenharmony_ci nir_ssa_def *base_addr, 4784bf215546Sopenharmony_ci nir_address_format addr_format); 4785bf215546Sopenharmony_ci 4786bf215546Sopenharmony_cibool nir_get_explicit_deref_align(nir_deref_instr *deref, 
4787bf215546Sopenharmony_ci bool default_to_type_align, 4788bf215546Sopenharmony_ci uint32_t *align_mul, 4789bf215546Sopenharmony_ci uint32_t *align_offset); 4790bf215546Sopenharmony_ci 4791bf215546Sopenharmony_civoid nir_lower_explicit_io_instr(struct nir_builder *b, 4792bf215546Sopenharmony_ci nir_intrinsic_instr *io_instr, 4793bf215546Sopenharmony_ci nir_ssa_def *addr, 4794bf215546Sopenharmony_ci nir_address_format addr_format); 4795bf215546Sopenharmony_ci 4796bf215546Sopenharmony_cibool nir_lower_explicit_io(nir_shader *shader, 4797bf215546Sopenharmony_ci nir_variable_mode modes, 4798bf215546Sopenharmony_ci nir_address_format); 4799bf215546Sopenharmony_ci 4800bf215546Sopenharmony_cibool 4801bf215546Sopenharmony_cinir_lower_shader_calls(nir_shader *shader, 4802bf215546Sopenharmony_ci nir_address_format address_format, 4803bf215546Sopenharmony_ci unsigned stack_alignment, 4804bf215546Sopenharmony_ci nir_shader ***resume_shaders_out, 4805bf215546Sopenharmony_ci uint32_t *num_resume_shaders_out, 4806bf215546Sopenharmony_ci void *mem_ctx); 4807bf215546Sopenharmony_ci 4808bf215546Sopenharmony_cinir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 4809bf215546Sopenharmony_cinir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr); 4810bf215546Sopenharmony_cinir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); 4811bf215546Sopenharmony_ci 4812bf215546Sopenharmony_cibool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage); 4813bf215546Sopenharmony_ci 4814bf215546Sopenharmony_cibool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 4815bf215546Sopenharmony_cibool nir_lower_regs_to_ssa(nir_shader *shader); 4816bf215546Sopenharmony_cibool nir_lower_vars_to_ssa(nir_shader *shader); 4817bf215546Sopenharmony_ci 4818bf215546Sopenharmony_cibool nir_remove_dead_derefs(nir_shader *shader); 4819bf215546Sopenharmony_cibool nir_remove_dead_derefs_impl(nir_function_impl *impl); 4820bf215546Sopenharmony_ci 
4821bf215546Sopenharmony_citypedef struct nir_remove_dead_variables_options { 4822bf215546Sopenharmony_ci bool (*can_remove_var)(nir_variable *var, void *data); 4823bf215546Sopenharmony_ci void *can_remove_var_data; 4824bf215546Sopenharmony_ci} nir_remove_dead_variables_options; 4825bf215546Sopenharmony_ci 4826bf215546Sopenharmony_cibool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, 4827bf215546Sopenharmony_ci const nir_remove_dead_variables_options *options); 4828bf215546Sopenharmony_ci 4829bf215546Sopenharmony_cibool nir_lower_variable_initializers(nir_shader *shader, 4830bf215546Sopenharmony_ci nir_variable_mode modes); 4831bf215546Sopenharmony_cibool nir_zero_initialize_shared_memory(nir_shader *shader, 4832bf215546Sopenharmony_ci const unsigned shared_size, 4833bf215546Sopenharmony_ci const unsigned chunk_size); 4834bf215546Sopenharmony_ci 4835bf215546Sopenharmony_cibool nir_move_vec_src_uses_to_dest(nir_shader *shader); 4836bf215546Sopenharmony_cibool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb, 4837bf215546Sopenharmony_ci const void *_data); 4838bf215546Sopenharmony_civoid nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 4839bf215546Sopenharmony_ci bool alpha_to_one, 4840bf215546Sopenharmony_ci const gl_state_index16 *alpha_ref_state_tokens); 4841bf215546Sopenharmony_cibool nir_lower_alu(nir_shader *shader); 4842bf215546Sopenharmony_ci 4843bf215546Sopenharmony_cibool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, 4844bf215546Sopenharmony_ci bool always_precise); 4845bf215546Sopenharmony_ci 4846bf215546Sopenharmony_cibool nir_scale_fdiv(nir_shader *shader); 4847bf215546Sopenharmony_ci 4848bf215546Sopenharmony_cibool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 4849bf215546Sopenharmony_cibool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data); 4850bf215546Sopenharmony_cibool 
nir_lower_bool_to_bitsize(nir_shader *shader); 4851bf215546Sopenharmony_cibool nir_lower_bool_to_float(nir_shader *shader); 4852bf215546Sopenharmony_cibool nir_lower_bool_to_int32(nir_shader *shader); 4853bf215546Sopenharmony_cibool nir_opt_simplify_convert_alu_types(nir_shader *shader); 4854bf215546Sopenharmony_cibool nir_lower_const_arrays_to_uniforms(nir_shader *shader, 4855bf215546Sopenharmony_ci unsigned max_uniform_components); 4856bf215546Sopenharmony_cibool nir_lower_convert_alu_types(nir_shader *shader, 4857bf215546Sopenharmony_ci bool (*should_lower)(nir_intrinsic_instr *)); 4858bf215546Sopenharmony_cibool nir_lower_constant_convert_alu_types(nir_shader *shader); 4859bf215546Sopenharmony_cibool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); 4860bf215546Sopenharmony_cibool nir_lower_int_to_float(nir_shader *shader); 4861bf215546Sopenharmony_cibool nir_lower_load_const_to_scalar(nir_shader *shader); 4862bf215546Sopenharmony_cibool nir_lower_read_invocation_to_scalar(nir_shader *shader); 4863bf215546Sopenharmony_cibool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all); 4864bf215546Sopenharmony_civoid nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 4865bf215546Sopenharmony_civoid nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 4866bf215546Sopenharmony_ci bool outputs_only); 4867bf215546Sopenharmony_civoid nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 4868bf215546Sopenharmony_cibool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 4869bf215546Sopenharmony_cibool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); 4870bf215546Sopenharmony_cibool nir_vectorize_tess_levels(nir_shader *shader); 4871bf215546Sopenharmony_ci 4872bf215546Sopenharmony_cibool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs); 4873bf215546Sopenharmony_cibool nir_lower_fragcoord_wtrans(nir_shader *shader); 4874bf215546Sopenharmony_civoid 
nir_lower_viewport_transform(nir_shader *shader); 4875bf215546Sopenharmony_cibool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4); 4876bf215546Sopenharmony_ci 4877bf215546Sopenharmony_cibool nir_lower_is_helper_invocation(nir_shader *shader); 4878bf215546Sopenharmony_ci 4879bf215546Sopenharmony_cibool nir_lower_single_sampled(nir_shader *shader); 4880bf215546Sopenharmony_ci 4881bf215546Sopenharmony_citypedef struct nir_lower_subgroups_options { 4882bf215546Sopenharmony_ci uint8_t subgroup_size; 4883bf215546Sopenharmony_ci uint8_t ballot_bit_size; 4884bf215546Sopenharmony_ci uint8_t ballot_components; 4885bf215546Sopenharmony_ci bool lower_to_scalar:1; 4886bf215546Sopenharmony_ci bool lower_vote_trivial:1; 4887bf215546Sopenharmony_ci bool lower_vote_eq:1; 4888bf215546Sopenharmony_ci bool lower_subgroup_masks:1; 4889bf215546Sopenharmony_ci bool lower_relative_shuffle:1; 4890bf215546Sopenharmony_ci bool lower_shuffle_to_32bit:1; 4891bf215546Sopenharmony_ci bool lower_shuffle_to_swizzle_amd:1; 4892bf215546Sopenharmony_ci bool lower_shuffle:1; 4893bf215546Sopenharmony_ci bool lower_quad:1; 4894bf215546Sopenharmony_ci bool lower_quad_broadcast_dynamic:1; 4895bf215546Sopenharmony_ci bool lower_quad_broadcast_dynamic_to_const:1; 4896bf215546Sopenharmony_ci bool lower_elect:1; 4897bf215546Sopenharmony_ci bool lower_read_invocation_to_cond:1; 4898bf215546Sopenharmony_ci} nir_lower_subgroups_options; 4899bf215546Sopenharmony_ci 4900bf215546Sopenharmony_cibool nir_lower_subgroups(nir_shader *shader, 4901bf215546Sopenharmony_ci const nir_lower_subgroups_options *options); 4902bf215546Sopenharmony_ci 4903bf215546Sopenharmony_cibool nir_lower_system_values(nir_shader *shader); 4904bf215546Sopenharmony_ci 4905bf215546Sopenharmony_citypedef struct nir_lower_compute_system_values_options { 4906bf215546Sopenharmony_ci bool has_base_global_invocation_id:1; 4907bf215546Sopenharmony_ci bool has_base_workgroup_id:1; 4908bf215546Sopenharmony_ci bool 
shuffle_local_ids_for_quad_derivatives:1; 4909bf215546Sopenharmony_ci bool lower_local_invocation_index:1; 4910bf215546Sopenharmony_ci bool lower_cs_local_id_to_index:1; 4911bf215546Sopenharmony_ci bool lower_workgroup_id_to_index:1; 4912bf215546Sopenharmony_ci} nir_lower_compute_system_values_options; 4913bf215546Sopenharmony_ci 4914bf215546Sopenharmony_cibool nir_lower_compute_system_values(nir_shader *shader, 4915bf215546Sopenharmony_ci const nir_lower_compute_system_values_options *options); 4916bf215546Sopenharmony_ci 4917bf215546Sopenharmony_cistruct nir_lower_sysvals_to_varyings_options { 4918bf215546Sopenharmony_ci bool frag_coord:1; 4919bf215546Sopenharmony_ci bool front_face:1; 4920bf215546Sopenharmony_ci bool point_coord:1; 4921bf215546Sopenharmony_ci}; 4922bf215546Sopenharmony_ci 4923bf215546Sopenharmony_cibool 4924bf215546Sopenharmony_cinir_lower_sysvals_to_varyings(nir_shader *shader, 4925bf215546Sopenharmony_ci const struct nir_lower_sysvals_to_varyings_options *options); 4926bf215546Sopenharmony_ci 4927bf215546Sopenharmony_cienum PACKED nir_lower_tex_packing { 4928bf215546Sopenharmony_ci /** No packing */ 4929bf215546Sopenharmony_ci nir_lower_tex_packing_none = 0, 4930bf215546Sopenharmony_ci /** 4931bf215546Sopenharmony_ci * The sampler returns up to 2 32-bit words of half floats or 16-bit signed 4932bf215546Sopenharmony_ci * or unsigned ints based on the sampler type 4933bf215546Sopenharmony_ci */ 4934bf215546Sopenharmony_ci nir_lower_tex_packing_16, 4935bf215546Sopenharmony_ci /** The sampler returns 1 32-bit word of 4x8 unorm */ 4936bf215546Sopenharmony_ci nir_lower_tex_packing_8, 4937bf215546Sopenharmony_ci}; 4938bf215546Sopenharmony_ci 4939bf215546Sopenharmony_citypedef struct nir_lower_tex_options { 4940bf215546Sopenharmony_ci /** 4941bf215546Sopenharmony_ci * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 4942bf215546Sopenharmony_ci * sampler types a texture projector is lowered. 
4943bf215546Sopenharmony_ci */ 4944bf215546Sopenharmony_ci unsigned lower_txp; 4945bf215546Sopenharmony_ci 4946bf215546Sopenharmony_ci /** 4947bf215546Sopenharmony_ci * If true, lower texture projector for any array sampler dims 4948bf215546Sopenharmony_ci */ 4949bf215546Sopenharmony_ci bool lower_txp_array; 4950bf215546Sopenharmony_ci 4951bf215546Sopenharmony_ci /** 4952bf215546Sopenharmony_ci * If true, lower away nir_tex_src_offset for all texelfetch instructions. 4953bf215546Sopenharmony_ci */ 4954bf215546Sopenharmony_ci bool lower_txf_offset; 4955bf215546Sopenharmony_ci 4956bf215546Sopenharmony_ci /** 4957bf215546Sopenharmony_ci * If true, lower away nir_tex_src_offset for all rect textures. 4958bf215546Sopenharmony_ci */ 4959bf215546Sopenharmony_ci bool lower_rect_offset; 4960bf215546Sopenharmony_ci 4961bf215546Sopenharmony_ci /** 4962bf215546Sopenharmony_ci * If not NULL, this filter will return true for tex instructions that 4963bf215546Sopenharmony_ci * should lower away nir_tex_src_offset. 4964bf215546Sopenharmony_ci */ 4965bf215546Sopenharmony_ci nir_instr_filter_cb lower_offset_filter; 4966bf215546Sopenharmony_ci 4967bf215546Sopenharmony_ci /** 4968bf215546Sopenharmony_ci * If true, lower rect textures to 2D, using txs to fetch the 4969bf215546Sopenharmony_ci * texture dimensions and dividing the texture coords by the 4970bf215546Sopenharmony_ci * texture dims to normalize. 4971bf215546Sopenharmony_ci */ 4972bf215546Sopenharmony_ci bool lower_rect; 4973bf215546Sopenharmony_ci 4974bf215546Sopenharmony_ci /** 4975bf215546Sopenharmony_ci * If true, convert yuv to rgb. 
4976bf215546Sopenharmony_ci */ 4977bf215546Sopenharmony_ci unsigned lower_y_uv_external; 4978bf215546Sopenharmony_ci unsigned lower_y_u_v_external; 4979bf215546Sopenharmony_ci unsigned lower_yx_xuxv_external; 4980bf215546Sopenharmony_ci unsigned lower_xy_uxvx_external; 4981bf215546Sopenharmony_ci unsigned lower_ayuv_external; 4982bf215546Sopenharmony_ci unsigned lower_xyuv_external; 4983bf215546Sopenharmony_ci unsigned lower_yuv_external; 4984bf215546Sopenharmony_ci unsigned lower_yu_yv_external; 4985bf215546Sopenharmony_ci unsigned lower_y41x_external; 4986bf215546Sopenharmony_ci unsigned bt709_external; 4987bf215546Sopenharmony_ci unsigned bt2020_external; 4988bf215546Sopenharmony_ci unsigned yuv_full_range_external; 4989bf215546Sopenharmony_ci 4990bf215546Sopenharmony_ci /** 4991bf215546Sopenharmony_ci * To emulate certain texture wrap modes, this can be used 4992bf215546Sopenharmony_ci * to saturate the specified tex coord to [0.0, 1.0]. The 4993bf215546Sopenharmony_ci * bits are according to sampler #, ie. if, for example: 4994bf215546Sopenharmony_ci * 4995bf215546Sopenharmony_ci * (conf->saturate_s & (1 << n)) 4996bf215546Sopenharmony_ci * 4997bf215546Sopenharmony_ci * is true, then the s coord for sampler n is saturated. 4998bf215546Sopenharmony_ci * 4999bf215546Sopenharmony_ci * Note that clamping must happen *after* projector lowering 5000bf215546Sopenharmony_ci * so any projected texture sample instruction with a clamped 5001bf215546Sopenharmony_ci * coordinate gets automatically lowered, regardless of the 5002bf215546Sopenharmony_ci * 'lower_txp' setting. 5003bf215546Sopenharmony_ci */ 5004bf215546Sopenharmony_ci unsigned saturate_s; 5005bf215546Sopenharmony_ci unsigned saturate_t; 5006bf215546Sopenharmony_ci unsigned saturate_r; 5007bf215546Sopenharmony_ci 5008bf215546Sopenharmony_ci /* Bitmask of textures that need swizzling. 
5009bf215546Sopenharmony_ci * 5010bf215546Sopenharmony_ci * If (swizzle_result & (1 << texture_index)), then the swizzle in 5011bf215546Sopenharmony_ci * swizzles[texture_index] is applied to the result of the texturing 5012bf215546Sopenharmony_ci * operation. 5013bf215546Sopenharmony_ci */ 5014bf215546Sopenharmony_ci unsigned swizzle_result; 5015bf215546Sopenharmony_ci 5016bf215546Sopenharmony_ci /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 5017bf215546Sopenharmony_ci * while 4 and 5 represent 0 and 1 respectively. 5018bf215546Sopenharmony_ci * 5019bf215546Sopenharmony_ci * Indexed by texture-id. 5020bf215546Sopenharmony_ci */ 5021bf215546Sopenharmony_ci uint8_t swizzles[32][4]; 5022bf215546Sopenharmony_ci 5023bf215546Sopenharmony_ci /* Can be used to scale sampled values in range required by the 5024bf215546Sopenharmony_ci * format. 5025bf215546Sopenharmony_ci * 5026bf215546Sopenharmony_ci * Indexed by texture-id. 5027bf215546Sopenharmony_ci */ 5028bf215546Sopenharmony_ci float scale_factors[32]; 5029bf215546Sopenharmony_ci 5030bf215546Sopenharmony_ci /** 5031bf215546Sopenharmony_ci * Bitmap of textures that need srgb to linear conversion. If 5032bf215546Sopenharmony_ci * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 5033bf215546Sopenharmony_ci * of the texture are lowered to linear. 5034bf215546Sopenharmony_ci */ 5035bf215546Sopenharmony_ci unsigned lower_srgb; 5036bf215546Sopenharmony_ci 5037bf215546Sopenharmony_ci /** 5038bf215546Sopenharmony_ci * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 5039bf215546Sopenharmony_ci */ 5040bf215546Sopenharmony_ci bool lower_txd_cube_map; 5041bf215546Sopenharmony_ci 5042bf215546Sopenharmony_ci /** 5043bf215546Sopenharmony_ci * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 
5044bf215546Sopenharmony_ci */ 5045bf215546Sopenharmony_ci bool lower_txd_3d; 5046bf215546Sopenharmony_ci 5047bf215546Sopenharmony_ci /** 5048bf215546Sopenharmony_ci * If true, lower nir_texop_txd any array surfaces with nir_texop_txl. 5049bf215546Sopenharmony_ci */ 5050bf215546Sopenharmony_ci bool lower_txd_array; 5051bf215546Sopenharmony_ci 5052bf215546Sopenharmony_ci /** 5053bf215546Sopenharmony_ci * If true, lower nir_texop_txd on shadow samplers (except cube maps) 5054bf215546Sopenharmony_ci * with nir_texop_txl. Notice that cube map shadow samplers are lowered 5055bf215546Sopenharmony_ci * with lower_txd_cube_map. 5056bf215546Sopenharmony_ci */ 5057bf215546Sopenharmony_ci bool lower_txd_shadow; 5058bf215546Sopenharmony_ci 5059bf215546Sopenharmony_ci /** 5060bf215546Sopenharmony_ci * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 5061bf215546Sopenharmony_ci * Implies lower_txd_cube_map and lower_txd_shadow. 5062bf215546Sopenharmony_ci */ 5063bf215546Sopenharmony_ci bool lower_txd; 5064bf215546Sopenharmony_ci 5065bf215546Sopenharmony_ci /** 5066bf215546Sopenharmony_ci * If true, lower nir_texop_txb that try to use shadow compare and min_lod 5067bf215546Sopenharmony_ci * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 5068bf215546Sopenharmony_ci */ 5069bf215546Sopenharmony_ci bool lower_txb_shadow_clamp; 5070bf215546Sopenharmony_ci 5071bf215546Sopenharmony_ci /** 5072bf215546Sopenharmony_ci * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 5073bf215546Sopenharmony_ci * with nir_texop_txl. This includes cube maps. 5074bf215546Sopenharmony_ci */ 5075bf215546Sopenharmony_ci bool lower_txd_shadow_clamp; 5076bf215546Sopenharmony_ci 5077bf215546Sopenharmony_ci /** 5078bf215546Sopenharmony_ci * If true, lower nir_texop_txd on when it uses both offset and min_lod 5079bf215546Sopenharmony_ci * with nir_texop_txl. This includes cube maps. 
5080bf215546Sopenharmony_ci */ 5081bf215546Sopenharmony_ci bool lower_txd_offset_clamp; 5082bf215546Sopenharmony_ci 5083bf215546Sopenharmony_ci /** 5084bf215546Sopenharmony_ci * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 5085bf215546Sopenharmony_ci * sampler is bindless. 5086bf215546Sopenharmony_ci */ 5087bf215546Sopenharmony_ci bool lower_txd_clamp_bindless_sampler; 5088bf215546Sopenharmony_ci 5089bf215546Sopenharmony_ci /** 5090bf215546Sopenharmony_ci * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 5091bf215546Sopenharmony_ci * sampler index is not statically determinable to be less than 16. 5092bf215546Sopenharmony_ci */ 5093bf215546Sopenharmony_ci bool lower_txd_clamp_if_sampler_index_not_lt_16; 5094bf215546Sopenharmony_ci 5095bf215546Sopenharmony_ci /** 5096bf215546Sopenharmony_ci * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with 5097bf215546Sopenharmony_ci * 0-lod followed by a nir_ishr. 5098bf215546Sopenharmony_ci */ 5099bf215546Sopenharmony_ci bool lower_txs_lod; 5100bf215546Sopenharmony_ci 5101bf215546Sopenharmony_ci /** 5102bf215546Sopenharmony_ci * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a 5103bf215546Sopenharmony_ci * 2D array type followed by a nir_idiv by 6. 5104bf215546Sopenharmony_ci */ 5105bf215546Sopenharmony_ci bool lower_txs_cube_array; 5106bf215546Sopenharmony_ci 5107bf215546Sopenharmony_ci /** 5108bf215546Sopenharmony_ci * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 5109bf215546Sopenharmony_ci * mixed-up tg4 locations. 
5110bf215546Sopenharmony_ci */ 5111bf215546Sopenharmony_ci bool lower_tg4_broadcom_swizzle; 5112bf215546Sopenharmony_ci 5113bf215546Sopenharmony_ci /** 5114bf215546Sopenharmony_ci * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 5115bf215546Sopenharmony_ci */ 5116bf215546Sopenharmony_ci bool lower_tg4_offsets; 5117bf215546Sopenharmony_ci 5118bf215546Sopenharmony_ci /** 5119bf215546Sopenharmony_ci * Lower txf_ms to fragment_mask_fetch and fragment_fetch and samples_identical to 5120bf215546Sopenharmony_ci * fragment_mask_fetch. 5121bf215546Sopenharmony_ci */ 5122bf215546Sopenharmony_ci bool lower_to_fragment_fetch_amd; 5123bf215546Sopenharmony_ci 5124bf215546Sopenharmony_ci /** 5125bf215546Sopenharmony_ci * To lower packed sampler return formats. 5126bf215546Sopenharmony_ci * 5127bf215546Sopenharmony_ci * Indexed by sampler-id. 5128bf215546Sopenharmony_ci */ 5129bf215546Sopenharmony_ci enum nir_lower_tex_packing lower_tex_packing[32]; 5130bf215546Sopenharmony_ci 5131bf215546Sopenharmony_ci /** 5132bf215546Sopenharmony_ci * If true, lower nir_texop_lod to return -FLT_MAX if the sum of the 5133bf215546Sopenharmony_ci * absolute values of derivatives is 0 for all coordinates. 5134bf215546Sopenharmony_ci */ 5135bf215546Sopenharmony_ci bool lower_lod_zero_width; 5136bf215546Sopenharmony_ci 5137bf215546Sopenharmony_ci /* Turns nir_op_tex and other ops with an implicit derivative, in stages 5138bf215546Sopenharmony_ci * without implicit derivatives (like the vertex shader) to have an explicit 5139bf215546Sopenharmony_ci * LOD with a value of 0. 5140bf215546Sopenharmony_ci */ 5141bf215546Sopenharmony_ci bool lower_invalid_implicit_lod; 5142bf215546Sopenharmony_ci 5143bf215546Sopenharmony_ci /* If true, round the layer component of the coordinates source to the nearest 5144bf215546Sopenharmony_ci * integer for all array ops. 
5145bf215546Sopenharmony_ci */ 5146bf215546Sopenharmony_ci bool lower_array_layer_round_even; 5147bf215546Sopenharmony_ci 5148bf215546Sopenharmony_ci /** 5149bf215546Sopenharmony_ci * Payload data to be sent to callback / filter functions. 5150bf215546Sopenharmony_ci */ 5151bf215546Sopenharmony_ci void *callback_data; 5152bf215546Sopenharmony_ci} nir_lower_tex_options; 5153bf215546Sopenharmony_ci 5154bf215546Sopenharmony_ci/** Lowers complex texture instructions to simpler ones */ 5155bf215546Sopenharmony_cibool nir_lower_tex(nir_shader *shader, 5156bf215546Sopenharmony_ci const nir_lower_tex_options *options); 5157bf215546Sopenharmony_ci 5158bf215546Sopenharmony_ci 5159bf215546Sopenharmony_citypedef struct nir_lower_tex_shadow_swizzle { 5160bf215546Sopenharmony_ci unsigned swizzle_r:3; 5161bf215546Sopenharmony_ci unsigned swizzle_g:3; 5162bf215546Sopenharmony_ci unsigned swizzle_b:3; 5163bf215546Sopenharmony_ci unsigned swizzle_a:3; 5164bf215546Sopenharmony_ci} nir_lower_tex_shadow_swizzle; 5165bf215546Sopenharmony_ci 5166bf215546Sopenharmony_cibool 5167bf215546Sopenharmony_cinir_lower_tex_shadow(nir_shader *s, 5168bf215546Sopenharmony_ci unsigned n_states, 5169bf215546Sopenharmony_ci enum compare_func *compare_func, 5170bf215546Sopenharmony_ci nir_lower_tex_shadow_swizzle *tex_swizzles); 5171bf215546Sopenharmony_ci 5172bf215546Sopenharmony_citypedef struct nir_lower_image_options { 5173bf215546Sopenharmony_ci /** 5174bf215546Sopenharmony_ci * If true, lower cube size operations. 
5175bf215546Sopenharmony_ci */ 5176bf215546Sopenharmony_ci bool lower_cube_size; 5177bf215546Sopenharmony_ci} nir_lower_image_options; 5178bf215546Sopenharmony_ci 5179bf215546Sopenharmony_cibool nir_lower_image(nir_shader *nir, 5180bf215546Sopenharmony_ci const nir_lower_image_options *options); 5181bf215546Sopenharmony_ci 5182bf215546Sopenharmony_cibool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable); 5183bf215546Sopenharmony_ci 5184bf215546Sopenharmony_cienum nir_lower_non_uniform_access_type { 5185bf215546Sopenharmony_ci nir_lower_non_uniform_ubo_access = (1 << 0), 5186bf215546Sopenharmony_ci nir_lower_non_uniform_ssbo_access = (1 << 1), 5187bf215546Sopenharmony_ci nir_lower_non_uniform_texture_access = (1 << 2), 5188bf215546Sopenharmony_ci nir_lower_non_uniform_image_access = (1 << 3), 5189bf215546Sopenharmony_ci}; 5190bf215546Sopenharmony_ci 5191bf215546Sopenharmony_ci/* Given the nir_src used for the resource, return the channels which might be non-uniform. */ 5192bf215546Sopenharmony_citypedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *); 5193bf215546Sopenharmony_ci 5194bf215546Sopenharmony_citypedef struct nir_lower_non_uniform_access_options { 5195bf215546Sopenharmony_ci enum nir_lower_non_uniform_access_type types; 5196bf215546Sopenharmony_ci nir_lower_non_uniform_access_callback callback; 5197bf215546Sopenharmony_ci void *callback_data; 5198bf215546Sopenharmony_ci} nir_lower_non_uniform_access_options; 5199bf215546Sopenharmony_ci 5200bf215546Sopenharmony_cibool nir_lower_non_uniform_access(nir_shader *shader, 5201bf215546Sopenharmony_ci const nir_lower_non_uniform_access_options *options); 5202bf215546Sopenharmony_ci 5203bf215546Sopenharmony_citypedef struct { 5204bf215546Sopenharmony_ci /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV() 5205bf215546Sopenharmony_ci * is used. 
It is the faster of the two but it is not exact in some cases 5206bf215546Sopenharmony_ci * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432). 5207bf215546Sopenharmony_ci * 5208bf215546Sopenharmony_ci * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and 5209bf215546Sopenharmony_ci * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more 5210bf215546Sopenharmony_ci * instructions than the nv50 path and many of them are integer 5211bf215546Sopenharmony_ci * multiplications, so it is probably slower. It should always return the 5212bf215546Sopenharmony_ci * correct result, though. 5213bf215546Sopenharmony_ci */ 5214bf215546Sopenharmony_ci bool imprecise_32bit_lowering; 5215bf215546Sopenharmony_ci 5216bf215546Sopenharmony_ci /* Whether 16-bit floating point arithmetic should be allowed in 8-bit 5217bf215546Sopenharmony_ci * division lowering 5218bf215546Sopenharmony_ci */ 5219bf215546Sopenharmony_ci bool allow_fp16; 5220bf215546Sopenharmony_ci} nir_lower_idiv_options; 5221bf215546Sopenharmony_ci 5222bf215546Sopenharmony_cibool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options); 5223bf215546Sopenharmony_ci 5224bf215546Sopenharmony_citypedef struct nir_input_attachment_options { 5225bf215546Sopenharmony_ci bool use_fragcoord_sysval; 5226bf215546Sopenharmony_ci bool use_layer_id_sysval; 5227bf215546Sopenharmony_ci bool use_view_id_for_layer; 5228bf215546Sopenharmony_ci} nir_input_attachment_options; 5229bf215546Sopenharmony_ci 5230bf215546Sopenharmony_cibool nir_lower_input_attachments(nir_shader *shader, 5231bf215546Sopenharmony_ci const nir_input_attachment_options *options); 5232bf215546Sopenharmony_ci 5233bf215546Sopenharmony_cibool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, 5234bf215546Sopenharmony_ci bool use_vars, 5235bf215546Sopenharmony_ci bool use_clipdist_array, 5236bf215546Sopenharmony_ci const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 
5237bf215546Sopenharmony_cibool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, 5238bf215546Sopenharmony_ci bool use_clipdist_array, 5239bf215546Sopenharmony_ci const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 5240bf215546Sopenharmony_cibool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, 5241bf215546Sopenharmony_ci bool use_clipdist_array); 5242bf215546Sopenharmony_cibool nir_lower_clip_cull_distance_arrays(nir_shader *nir); 5243bf215546Sopenharmony_cibool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable); 5244bf215546Sopenharmony_ci 5245bf215546Sopenharmony_civoid nir_lower_point_size_mov(nir_shader *shader, 5246bf215546Sopenharmony_ci const gl_state_index16 *pointsize_state_tokens); 5247bf215546Sopenharmony_ci 5248bf215546Sopenharmony_cibool nir_lower_frexp(nir_shader *nir); 5249bf215546Sopenharmony_ci 5250bf215546Sopenharmony_civoid nir_lower_two_sided_color(nir_shader *shader, bool face_sysval); 5251bf215546Sopenharmony_ci 5252bf215546Sopenharmony_cibool nir_lower_clamp_color_outputs(nir_shader *shader); 5253bf215546Sopenharmony_ci 5254bf215546Sopenharmony_cibool nir_lower_flatshade(nir_shader *shader); 5255bf215546Sopenharmony_ci 5256bf215546Sopenharmony_civoid nir_lower_passthrough_edgeflags(nir_shader *shader); 5257bf215546Sopenharmony_cibool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, 5258bf215546Sopenharmony_ci const gl_state_index16 *uniform_state_tokens); 5259bf215546Sopenharmony_ci 5260bf215546Sopenharmony_citypedef struct nir_lower_wpos_ytransform_options { 5261bf215546Sopenharmony_ci gl_state_index16 state_tokens[STATE_LENGTH]; 5262bf215546Sopenharmony_ci bool fs_coord_origin_upper_left :1; 5263bf215546Sopenharmony_ci bool fs_coord_origin_lower_left :1; 5264bf215546Sopenharmony_ci bool fs_coord_pixel_center_integer :1; 5265bf215546Sopenharmony_ci bool fs_coord_pixel_center_half_integer :1; 5266bf215546Sopenharmony_ci} nir_lower_wpos_ytransform_options; 
5267bf215546Sopenharmony_ci 5268bf215546Sopenharmony_cibool nir_lower_wpos_ytransform(nir_shader *shader, 5269bf215546Sopenharmony_ci const nir_lower_wpos_ytransform_options *options); 5270bf215546Sopenharmony_cibool nir_lower_wpos_center(nir_shader *shader); 5271bf215546Sopenharmony_ci 5272bf215546Sopenharmony_cibool nir_lower_pntc_ytransform(nir_shader *shader, 5273bf215546Sopenharmony_ci const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 5274bf215546Sopenharmony_ci 5275bf215546Sopenharmony_cibool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 5276bf215546Sopenharmony_ci 5277bf215546Sopenharmony_cibool nir_lower_fb_read(nir_shader *shader); 5278bf215546Sopenharmony_ci 5279bf215546Sopenharmony_citypedef struct nir_lower_drawpixels_options { 5280bf215546Sopenharmony_ci gl_state_index16 texcoord_state_tokens[STATE_LENGTH]; 5281bf215546Sopenharmony_ci gl_state_index16 scale_state_tokens[STATE_LENGTH]; 5282bf215546Sopenharmony_ci gl_state_index16 bias_state_tokens[STATE_LENGTH]; 5283bf215546Sopenharmony_ci unsigned drawpix_sampler; 5284bf215546Sopenharmony_ci unsigned pixelmap_sampler; 5285bf215546Sopenharmony_ci bool pixel_maps :1; 5286bf215546Sopenharmony_ci bool scale_and_bias :1; 5287bf215546Sopenharmony_ci} nir_lower_drawpixels_options; 5288bf215546Sopenharmony_ci 5289bf215546Sopenharmony_civoid nir_lower_drawpixels(nir_shader *shader, 5290bf215546Sopenharmony_ci const nir_lower_drawpixels_options *options); 5291bf215546Sopenharmony_ci 5292bf215546Sopenharmony_citypedef struct nir_lower_bitmap_options { 5293bf215546Sopenharmony_ci unsigned sampler; 5294bf215546Sopenharmony_ci bool swizzle_xxxx; 5295bf215546Sopenharmony_ci} nir_lower_bitmap_options; 5296bf215546Sopenharmony_ci 5297bf215546Sopenharmony_civoid nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); 5298bf215546Sopenharmony_ci 5299bf215546Sopenharmony_cibool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned 
offset_align_state); 5300bf215546Sopenharmony_ci 5301bf215546Sopenharmony_citypedef enum { 5302bf215546Sopenharmony_ci nir_lower_int_source_mods = 1 << 0, 5303bf215546Sopenharmony_ci nir_lower_fabs_source_mods = 1 << 1, 5304bf215546Sopenharmony_ci nir_lower_fneg_source_mods = 1 << 2, 5305bf215546Sopenharmony_ci nir_lower_64bit_source_mods = 1 << 3, 5306bf215546Sopenharmony_ci nir_lower_triop_abs = 1 << 4, 5307bf215546Sopenharmony_ci nir_lower_all_source_mods = (1 << 5) - 1 5308bf215546Sopenharmony_ci} nir_lower_to_source_mods_flags; 5309bf215546Sopenharmony_ci 5310bf215546Sopenharmony_ci#define nir_lower_float_source_mods (nir_lower_fabs_source_mods | nir_lower_fneg_source_mods) 5311bf215546Sopenharmony_ci 5312bf215546Sopenharmony_cibool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); 5313bf215546Sopenharmony_ci 5314bf215546Sopenharmony_citypedef enum { 5315bf215546Sopenharmony_ci nir_lower_gs_intrinsics_per_stream = 1 << 0, 5316bf215546Sopenharmony_ci nir_lower_gs_intrinsics_count_primitives = 1 << 1, 5317bf215546Sopenharmony_ci nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2, 5318bf215546Sopenharmony_ci nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3, 5319bf215546Sopenharmony_ci} nir_lower_gs_intrinsics_flags; 5320bf215546Sopenharmony_ci 5321bf215546Sopenharmony_cibool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options); 5322bf215546Sopenharmony_ci 5323bf215546Sopenharmony_citypedef struct { 5324bf215546Sopenharmony_ci bool payload_to_shared_for_atomics : 1; 5325bf215546Sopenharmony_ci} nir_lower_task_shader_options; 5326bf215546Sopenharmony_ci 5327bf215546Sopenharmony_cibool nir_lower_task_shader(nir_shader *shader, nir_lower_task_shader_options options); 5328bf215546Sopenharmony_ci 5329bf215546Sopenharmony_citypedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *); 5330bf215546Sopenharmony_ci 5331bf215546Sopenharmony_cibool nir_lower_bit_size(nir_shader 
*shader, 5332bf215546Sopenharmony_ci nir_lower_bit_size_callback callback, 5333bf215546Sopenharmony_ci void *callback_data); 5334bf215546Sopenharmony_cibool nir_lower_64bit_phis(nir_shader *shader); 5335bf215546Sopenharmony_ci 5336bf215546Sopenharmony_cibool nir_split_64bit_vec3_and_vec4(nir_shader *shader); 5337bf215546Sopenharmony_ci 5338bf215546Sopenharmony_cinir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode); 5339bf215546Sopenharmony_cibool nir_lower_int64(nir_shader *shader); 5340bf215546Sopenharmony_ci 5341bf215546Sopenharmony_cinir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode); 5342bf215546Sopenharmony_cibool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, 5343bf215546Sopenharmony_ci nir_lower_doubles_options options); 5344bf215546Sopenharmony_cibool nir_lower_pack(nir_shader *shader); 5345bf215546Sopenharmony_ci 5346bf215546Sopenharmony_cibool nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes); 5347bf215546Sopenharmony_cibool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes, 5348bf215546Sopenharmony_ci uint64_t varying_mask, bool use_16bit_slots); 5349bf215546Sopenharmony_cibool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes, 5350bf215546Sopenharmony_ci nir_alu_type types); 5351bf215546Sopenharmony_cibool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes); 5352bf215546Sopenharmony_ci 5353bf215546Sopenharmony_cistruct nir_fold_tex_srcs_options { 5354bf215546Sopenharmony_ci unsigned sampler_dims; 5355bf215546Sopenharmony_ci unsigned src_types; 5356bf215546Sopenharmony_ci}; 5357bf215546Sopenharmony_ci 5358bf215546Sopenharmony_cistruct nir_fold_16bit_tex_image_options { 5359bf215546Sopenharmony_ci nir_rounding_mode rounding_mode; 5360bf215546Sopenharmony_ci bool fold_tex_dest; 5361bf215546Sopenharmony_ci bool fold_image_load_store_data; 5362bf215546Sopenharmony_ci unsigned fold_srcs_options_count; 5363bf215546Sopenharmony_ci 
struct nir_fold_tex_srcs_options *fold_srcs_options; 5364bf215546Sopenharmony_ci}; 5365bf215546Sopenharmony_ci 5366bf215546Sopenharmony_cibool nir_fold_16bit_tex_image(nir_shader *nir, 5367bf215546Sopenharmony_ci struct nir_fold_16bit_tex_image_options *options); 5368bf215546Sopenharmony_ci 5369bf215546Sopenharmony_citypedef struct { 5370bf215546Sopenharmony_ci bool legalize_type; /* whether this src should be legalized */ 5371bf215546Sopenharmony_ci uint8_t bit_size; /* bit_size to enforce */ 5372bf215546Sopenharmony_ci nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */ 5373bf215546Sopenharmony_ci} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types]; 5374bf215546Sopenharmony_ci 5375bf215546Sopenharmony_cibool nir_legalize_16bit_sampler_srcs(nir_shader *nir, 5376bf215546Sopenharmony_ci nir_tex_src_type_constraints constraints); 5377bf215546Sopenharmony_ci 5378bf215546Sopenharmony_cibool nir_lower_point_size(nir_shader *shader, float min, float max); 5379bf215546Sopenharmony_ci 5380bf215546Sopenharmony_civoid nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace, 5381bf215546Sopenharmony_ci bool point_coord_is_sysval, bool yinvert); 5382bf215546Sopenharmony_ci 5383bf215546Sopenharmony_citypedef enum { 5384bf215546Sopenharmony_ci nir_lower_interpolation_at_sample = (1 << 1), 5385bf215546Sopenharmony_ci nir_lower_interpolation_at_offset = (1 << 2), 5386bf215546Sopenharmony_ci nir_lower_interpolation_centroid = (1 << 3), 5387bf215546Sopenharmony_ci nir_lower_interpolation_pixel = (1 << 4), 5388bf215546Sopenharmony_ci nir_lower_interpolation_sample = (1 << 5), 5389bf215546Sopenharmony_ci} nir_lower_interpolation_options; 5390bf215546Sopenharmony_ci 5391bf215546Sopenharmony_cibool nir_lower_interpolation(nir_shader *shader, 5392bf215546Sopenharmony_ci nir_lower_interpolation_options options); 5393bf215546Sopenharmony_ci 5394bf215546Sopenharmony_cibool nir_lower_discard_if(nir_shader *shader); 
5395bf215546Sopenharmony_ci 5396bf215546Sopenharmony_cibool nir_lower_discard_or_demote(nir_shader *shader, 5397bf215546Sopenharmony_ci bool force_correct_quad_ops_after_discard); 5398bf215546Sopenharmony_ci 5399bf215546Sopenharmony_cibool nir_lower_memory_model(nir_shader *shader); 5400bf215546Sopenharmony_ci 5401bf215546Sopenharmony_cibool nir_lower_goto_ifs(nir_shader *shader); 5402bf215546Sopenharmony_ci 5403bf215546Sopenharmony_cibool nir_shader_uses_view_index(nir_shader *shader); 5404bf215546Sopenharmony_cibool nir_can_lower_multiview(nir_shader *shader); 5405bf215546Sopenharmony_cibool nir_lower_multiview(nir_shader *shader, uint32_t view_mask); 5406bf215546Sopenharmony_ci 5407bf215546Sopenharmony_ci 5408bf215546Sopenharmony_cibool nir_lower_fp16_casts(nir_shader *shader); 5409bf215546Sopenharmony_cibool nir_normalize_cubemap_coords(nir_shader *shader); 5410bf215546Sopenharmony_ci 5411bf215546Sopenharmony_cibool nir_shader_supports_implicit_lod(nir_shader *shader); 5412bf215546Sopenharmony_ci 5413bf215546Sopenharmony_civoid nir_live_ssa_defs_impl(nir_function_impl *impl); 5414bf215546Sopenharmony_ci 5415bf215546Sopenharmony_ciconst BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx); 5416bf215546Sopenharmony_ci 5417bf215546Sopenharmony_civoid nir_loop_analyze_impl(nir_function_impl *impl, 5418bf215546Sopenharmony_ci nir_variable_mode indirect_mask, 5419bf215546Sopenharmony_ci bool force_unroll_sampler_indirect); 5420bf215546Sopenharmony_ci 5421bf215546Sopenharmony_cibool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); 5422bf215546Sopenharmony_ci 5423bf215546Sopenharmony_cibool nir_repair_ssa_impl(nir_function_impl *impl); 5424bf215546Sopenharmony_cibool nir_repair_ssa(nir_shader *shader); 5425bf215546Sopenharmony_ci 5426bf215546Sopenharmony_civoid nir_convert_loop_to_lcssa(nir_loop *loop); 5427bf215546Sopenharmony_cibool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); 
5428bf215546Sopenharmony_civoid nir_divergence_analysis(nir_shader *shader); 5429bf215546Sopenharmony_cibool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr); 5430bf215546Sopenharmony_cibool nir_has_divergent_loop(nir_shader *shader); 5431bf215546Sopenharmony_ci 5432bf215546Sopenharmony_ci/* If phi_webs_only is true, only convert SSA values involved in phi nodes to 5433bf215546Sopenharmony_ci * registers. If false, convert all values (even those not involved in a phi 5434bf215546Sopenharmony_ci * node) to registers. 5435bf215546Sopenharmony_ci */ 5436bf215546Sopenharmony_cibool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); 5437bf215546Sopenharmony_ci 5438bf215546Sopenharmony_cibool nir_lower_phis_to_regs_block(nir_block *block); 5439bf215546Sopenharmony_cibool nir_lower_ssa_defs_to_regs_block(nir_block *block); 5440bf215546Sopenharmony_cibool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); 5441bf215546Sopenharmony_ci 5442bf215546Sopenharmony_cibool nir_lower_samplers(nir_shader *shader); 5443bf215546Sopenharmony_cibool nir_lower_ssbo(nir_shader *shader); 5444bf215546Sopenharmony_ci 5445bf215546Sopenharmony_citypedef struct nir_lower_printf_options { 5446bf215546Sopenharmony_ci bool treat_doubles_as_floats : 1; 5447bf215546Sopenharmony_ci unsigned max_buffer_size; 5448bf215546Sopenharmony_ci} nir_lower_printf_options; 5449bf215546Sopenharmony_ci 5450bf215546Sopenharmony_cibool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options); 5451bf215546Sopenharmony_ci 5452bf215546Sopenharmony_ci/* This is here for unit tests. 
*/ 5453bf215546Sopenharmony_cibool nir_opt_comparison_pre_impl(nir_function_impl *impl); 5454bf215546Sopenharmony_ci 5455bf215546Sopenharmony_cibool nir_opt_comparison_pre(nir_shader *shader); 5456bf215546Sopenharmony_ci 5457bf215546Sopenharmony_citypedef struct nir_opt_access_options { 5458bf215546Sopenharmony_ci bool is_vulkan; 5459bf215546Sopenharmony_ci bool infer_non_readable; 5460bf215546Sopenharmony_ci} nir_opt_access_options; 5461bf215546Sopenharmony_ci 5462bf215546Sopenharmony_cibool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options); 5463bf215546Sopenharmony_cibool nir_opt_algebraic(nir_shader *shader); 5464bf215546Sopenharmony_cibool nir_opt_algebraic_before_ffma(nir_shader *shader); 5465bf215546Sopenharmony_cibool nir_opt_algebraic_late(nir_shader *shader); 5466bf215546Sopenharmony_cibool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); 5467bf215546Sopenharmony_cibool nir_opt_constant_folding(nir_shader *shader); 5468bf215546Sopenharmony_ci 5469bf215546Sopenharmony_ci/* Try to combine a and b into a. Return true if combination was possible, 5470bf215546Sopenharmony_ci * which will result in b being removed by the pass. Return false if 5471bf215546Sopenharmony_ci * combination wasn't possible. 
5472bf215546Sopenharmony_ci */ 5473bf215546Sopenharmony_citypedef bool (*nir_combine_memory_barrier_cb)( 5474bf215546Sopenharmony_ci nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data); 5475bf215546Sopenharmony_ci 5476bf215546Sopenharmony_cibool nir_opt_combine_memory_barriers(nir_shader *shader, 5477bf215546Sopenharmony_ci nir_combine_memory_barrier_cb combine_cb, 5478bf215546Sopenharmony_ci void *data); 5479bf215546Sopenharmony_ci 5480bf215546Sopenharmony_cibool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); 5481bf215546Sopenharmony_ci 5482bf215546Sopenharmony_cibool nir_copy_prop_impl(nir_function_impl *impl); 5483bf215546Sopenharmony_cibool nir_copy_prop(nir_shader *shader); 5484bf215546Sopenharmony_ci 5485bf215546Sopenharmony_cibool nir_opt_copy_prop_vars(nir_shader *shader); 5486bf215546Sopenharmony_ci 5487bf215546Sopenharmony_cibool nir_opt_cse(nir_shader *shader); 5488bf215546Sopenharmony_ci 5489bf215546Sopenharmony_cibool nir_opt_dce(nir_shader *shader); 5490bf215546Sopenharmony_ci 5491bf215546Sopenharmony_cibool nir_opt_dead_cf(nir_shader *shader); 5492bf215546Sopenharmony_ci 5493bf215546Sopenharmony_cibool nir_opt_dead_write_vars(nir_shader *shader); 5494bf215546Sopenharmony_ci 5495bf215546Sopenharmony_cibool nir_opt_deref_impl(nir_function_impl *impl); 5496bf215546Sopenharmony_cibool nir_opt_deref(nir_shader *shader); 5497bf215546Sopenharmony_ci 5498bf215546Sopenharmony_cibool nir_opt_find_array_copies(nir_shader *shader); 5499bf215546Sopenharmony_ci 5500bf215546Sopenharmony_cibool nir_opt_fragdepth(nir_shader *shader); 5501bf215546Sopenharmony_ci 5502bf215546Sopenharmony_cibool nir_opt_gcm(nir_shader *shader, bool value_number); 5503bf215546Sopenharmony_ci 5504bf215546Sopenharmony_cibool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); 5505bf215546Sopenharmony_ci 5506bf215546Sopenharmony_citypedef enum { 5507bf215546Sopenharmony_ci nir_opt_if_aggressive_last_continue = (1 << 0), 5508bf215546Sopenharmony_ci 
nir_opt_if_optimize_phi_true_false = (1 << 1), 5509bf215546Sopenharmony_ci} nir_opt_if_options; 5510bf215546Sopenharmony_ci 5511bf215546Sopenharmony_cibool nir_opt_if(nir_shader *shader, nir_opt_if_options options); 5512bf215546Sopenharmony_ci 5513bf215546Sopenharmony_cibool nir_opt_intrinsics(nir_shader *shader); 5514bf215546Sopenharmony_ci 5515bf215546Sopenharmony_cibool nir_opt_large_constants(nir_shader *shader, 5516bf215546Sopenharmony_ci glsl_type_size_align_func size_align, 5517bf215546Sopenharmony_ci unsigned threshold); 5518bf215546Sopenharmony_ci 5519bf215546Sopenharmony_cibool nir_opt_loop_unroll(nir_shader *shader); 5520bf215546Sopenharmony_ci 5521bf215546Sopenharmony_citypedef enum { 5522bf215546Sopenharmony_ci nir_move_const_undef = (1 << 0), 5523bf215546Sopenharmony_ci nir_move_load_ubo = (1 << 1), 5524bf215546Sopenharmony_ci nir_move_load_input = (1 << 2), 5525bf215546Sopenharmony_ci nir_move_comparisons = (1 << 3), 5526bf215546Sopenharmony_ci nir_move_copies = (1 << 4), 5527bf215546Sopenharmony_ci nir_move_load_ssbo = (1 << 5), 5528bf215546Sopenharmony_ci nir_move_load_uniform = (1 << 6), 5529bf215546Sopenharmony_ci} nir_move_options; 5530bf215546Sopenharmony_ci 5531bf215546Sopenharmony_cibool nir_can_move_instr(nir_instr *instr, nir_move_options options); 5532bf215546Sopenharmony_ci 5533bf215546Sopenharmony_cibool nir_opt_sink(nir_shader *shader, nir_move_options options); 5534bf215546Sopenharmony_ci 5535bf215546Sopenharmony_cibool nir_opt_move(nir_shader *shader, nir_move_options options); 5536bf215546Sopenharmony_ci 5537bf215546Sopenharmony_citypedef struct { 5538bf215546Sopenharmony_ci /** nir_load_uniform max base offset */ 5539bf215546Sopenharmony_ci uint32_t uniform_max; 5540bf215546Sopenharmony_ci 5541bf215546Sopenharmony_ci /** nir_load_ubo_vec4 max base offset */ 5542bf215546Sopenharmony_ci uint32_t ubo_vec4_max; 5543bf215546Sopenharmony_ci 5544bf215546Sopenharmony_ci /** nir_var_mem_shared max base offset */ 5545bf215546Sopenharmony_ci 
uint32_t shared_max; 5546bf215546Sopenharmony_ci 5547bf215546Sopenharmony_ci /** nir_load/store_buffer_amd max base offset */ 5548bf215546Sopenharmony_ci uint32_t buffer_max; 5549bf215546Sopenharmony_ci} nir_opt_offsets_options; 5550bf215546Sopenharmony_ci 5551bf215546Sopenharmony_cibool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options); 5552bf215546Sopenharmony_ci 5553bf215546Sopenharmony_cibool nir_opt_peephole_select(nir_shader *shader, unsigned limit, 5554bf215546Sopenharmony_ci bool indirect_load_ok, bool expensive_alu_ok); 5555bf215546Sopenharmony_ci 5556bf215546Sopenharmony_cibool nir_opt_rematerialize_compares(nir_shader *shader); 5557bf215546Sopenharmony_ci 5558bf215546Sopenharmony_cibool nir_opt_remove_phis(nir_shader *shader); 5559bf215546Sopenharmony_cibool nir_opt_remove_phis_block(nir_block *block); 5560bf215546Sopenharmony_ci 5561bf215546Sopenharmony_cibool nir_opt_phi_precision(nir_shader *shader); 5562bf215546Sopenharmony_ci 5563bf215546Sopenharmony_cibool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store); 5564bf215546Sopenharmony_ci 5565bf215546Sopenharmony_cibool nir_opt_shrink_vectors(nir_shader *shader); 5566bf215546Sopenharmony_ci 5567bf215546Sopenharmony_cibool nir_opt_trivial_continues(nir_shader *shader); 5568bf215546Sopenharmony_ci 5569bf215546Sopenharmony_cibool nir_opt_undef(nir_shader *shader); 5570bf215546Sopenharmony_ci 5571bf215546Sopenharmony_cibool nir_lower_undef_to_zero(nir_shader *shader); 5572bf215546Sopenharmony_ci 5573bf215546Sopenharmony_cibool nir_opt_uniform_atomics(nir_shader *shader); 5574bf215546Sopenharmony_ci 5575bf215546Sopenharmony_cibool nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter, 5576bf215546Sopenharmony_ci void *data); 5577bf215546Sopenharmony_ci 5578bf215546Sopenharmony_cibool nir_opt_conditional_discard(nir_shader *shader); 5579bf215546Sopenharmony_cibool nir_opt_move_discards_to_top(nir_shader *shader); 5580bf215546Sopenharmony_ci 
5581bf215546Sopenharmony_cibool nir_opt_ray_queries(nir_shader *shader); 5582bf215546Sopenharmony_ci 5583bf215546Sopenharmony_citypedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, 5584bf215546Sopenharmony_ci unsigned align_offset, 5585bf215546Sopenharmony_ci unsigned bit_size, 5586bf215546Sopenharmony_ci unsigned num_components, 5587bf215546Sopenharmony_ci nir_intrinsic_instr *low, nir_intrinsic_instr *high, 5588bf215546Sopenharmony_ci void *data); 5589bf215546Sopenharmony_ci 5590bf215546Sopenharmony_citypedef struct { 5591bf215546Sopenharmony_ci nir_should_vectorize_mem_func callback; 5592bf215546Sopenharmony_ci nir_variable_mode modes; 5593bf215546Sopenharmony_ci nir_variable_mode robust_modes; 5594bf215546Sopenharmony_ci void *cb_data; 5595bf215546Sopenharmony_ci bool has_shared2_amd; 5596bf215546Sopenharmony_ci} nir_load_store_vectorize_options; 5597bf215546Sopenharmony_ci 5598bf215546Sopenharmony_cibool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options); 5599bf215546Sopenharmony_ci 5600bf215546Sopenharmony_civoid nir_sweep(nir_shader *shader); 5601bf215546Sopenharmony_ci 5602bf215546Sopenharmony_civoid nir_remap_dual_slot_attributes(nir_shader *shader, 5603bf215546Sopenharmony_ci uint64_t *dual_slot_inputs); 5604bf215546Sopenharmony_ciuint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); 5605bf215546Sopenharmony_ci 5606bf215546Sopenharmony_cinir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 5607bf215546Sopenharmony_cigl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 5608bf215546Sopenharmony_ci 5609bf215546Sopenharmony_cistatic inline bool 5610bf215546Sopenharmony_cinir_variable_is_in_ubo(const nir_variable *var) 5611bf215546Sopenharmony_ci{ 5612bf215546Sopenharmony_ci return (var->data.mode == nir_var_mem_ubo && 5613bf215546Sopenharmony_ci var->interface_type != NULL); 5614bf215546Sopenharmony_ci} 5615bf215546Sopenharmony_ci 
5616bf215546Sopenharmony_cistatic inline bool 5617bf215546Sopenharmony_cinir_variable_is_in_ssbo(const nir_variable *var) 5618bf215546Sopenharmony_ci{ 5619bf215546Sopenharmony_ci return (var->data.mode == nir_var_mem_ssbo && 5620bf215546Sopenharmony_ci var->interface_type != NULL); 5621bf215546Sopenharmony_ci} 5622bf215546Sopenharmony_ci 5623bf215546Sopenharmony_cistatic inline bool 5624bf215546Sopenharmony_cinir_variable_is_in_block(const nir_variable *var) 5625bf215546Sopenharmony_ci{ 5626bf215546Sopenharmony_ci return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); 5627bf215546Sopenharmony_ci} 5628bf215546Sopenharmony_ci 5629bf215546Sopenharmony_citypedef struct nir_unsigned_upper_bound_config { 5630bf215546Sopenharmony_ci unsigned min_subgroup_size; 5631bf215546Sopenharmony_ci unsigned max_subgroup_size; 5632bf215546Sopenharmony_ci unsigned max_workgroup_invocations; 5633bf215546Sopenharmony_ci unsigned max_workgroup_count[3]; 5634bf215546Sopenharmony_ci unsigned max_workgroup_size[3]; 5635bf215546Sopenharmony_ci 5636bf215546Sopenharmony_ci uint32_t vertex_attrib_max[32]; 5637bf215546Sopenharmony_ci} nir_unsigned_upper_bound_config; 5638bf215546Sopenharmony_ci 5639bf215546Sopenharmony_ciuint32_t 5640bf215546Sopenharmony_cinir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, 5641bf215546Sopenharmony_ci nir_ssa_scalar scalar, 5642bf215546Sopenharmony_ci const nir_unsigned_upper_bound_config *config); 5643bf215546Sopenharmony_ci 5644bf215546Sopenharmony_cibool 5645bf215546Sopenharmony_cinir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, 5646bf215546Sopenharmony_ci nir_ssa_scalar ssa, unsigned const_val, 5647bf215546Sopenharmony_ci const nir_unsigned_upper_bound_config *config); 5648bf215546Sopenharmony_ci 5649bf215546Sopenharmony_citypedef enum { 5650bf215546Sopenharmony_ci nir_ray_query_value_intersection_type, 5651bf215546Sopenharmony_ci nir_ray_query_value_intersection_t, 5652bf215546Sopenharmony_ci 
nir_ray_query_value_intersection_instance_custom_index, 5653bf215546Sopenharmony_ci nir_ray_query_value_intersection_instance_id, 5654bf215546Sopenharmony_ci nir_ray_query_value_intersection_instance_sbt_index, 5655bf215546Sopenharmony_ci nir_ray_query_value_intersection_geometry_index, 5656bf215546Sopenharmony_ci nir_ray_query_value_intersection_primitive_index, 5657bf215546Sopenharmony_ci nir_ray_query_value_intersection_barycentrics, 5658bf215546Sopenharmony_ci nir_ray_query_value_intersection_front_face, 5659bf215546Sopenharmony_ci nir_ray_query_value_intersection_object_ray_direction, 5660bf215546Sopenharmony_ci nir_ray_query_value_intersection_object_ray_origin, 5661bf215546Sopenharmony_ci nir_ray_query_value_intersection_object_to_world, 5662bf215546Sopenharmony_ci nir_ray_query_value_intersection_world_to_object, 5663bf215546Sopenharmony_ci nir_ray_query_value_intersection_candidate_aabb_opaque, 5664bf215546Sopenharmony_ci nir_ray_query_value_tmin, 5665bf215546Sopenharmony_ci nir_ray_query_value_flags, 5666bf215546Sopenharmony_ci nir_ray_query_value_world_ray_direction, 5667bf215546Sopenharmony_ci nir_ray_query_value_world_ray_origin, 5668bf215546Sopenharmony_ci} nir_ray_query_value; 5669bf215546Sopenharmony_ci 5670bf215546Sopenharmony_citypedef struct { 5671bf215546Sopenharmony_ci /* True if gl_DrawID is considered uniform, i.e. if the preamble is run 5672bf215546Sopenharmony_ci * at least once per "internal" draw rather than per user-visible draw. 5673bf215546Sopenharmony_ci */ 5674bf215546Sopenharmony_ci bool drawid_uniform; 5675bf215546Sopenharmony_ci 5676bf215546Sopenharmony_ci /* True if the subgroup size is uniform. */ 5677bf215546Sopenharmony_ci bool subgroup_size_uniform; 5678bf215546Sopenharmony_ci 5679bf215546Sopenharmony_ci /* size/align for load/store_preamble. 
*/ 5680bf215546Sopenharmony_ci void (*def_size)(nir_ssa_def *def, unsigned *size, unsigned *align); 5681bf215546Sopenharmony_ci 5682bf215546Sopenharmony_ci /* Total available size for load/store_preamble storage, in units 5683bf215546Sopenharmony_ci * determined by def_size. 5684bf215546Sopenharmony_ci */ 5685bf215546Sopenharmony_ci unsigned preamble_storage_size; 5686bf215546Sopenharmony_ci 5687bf215546Sopenharmony_ci /* Give the cost for an instruction. nir_opt_preamble will prioritize 5688bf215546Sopenharmony_ci * instructions with higher costs. Instructions with cost 0 may still be 5689bf215546Sopenharmony_ci * lifted, but only when required to lift other instructions with non-0 5690bf215546Sopenharmony_ci * cost (e.g. a load_const source of an expression). 5691bf215546Sopenharmony_ci */ 5692bf215546Sopenharmony_ci float (*instr_cost_cb)(nir_instr *instr, const void *data); 5693bf215546Sopenharmony_ci 5694bf215546Sopenharmony_ci /* Give the cost of rewriting the instruction to use load_preamble. This 5695bf215546Sopenharmony_ci * may happen from inserting move instructions, etc. If the benefit doesn't 5696bf215546Sopenharmony_ci * exceed the cost here then we won't rewrite it. 5697bf215546Sopenharmony_ci */ 5698bf215546Sopenharmony_ci float (*rewrite_cost_cb)(nir_ssa_def *def, const void *data); 5699bf215546Sopenharmony_ci 5700bf215546Sopenharmony_ci /* Instructions whose definitions should not be rewritten. These could 5701bf215546Sopenharmony_ci * still be moved to the preamble, but they shouldn't be the root of a 5702bf215546Sopenharmony_ci * replacement expression. Instructions with cost 0 and derefs are 5703bf215546Sopenharmony_ci * automatically included by the pass. 
5704bf215546Sopenharmony_ci */ 5705bf215546Sopenharmony_ci nir_instr_filter_cb avoid_instr_cb; 5706bf215546Sopenharmony_ci 5707bf215546Sopenharmony_ci const void *cb_data; 5708bf215546Sopenharmony_ci} nir_opt_preamble_options; 5709bf215546Sopenharmony_ci 5710bf215546Sopenharmony_cibool 5711bf215546Sopenharmony_cinir_opt_preamble(nir_shader *shader, 5712bf215546Sopenharmony_ci const nir_opt_preamble_options *options, 5713bf215546Sopenharmony_ci unsigned *size); 5714bf215546Sopenharmony_ci 5715bf215546Sopenharmony_cinir_function_impl *nir_shader_get_preamble(nir_shader *shader); 5716bf215546Sopenharmony_ci 5717bf215546Sopenharmony_cibool nir_lower_point_smooth(nir_shader *shader); 5718bf215546Sopenharmony_cibool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample); 5719bf215546Sopenharmony_ci 5720bf215546Sopenharmony_ci#include "nir_inline_helpers.h" 5721bf215546Sopenharmony_ci 5722bf215546Sopenharmony_ci#ifdef __cplusplus 5723bf215546Sopenharmony_ci} /* extern "C" */ 5724bf215546Sopenharmony_ci#endif 5725bf215546Sopenharmony_ci 5726bf215546Sopenharmony_ci#endif /* NIR_H */ 5727