/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "util/list.h"
#include "util/log.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/compiler.h"
#include "util/enum_operators.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "compiler/nir_types.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include <stdio.h>

#ifndef NDEBUG
#include "util/debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif

#ifdef __cplusplus
extern "C" {
#endif

extern uint32_t nir_debug;
extern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1];

#ifndef NDEBUG
#define NIR_DEBUG(flag) unlikely(nir_debug & (NIR_DEBUG_ ## flag))
#else
#define NIR_DEBUG(flag) false
#endif

#define NIR_DEBUG_CLONE                  (1u << 0)
#define NIR_DEBUG_SERIALIZE              (1u << 1)
#define NIR_DEBUG_NOVALIDATE             (1u << 2)
#define NIR_DEBUG_VALIDATE_SSA_DOMINANCE (1u << 3)
#define NIR_DEBUG_TGSI                   (1u << 4)
#define NIR_DEBUG_PRINT_VS               (1u << 5)
#define NIR_DEBUG_PRINT_TCS              (1u << 6)
#define NIR_DEBUG_PRINT_TES              (1u << 7)
#define NIR_DEBUG_PRINT_GS               (1u << 8)
#define NIR_DEBUG_PRINT_FS               (1u << 9)
#define NIR_DEBUG_PRINT_CS               (1u << 10)
#define NIR_DEBUG_PRINT_TS               (1u << 11)
#define NIR_DEBUG_PRINT_MS               (1u << 12)
#define NIR_DEBUG_PRINT_RGS              (1u << 13)
#define NIR_DEBUG_PRINT_AHS              (1u << 14)
#define NIR_DEBUG_PRINT_CHS              (1u << 15)
#define NIR_DEBUG_PRINT_MHS              (1u << 16)
#define NIR_DEBUG_PRINT_IS               (1u << 17)
#define NIR_DEBUG_PRINT_CBS              (1u << 18)
#define NIR_DEBUG_PRINT_KS               (1u << 19)
#define NIR_DEBUG_PRINT_CONSTS           (1u << 20)
#define NIR_DEBUG_VALIDATE_GC_LIST       (1u << 21)

#define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS  | \
                         NIR_DEBUG_PRINT_TCS | \
                         NIR_DEBUG_PRINT_TES | \
                         NIR_DEBUG_PRINT_GS  | \
                         NIR_DEBUG_PRINT_FS  | \
                         NIR_DEBUG_PRINT_CS  | \
                         NIR_DEBUG_PRINT_TS  | \
                         NIR_DEBUG_PRINT_MS  | \
                         NIR_DEBUG_PRINT_RGS | \
                         NIR_DEBUG_PRINT_AHS | \
                         NIR_DEBUG_PRINT_CHS | \
                         NIR_DEBUG_PRINT_MHS | \
                         NIR_DEBUG_PRINT_IS  | \
                         NIR_DEBUG_PRINT_CBS | \
                         NIR_DEBUG_PRINT_KS)
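
/*
 * Example (editor's sketch, not part of the upstream header): NIR_DEBUG()
 * is typically used to gate optional behaviour on the NIR_DEBUG environment
 * variable once nir_process_debug_variable() has been called.  The pass name
 * string below is hypothetical.
 *
 *    if (NIR_DEBUG(PRINT_CONSTS))
 *       nir_print_shader(shader, stderr);
 *
 *    if (!NIR_DEBUG(NOVALIDATE))
 *       nir_validate_shader(shader, "after my_hypothetical_pass");
 */
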
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
#define NIR_STREAM_PACKED (1 << 8)
typedef uint16_t nir_component_mask_t;

static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1 &&
           num_components <= 5) ||
          num_components == 8 ||
          num_components == 16;
}

static inline nir_component_mask_t
nir_component_mask(unsigned num_components)
{
   assert(nir_num_components_valid(num_components));
   return (1u << num_components) - 1;
}

void
nir_process_debug_variable(void);

bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                        unsigned old_bit_size,
                                        unsigned new_bit_size);
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size);

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;
struct nir_xfb_info;


/**
 * Description of built-in state associated with a uniform
 *
 * \sa nir_variable::state_slots
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
   uint16_t swizzle;
} nir_state_slot;

typedef enum {
   nir_var_system_value     = (1 << 0),
   nir_var_uniform          = (1 << 1),
   nir_var_shader_in        = (1 << 2),
   nir_var_shader_out       = (1 << 3),
   nir_var_image            = (1 << 4),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data = (1 << 5),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib   = (1 << 6),

   /* Modes named nir_var_mem_* have explicit data layout */
   nir_var_mem_ubo          = (1 << 7),
   nir_var_mem_push_const   = (1 << 8),
   nir_var_mem_ssbo         = (1 << 9),
   nir_var_mem_constant     = (1 << 10),
   nir_var_mem_task_payload = (1 << 11),

   /* Generic modes intentionally come last. See encode_deref_modes() in
    * nir_serialize.c for more details.
    */
   nir_var_shader_temp      = (1 << 12),
   nir_var_function_temp    = (1 << 13),
   nir_var_mem_shared       = (1 << 14),
   nir_var_mem_global       = (1 << 15),

   nir_var_mem_generic      = (nir_var_shader_temp |
                               nir_var_function_temp |
                               nir_var_mem_shared |
                               nir_var_mem_global),

   nir_var_read_only_modes  = nir_var_shader_in | nir_var_uniform |
                              nir_var_system_value | nir_var_mem_constant |
                              nir_var_mem_ubo,
   /** Modes where vector derefs can be indexed as arrays */
   nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo |
                                 nir_var_mem_shared | nir_var_mem_global |
                                 nir_var_mem_push_const,
   nir_num_variable_modes   = 16,
   nir_var_all              = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
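
/*
 * Example (editor's sketch): nir_variable_mode values are single bits, so
 * mode sets are ordinary bitmasks.  A pass that only cares about explicitly
 * laid out buffer memory could test a variable like this:
 *
 *    if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo))
 *       ...;  // UBO- or SSBO-backed variable
 *
 * and nir_var_mem_generic is itself a four-bit mask of the generic-pointer
 * modes (util_bitcount(nir_var_mem_generic) == 4).
 */
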
/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0,
   nir_rounding_mode_rtne  = 1, /* round to nearest even */
   nir_rounding_mode_ru    = 2, /* round up */
   nir_rounding_mode_rd    = 3, /* round down */
   nir_rounding_mode_rtz   = 4, /* round towards zero */
} nir_rounding_mode;

typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;

#define nir_const_value_to_array(arr, c, components, m) \
do { \
   for (unsigned i = 0; i < components; ++i) \
      arr[i] = c[i].m; \
} while (false)

static inline nir_const_value
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 1:  v.b   = x;  break;
   case 8:  v.u8  = x;  break;
   case 16: v.u16 = x;  break;
   case 32: v.u32 = x;  break;
   case 64: v.u64 = x;  break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

static inline nir_const_value
nir_const_value_for_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64) {
      assert(i >= (-(1ll << (bit_size - 1))));
      assert(i < (1ll << (bit_size - 1)));
   }

   return nir_const_value_for_raw_uint(i, bit_size);
}

static inline nir_const_value
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64)
      assert(u < (1ull << bit_size));

   return nir_const_value_for_raw_uint(u, bit_size);
}

static inline nir_const_value
nir_const_value_for_bool(bool b, unsigned bit_size)
{
   /* Booleans use a 0/-1 convention */
   return nir_const_value_for_int(-(int)b, bit_size);
}

/* This one isn't inline because it requires half-float conversion */
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);

static inline int64_t
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)value.b;
   case 8:  return value.i8;
   case 16: return value.i16;
   case 32: return value.i32;
   case 64: return value.i64;
   default:
      unreachable("Invalid bit size");
   }
}

static inline uint64_t
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 1:  return value.b;
   case 8:  return value.u8;
   case 16: return value.u16;
   case 32: return value.u32;
   case 64: return value.u64;
   default:
      unreachable("Invalid bit size");
   }
}

static inline bool
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
{
   int64_t i = nir_const_value_as_int(value, bit_size);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

/* This one isn't inline because it requires half-float conversion */
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
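
/*
 * Example (editor's sketch): the nir_const_value_for_* and
 * nir_const_value_as_* helpers round-trip when the same bit size is used on
 * both sides:
 *
 *    nir_const_value v = nir_const_value_for_int(-1, 16);
 *    assert(nir_const_value_as_int(v, 16) == -1);
 *    assert(nir_const_value_as_uint(v, 16) == 0xffff);
 *
 * Booleans follow the 0/-1 convention, so
 * nir_const_value_as_bool(nir_const_value_for_bool(true, 32), 32) is true.
 */
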
typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the \c nir_variable.  Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* We could get this from var->type, but clone is *much* easier if it
    * doesn't have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;

/**
 * \brief Layout qualifiers for gl_FragDepth.
 *
 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
 * with a layout qualifier.
 */
typedef enum {
   nir_depth_layout_none, /**< No depth layout is specified. */
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;

/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */

typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * \sa nir_variable_mode
       */
      unsigned mode:16;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as \c const, shader inputs,
       * and uniforms.
       */
      unsigned read_only:1;
      unsigned centroid:1;
      unsigned sample:1;
      unsigned patch:1;
      unsigned invariant:1;

      /**
       * Is the variable a ray query?
       */
      unsigned ray_query:1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all; in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision:2;

      /**
       * Has this variable been statically assigned?
       *
       * This answers whether the variable was assigned in any path of
       * the shader during ast_to_hir.  This doesn't answer whether it is
       * still written after dead code removal, nor is it maintained in
       * non-ast_to_hir.cpp (GLSL parsing) paths.
       */
      unsigned assigned:1;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce:1;

      /**
       * When separate shader programs are enabled, only input/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other input/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io:1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * \sa glsl_interp_mode
       */
      unsigned interpolation:3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components.  For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by \c location.
       */
      unsigned location_frac:2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed.  In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact:1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output:1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless:1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding:1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it \b cannot be changed
       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
       * no effect).
       */
      unsigned explicit_location:1;

      /**
       * Is this varying used by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb:1;

      /**
       * Is this varying used only by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb_only:1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer:1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride:1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset:1;

      /**
       * Layout of the matrix.  Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout:2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block:1;

      /**
       * Non-zero if the variable must be a shader input. This is useful for
       * constraints on function parameters.
       */
      unsigned must_be_shader_input:1;

      /**
       * How the variable was declared.  See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so they
       * should not be visible via the API.
       */
      unsigned how_declared:2;

      /**
       * Is this variable per-view?  If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view:1;

      /**
       * Whether the variable is per-primitive.
       * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
       */
      unsigned per_primitive:1;

      /**
       * \brief Layout qualifier for gl_FragDepth.  See nir_depth_layout.
       *
       * This is not equal to \c ir_depth_layout_none if and only if this
       * variable is \c gl_FragDepth and a layout qualifier is specified.
       */
      unsigned depth_layout:3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream:9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access:9;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set:5;

      /**
       * Output index for dual source blending.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
       *   - Vertex shader output: one of the values from \c gl_varying_slot.
       *   - Geometry shader input: one of the values from \c gl_varying_slot.
       *   - Geometry shader output: one of the values from \c gl_varying_slot.
       *   - Fragment shader input: one of the values from \c gl_varying_slot.
       *   - Fragment shader output: one of the values from \c gl_frag_result.
       *   - Task shader output: one of the values from \c gl_varying_slot.
       *   - Mesh shader input: one of the values from \c gl_varying_slot.
       *   - Mesh shader output: one of the values from \c gl_varying_slot.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, uniforms (including samplers and images), and for UBO and SSBO
       * variables in GLSL.
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer:2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, \c state_slots must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree.  In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, \c num_state_slots will be zero and
    * \c state_slots will be \c NULL.
    */
   /*@{*/
   uint16_t num_state_slots;    /**< Number of state slots used */
   nir_state_slot *state_slots; /**< State descriptors. */
   /*@}*/

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
    *
    * \sa ir_variable::location
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier.  This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)

#define nir_foreach_image_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_image)

#define nir_foreach_image_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_image)

static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}

typedef struct nir_register {
   struct exec_node node;

   unsigned num_components;  /**< number of vector components */
   unsigned num_array_elems; /**< size of array (0 for no array) */

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this register may have different values in different SIMD
    * invocations of the shader.
    */
   bool divergent;

   /** generic register index. */
   unsigned index;

   /** set of nir_srcs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_dests where this register is defined (written to) */
   struct list_head defs;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;
} nir_register;

#define nir_foreach_register(reg, reg_list) \
   foreach_list_typed(nir_register, reg, node, reg_list)
#define nir_foreach_register_safe(reg, reg_list) \
   foreach_list_typed_safe(nir_register, reg, node, reg_list)

typedef enum PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_ssa_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct list_head gc_node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags.  For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   uint32_t index;
} nir_instr;

static inline nir_instr *
nir_instr_next(nir_instr *instr)
{
   struct exec_node *next = exec_node_get_next(&instr->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_instr, next, node);
}

static inline nir_instr *
nir_instr_prev(nir_instr *instr)
{
   struct exec_node *prev = exec_node_get_prev(&instr->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_instr, prev, node);
}

static inline bool
nir_instr_is_first(const nir_instr *instr)
{
   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
}

static inline bool
nir_instr_is_last(const nir_instr *instr)
{
   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}
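
/*
 * Example (editor's sketch): nir_instr_next()/nir_instr_prev() return NULL at
 * the block boundaries, so a forward walk over the remainder of a block can
 * be written as:
 *
 *    for (nir_instr *it = instr; it != NULL; it = nir_instr_next(it)) {
 *       if (it->type == nir_instr_type_intrinsic)
 *          break;
 *    }
 */
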
typedef struct nir_ssa_def {
   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_instrs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;

   /** generic SSA definition index. */
   unsigned index;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader.  This is set by nir_divergence_analysis.
    */
   bool divergent;
} nir_ssa_def;

struct nir_src;

typedef struct {
   nir_register *reg;
   struct nir_src *indirect; /**< NULL for no indirect offset */
   unsigned base_offset;

   /* TODO use-def chain goes here */
} nir_reg_src;

typedef struct {
   nir_instr *parent_instr;
   struct list_head def_link;

   nir_register *reg;
   struct nir_src *indirect; /**< NULL for no indirect offset */
   unsigned base_offset;

   /* TODO def-use chain goes here */
} nir_reg_dest;

struct nir_if;

typedef struct nir_src {
   union {
      /** Instruction that consumes this value as a source. */
      nir_instr *parent_instr;
      struct nir_if *parent_if;
   };

   struct list_head use_link;

   union {
      nir_reg_src reg;
      nir_ssa_def *ssa;
   };

   bool is_ssa;
} nir_src;

static inline nir_src
nir_src_init(void)
{
   nir_src src = { { NULL } };
   return src;
}

#define NIR_SRC_INIT nir_src_init()

#define nir_foreach_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_if_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

typedef struct {
   union {
      nir_reg_dest reg;
      nir_ssa_def ssa;
   };

   bool is_ssa;
} nir_dest;

static inline nir_dest
nir_dest_init(void)
{
   nir_dest dest = { { { NULL } } };
   return dest;
}

#define NIR_DEST_INIT nir_dest_init()

#define nir_foreach_def(dest, reg) \
   list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)

#define nir_foreach_def_safe(dest, reg) \
   list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)

static inline nir_src
nir_src_for_ssa(nir_ssa_def *def)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = true;
   src.ssa = def;

   return src;
}

static inline nir_src
nir_src_for_reg(nir_register *reg)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = false;
   src.reg.reg = reg;
   src.reg.indirect = NULL;
   src.reg.base_offset = 0;

   return src;
}

static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
   nir_dest dest = NIR_DEST_INIT;

   dest.reg.reg = reg;

   return dest;
}

static inline unsigned
nir_src_bit_size(nir_src src)
{
   return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
}

static inline unsigned
nir_src_num_components(nir_src src)
{
   return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
}

static inline bool
nir_src_is_const(nir_src src)
{
   return src.is_ssa &&
          src.ssa->parent_instr->type == nir_instr_type_load_const;
}

static inline bool
nir_src_is_undef(nir_src src)
{
   return src.is_ssa &&
          src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
}

static inline bool
nir_src_is_divergent(nir_src src)
{
   return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent;
}

static inline unsigned
nir_dest_bit_size(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
}

static inline unsigned
nir_dest_num_components(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
}

static inline bool
nir_dest_is_divergent(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent;
}
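
/*
 * Example (editor's sketch): nir_foreach_use() iterates the nir_src list
 * hanging off an SSA def (or register).  A pass that wants to know whether a
 * value feeds anything other than ALU instructions could do:
 *
 *    bool only_alu_uses = true;
 *    nir_foreach_use(use_src, def) {
 *       if (use_src->parent_instr->type != nir_instr_type_alu)
 *          only_alu_uses = false;
 *    }
 *
 * if-condition uses live on the separate if_uses list and need
 * nir_foreach_if_use().
 */
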
/* Are all components the same, i.e. .xxxx */
static inline bool
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != swiz[0])
         return false;
   return true;
}

/* Are all components sequential, i.e. .yzw */
static inline bool
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != (swiz[0] + i))
         return false;
   return true;
}

void nir_src_copy(nir_src *dest, const nir_src *src);
void nir_dest_copy(nir_dest *dest, const nir_dest *src);

typedef struct {
   /** Base source */
   nir_src src;

   /**
    * \name input modifiers
    */
   /*@{*/
   /**
    * For inputs interpreted as floating point, flips the sign bit.  For
    * inputs interpreted as integers, performs the two's complement negation.
    */
   bool negate;

   /**
    * Clears the sign bit for floating point values, and computes the integer
    * absolute value for integers.  Note that the negate modifier acts after
    * the absolute value modifier, therefore if both are set then all inputs
    * will become negative.
    */
   bool abs;
   /*@}*/

   /**
    * For each input component, says which component of the register it is
    * chosen from.
    *
    * Note that which elements of the swizzle are used and which are ignored
    * are based on the write mask for most opcodes - for example, a statement
    * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
    * of {2, x, 1, 0} where x means "don't care."
    */
   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
} nir_alu_src;

typedef struct {
   /** Base destination */
   nir_dest dest;

   /**
    * Saturate output modifier
    *
    * Only valid for opcodes that output floating-point numbers.  Clamps the
    * output to between 0.0 and 1.0 inclusive.
    */
   bool saturate;

   /**
    * Write-mask
    *
    * Ignored if dest.is_ssa is true
    */
   unsigned write_mask : NIR_MAX_VEC_COMPONENTS;
} nir_alu_dest;
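
/*
 * Example (editor's sketch): the swizzle helpers above operate on the raw
 * swizzle array of a nir_alu_src.  Detecting a "splat" source of a 4-wide
 * ALU op could look like:
 *
 *    if (nir_is_same_comp_swizzle(alu->src[0].swizzle, 4)) {
 *       unsigned chan = alu->src[0].swizzle[0];  // all four lanes read this channel
 *       ...
 *    }
 */
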
/** NIR sized and unsized types
 *
 * The values in this enum are carefully chosen so that the sized type is
 * just the unsized type OR the number of bits.
 */
typedef enum PACKED {
   nir_type_invalid = 0, /* Not a valid type */
   nir_type_int = 2,
   nir_type_uint = 4,
   nir_type_bool = 6,
   nir_type_float = 128,
   nir_type_bool1 = 1 | nir_type_bool,
   nir_type_bool8 = 8 | nir_type_bool,
   nir_type_bool16 = 16 | nir_type_bool,
   nir_type_bool32 = 32 | nir_type_bool,
   nir_type_int1 = 1 | nir_type_int,
   nir_type_int8 = 8 | nir_type_int,
   nir_type_int16 = 16 | nir_type_int,
   nir_type_int32 = 32 | nir_type_int,
   nir_type_int64 = 64 | nir_type_int,
   nir_type_uint1 = 1 | nir_type_uint,
   nir_type_uint8 = 8 | nir_type_uint,
   nir_type_uint16 = 16 | nir_type_uint,
   nir_type_uint32 = 32 | nir_type_uint,
   nir_type_uint64 = 64 | nir_type_uint,
   nir_type_float16 = 16 | nir_type_float,
   nir_type_float32 = 32 | nir_type_float,
   nir_type_float64 = 64 | nir_type_float,
} nir_alu_type;

#define NIR_ALU_TYPE_SIZE_MASK      0x79
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86

static inline unsigned
nir_alu_type_get_type_size(nir_alu_type type)
{
   return type & NIR_ALU_TYPE_SIZE_MASK;
}

static inline nir_alu_type
nir_alu_type_get_base_type(nir_alu_type type)
{
   return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
}

nir_alu_type
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type);

static inline nir_alu_type
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
{
   return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
}

enum glsl_base_type
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type);

nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                              nir_rounding_mode rnd);

nir_op
nir_op_vec(unsigned components);

bool
nir_op_is_vec(nir_op op);
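
/*
 * Example (editor's sketch): because sized types are "base | bit size", the
 * masks above decompose them cheaply:
 *
 *    assert(nir_type_float16 == (nir_type_float | 16));
 *    assert(nir_alu_type_get_type_size(nir_type_float16) == 16);
 *    assert(nir_alu_type_get_base_type(nir_type_float16) == nir_type_float);
 *
 * An unsized type such as nir_type_int has size 0.
 */
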
static inline bool
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
}

static inline bool
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
}

static inline bool
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline bool
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline nir_rounding_mode
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
                                          nir_alu_type type)
{
   if (nir_alu_type_get_base_type(type) != nir_type_float)
      return nir_rounding_mode_undef;

   unsigned bit_size = nir_alu_type_get_type_size(type);

   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
      return nir_rounding_mode_rtz;
   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
      return nir_rounding_mode_rtne;
   return nir_rounding_mode_undef;
}

static inline bool
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
{
   bool result =
      nir_has_any_rounding_mode_rtne(execution_mode) ||
      nir_has_any_rounding_mode_rtz(execution_mode);
   return result;
}

typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this is just mathematical commutativity.  Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),

   /**
    * Operation is associative
    */
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),

   /**
    * Operation where src[0] is used to select src[1] on true or src[2] on
    * false.  src[0] may be Boolean, or it may be another type used in an
    * implicit comparison.
    */
   NIR_OP_IS_SELECTION = (1 << 2),
} nir_op_algebraic_property;

/* vec16 is the widest ALU op in NIR, making the max number of inputs of ALU
 * instructions the same as NIR_MAX_VEC_COMPONENTS.
 */
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
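
/*
 * Example (editor's sketch): the float_controls helpers above take the
 * shader's execution-mode bitfield (shader_info::float_controls_execution_mode
 * in this codebase, if memory serves).  Picking the rounding mode for an
 * f64 -> f32 conversion might look like:
 *
 *    nir_rounding_mode rnd =
 *       nir_get_rounding_mode_from_float_controls(execution_mode,
 *                                                 nir_type_float32);
 *    nir_op op = nir_type_conversion_op(nir_type_float64, nir_type_float32, rnd);
 */
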
typedef struct nir_op_info {
   /** Name of the NIR ALU opcode */
   const char *name;

   /** Number of inputs (sources) */
   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs.  Note that the
    * saturate modifier is only allowed on outputs with the float type.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    *
    * See nir_op_infos::output_size for more detail about the relationship
    * between input and output sizes.
    */
   uint8_t input_sizes[NIR_ALU_MAX_INPUTS];

   /**
    * The type of vector that each input takes.  Note that negate and
    * absolute value are only allowed on inputs with int or float type and
    * behave differently on the two.
    */
   nir_alu_type input_types[NIR_ALU_MAX_INPUTS];

   /** Algebraic properties of this opcode */
   nir_op_algebraic_property algebraic_properties;

   /** Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;

/** Metadata for each nir_op, indexed by opcode */
extern const nir_op_info nir_op_infos[nir_num_opcodes];

static inline bool
nir_op_is_selection(nir_op op)
{
   return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
}

typedef struct nir_alu_instr {
   /** Base instruction */
   nir_instr instr;

   /** Opcode */
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either.  This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact:1;

   /**
    * Indicates that this instruction does not cause signed integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_signed_wrap:1;

   /**
    * Indicates that this instruction does not cause unsigned integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_unsigned_wrap:1;

   /** Destination */
   nir_alu_dest dest;

   /** Sources
    *
    * The size of the array is given by nir_op_info::num_inputs.
    */
   nir_alu_src src[];
} nir_alu_instr;

void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src);

bool nir_alu_instr_is_copy(nir_alu_instr *instr);
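
/*
 * Example (editor's sketch): nir_op_infos[] is the usual way to walk an ALU
 * instruction generically, e.g. to visit every source regardless of opcode:
 *
 *    const nir_op_info *info = &nir_op_infos[alu->op];
 *    for (unsigned i = 0; i < info->num_inputs; i++) {
 *       if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
 *          ...;  // this source is interpreted as floating point
 *    }
 */
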
/* is this source channel used? */
bool
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
                           unsigned channel);
nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src);
/**
 * Get the number of channels used for a source
 */
unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src);

bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr);

bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
                                    nir_alu_type full_type);

bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                        unsigned src1, unsigned src2);

bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                                 const nir_alu_instr *alu2,
                                 unsigned src1, unsigned src2);

bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);

typedef enum {
   nir_deref_type_var,
   nir_deref_type_array,
   nir_deref_type_array_wildcard,
   nir_deref_type_ptr_as_array,
   nir_deref_type_struct,
   nir_deref_type_cast,
} nir_deref_type;

typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** Bitmask of modes the underlying variable might have
    *
    * For OpenCL-style generic pointers, we may not know exactly what mode it
    * is at any given point in time in the compile process.  This bitfield
    * contains the set of modes which it MAY be.
    *
    * Generally, this field should not be accessed directly.  Use one of the
    * nir_deref_mode_ helpers instead.
    */
   nir_variable_mode modes;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      struct {
         nir_src index;
         bool in_bounds;
      } arr;

      struct {
         unsigned index;
      } strct;

      struct {
         unsigned ptr_stride;
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_dest dest;
} nir_deref_instr;

/** Returns true if deref might have one of the given modes
 *
 * For multi-mode derefs, this returns true if any of the possible modes of
 * the deref matches any of the specified modes.  This function returning true
 * does NOT mean that the deref definitely has one of those modes.  It simply
 * means that, with the best information we have at the time, it might.
 */
static inline bool
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return deref->modes & modes;
}

/** Returns true if deref must have one of the given modes
 *
 * For multi-mode derefs, this returns true if NIR can prove that the given
 * deref has one of the specified modes.  This function returning false does
 * NOT mean that the deref doesn't have one of the given modes.  It very well
 * may have one of those modes, we just don't have enough information to prove
 * that it does for sure.
 */
static inline bool
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return !(deref->modes & ~modes);
}

/** Returns true if deref has the given mode
 *
 * This returns true if the deref has exactly the mode specified.  If the
 * deref may have that mode but may also have a different mode (i.e. modes has
 * multiple bits set), this will assert-fail.
 *
 * If you're confused about which nir_deref_mode_ helper to use, use this one
 * or nir_deref_mode_is_one_of below.
 */
static inline bool
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
{
   assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
   assert(deref->modes != 0);

   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, mode)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(deref->modes == mode);
   }

   return deref->modes == mode;
}

/** Returns true if deref has one of the given modes
 *
 * This returns true if the deref has exactly one possible mode and that mode
 * is one of the modes specified.  If the deref may have one of those modes
 * but may also have a different mode (i.e. modes has multiple bits set), this
 * will assert-fail.
 */
static inline bool
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, modes)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(nir_deref_mode_must_be(deref, modes));
   }

   return nir_deref_mode_may_be(deref, modes);
}
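
/*
 * Example (editor's sketch): a pass that only handles single-mode derefs can
 * use nir_deref_mode_is(), while code that must cope with OpenCL generic
 * pointers should phrase its checks as may_be/must_be:
 *
 *    if (nir_deref_mode_must_be(deref, nir_var_mem_shared))
 *       ...;  // provably shared memory
 *    else if (nir_deref_mode_may_be(deref, nir_var_mem_shared))
 *       ...;  // might be shared; needs a runtime or later-pass decision
 */
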
/** Returns true if deref's possible modes lie in the given set of modes
 *
 * This returns true if the deref's modes lie in the given set of modes.  If
 * the deref's modes overlap with the specified modes but aren't entirely
 * contained in the specified set of modes, this will assert-fail.  In
 * particular, if this is used in a generic pointers scenario, the specified
 * modes have to contain all or none of the possible generic pointer modes.
 *
 * This is intended mostly for mass-lowering of derefs which might have
 * generic pointers.
 */
static inline bool
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
{
   if (nir_deref_mode_may_be(deref, modes))
      assert(nir_deref_mode_must_be(deref, modes));

   return nir_deref_mode_may_be(deref, modes);
}

static inline nir_deref_instr *nir_src_as_deref(nir_src src);

static inline nir_deref_instr *
nir_deref_instr_parent(const nir_deref_instr *instr)
{
   if (instr->deref_type == nir_deref_type_var)
      return NULL;
   else
      return nir_src_as_deref(instr->parent);
}

static inline nir_variable *
nir_deref_instr_get_variable(const nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      if (instr->deref_type == nir_deref_type_cast)
         return NULL;

      instr = nir_deref_instr_parent(instr);
   }

   return instr->var;
}

bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);

typedef enum {
   nir_deref_instr_has_complex_use_allow_memcpy_src = (1 << 0),
   nir_deref_instr_has_complex_use_allow_memcpy_dst = (1 << 1),
} nir_deref_instr_has_complex_use_options;

bool nir_deref_instr_has_complex_use(nir_deref_instr *instr,
                                     nir_deref_instr_has_complex_use_options opts);

bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);

unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
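
/*
 * Example (editor's sketch): deref instructions form a chain that is walked
 * from the leaf back towards the variable.  Collecting every array index on
 * the way to the root could look like:
 *
 *    for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
 *       if (d->deref_type == nir_deref_type_array)
 *          handle_index(d->arr.index);  // handle_index() is hypothetical
 *    }
 *
 * nir_deref_instr_get_variable() above does a similar walk but gives up and
 * returns NULL when it hits a cast.
 */
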
typedef struct {
   nir_instr instr;

   struct nir_function *callee;

   unsigned num_params;
   nir_src params[];
} nir_call_instr;

#include "nir_intrinsics.h"

#define NIR_INTRINSIC_MAX_CONST_INDEX 7

/** Represents an intrinsic
 *
 * An intrinsic is an instruction type for handling things that are
 * more-or-less regular operations but don't just consume and produce SSA
 * values like ALU operations do.  Intrinsics are not for things that have
 * special semantic meaning such as phi nodes and parallel copies.
 * Examples of intrinsics include variable load/store operations, system
 * value loads, and the like.  Even though texturing more-or-less falls
 * under this category, texturing is its own instruction type because
 * trying to represent texturing with intrinsics would lead to a
 * combinatorial explosion of intrinsic opcodes.
 *
 * By having a single instruction type for handling a lot of different
 * cases, optimization passes can look for intrinsics and, for the most
 * part, completely ignore them.  Each intrinsic type also has a few
 * possible flags that govern whether or not they can be reordered or
 * eliminated.  That way passes like dead code elimination can still work
 * on intrinsics without understanding the meaning of each.
 *
 * Each intrinsic has some number of constant indices, some number of
 * variables, and some number of sources.  What these sources, variables,
 * and indices mean depends on the intrinsic and is documented with the
 * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
 * instructions are the only types of instruction that can operate on
 * variables.
 */
typedef struct {
   nir_instr instr;

   nir_intrinsic_op intrinsic;

   nir_dest dest;

   /** number of components if this is a vectorized intrinsic
    *
    * Similarly to ALU operations, some intrinsics are vectorized.
    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
    * For vectorized intrinsics, the num_components field specifies the
    * number of destination components and the number of source components
    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
    */
   uint8_t num_components;

   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];

   nir_src src[];
} nir_intrinsic_instr;

static inline nir_variable *
nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
{
   return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
}

typedef enum {
   /* Memory ordering. */
   NIR_MEMORY_ACQUIRE        = 1 << 0,
   NIR_MEMORY_RELEASE        = 1 << 1,
   NIR_MEMORY_ACQ_REL        = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,

   /* Memory visibility operations. */
   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
   NIR_MEMORY_MAKE_VISIBLE   = 1 << 3,
} nir_memory_semantics;

typedef enum {
   NIR_SCOPE_NONE,
   NIR_SCOPE_INVOCATION,
   NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_SHADER_CALL,
   NIR_SCOPE_WORKGROUP,
   NIR_SCOPE_QUEUE_FAMILY,
   NIR_SCOPE_DEVICE,
} nir_scope;

/**
 * \name NIR intrinsics semantic flags
 *
 * Information about what the compiler can do with the intrinsics.
 *
 * \sa nir_intrinsic_info::flags
 */
typedef enum {
   /**
    * Whether the intrinsic can be safely eliminated if none of its output
    * values are being used.
    */
   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),

   /**
    * Whether the intrinsic can be reordered with respect to any other
    * intrinsic, i.e. whether the only reordering dependencies of the
    * intrinsic are due to the register reads/writes.
    */
   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
} nir_intrinsic_semantic_flag;

/**
 * Maximum valid value for a nir align_mul value (in intrinsics or derefs).
 *
 * Offsets can be signed, so this is the largest power of two in int32_t.
 */
#define NIR_ALIGN_MUL_MAX 0x40000000

typedef struct nir_io_semantics {
   unsigned location:7;  /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
   unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
   unsigned dual_source_blend_index:1;
   unsigned fb_fetch_output:1;  /* for GL_KHR_blend_equation_advanced */
   unsigned gs_streams:8;       /* xxyyzzww: 2-bit stream index for each component */
   unsigned medium_precision:1; /* GLSL mediump qualifier */
   unsigned per_view:1;
   unsigned high_16bits:1; /* whether accessing low or high half of the slot */
   unsigned invariant:1;   /* The variable has the invariant flag set */
   /* CLIP_DISTn, LAYER, VIEWPORT, and TESS_LEVEL_* have up to 3 uses:
    * - an output consumed by the next stage
    * - a system value output affecting fixed-func hardware, e.g. the clipper
    * - a transform feedback output written to memory
    * The following fields disable the first two. Transform feedback is disabled
    * by transform feedback info.
    */
   unsigned no_varying:1;       /* whether this output isn't consumed by the next stage */
   unsigned no_sysval_output:1; /* whether this system value output has no
                                   effect due to current pipeline states */
   unsigned _pad:3;
} nir_io_semantics;
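
/*
 * Example (editor's sketch): I/O intrinsics carry nir_io_semantics in one of
 * their constant indices; the generated accessors included further down
 * (nir_intrinsics_indices.h) unpack it, e.g.:
 *
 *    nir_io_semantics sem = nir_intrinsic_io_semantics(store_output_intr);
 *    if (sem.location == VARYING_SLOT_POS && !sem.no_varying)
 *       ...;
 */
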
The structure is limited 1844 * to 32 bits because it's stored in nir_intrinsic_instr::const_index[]. 1845 */ 1846typedef struct nir_io_xfb { 1847 struct { 1848 /* start_component is equal to the index of out[]; add 2 for io_xfb2 */ 1849 /* start_component is not relative to nir_intrinsic_component */ 1850 /* get the stream index from nir_io_semantics */ 1851 uint8_t num_components:4; /* max 4; if this is 0, xfb is disabled */ 1852 uint8_t buffer:4; /* buffer index, max 3 */ 1853 uint8_t offset; /* transform feedback buffer offset in dwords, 1854 max (1K - 4) bytes */ 1855 } out[2]; 1856} nir_io_xfb; 1857 1858unsigned 1859nir_instr_xfb_write_mask(nir_intrinsic_instr *instr); 1860 1861#define NIR_INTRINSIC_MAX_INPUTS 11 1862 1863typedef struct { 1864 const char *name; 1865 1866 uint8_t num_srcs; /** < number of register/SSA inputs */ 1867 1868 /** number of components of each input register 1869 * 1870 * If this value is 0, the number of components is given by the 1871 * num_components field of nir_intrinsic_instr. If this value is -1, the 1872 * intrinsic consumes however many components are provided and it is not 1873 * validated at all. 1874 */ 1875 int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; 1876 1877 bool has_dest; 1878 1879 /** number of components of the output register 1880 * 1881 * If this value is 0, the number of components is given by the 1882 * num_components field of nir_intrinsic_instr. 1883 */ 1884 uint8_t dest_components; 1885 1886 /** bitfield of legal bit sizes */ 1887 uint8_t dest_bit_sizes; 1888 1889 /** source which the destination bit size must match 1890 * 1891 * Some intrinsics, such as subgroup intrinsics, are data manipulation 1892 * intrinsics and they have similar bit-size rules to ALU ops. This enables 1893 * validation to validate a bit more and enables auto-generated builder code 1894 * to properly determine destination bit sizes automatically. 1895 */ 1896 int8_t bit_size_src; 1897 1898 /** the number of constant indices used by the intrinsic */ 1899 uint8_t num_indices; 1900 1901 /** list of indices */ 1902 uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX]; 1903 1904 /** indicates the usage of intr->const_index[n] */ 1905 uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1906 1907 /** semantic flags for calls to this intrinsic */ 1908 nir_intrinsic_semantic_flag flags; 1909} nir_intrinsic_info; 1910 1911extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1912 1913unsigned 1914nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn); 1915 1916unsigned 1917nir_intrinsic_dest_components(nir_intrinsic_instr *intr); 1918 1919/** 1920 * Helper to copy const_index[] from src to dst, without assuming they 1921 * match in order. 1922 */ 1923void nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src); 1924 1925#include "nir_intrinsics_indices.h" 1926 1927static inline void 1928nir_intrinsic_set_align(nir_intrinsic_instr *intrin, 1929 unsigned align_mul, unsigned align_offset) 1930{ 1931 assert(util_is_power_of_two_nonzero(align_mul)); 1932 assert(align_offset < align_mul); 1933 nir_intrinsic_set_align_mul(intrin, align_mul); 1934 nir_intrinsic_set_align_offset(intrin, align_offset); 1935} 1936 1937/** Returns a simple alignment for a load/store intrinsic offset 1938 * 1939 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 1940 * and ALIGN_OFFSET parameters, this helper takes both into account and 1941 * provides a single simple alignment parameter. 
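 * For example, align_mul=16 with align_offset=4 collapses to a simple
 * alignment of 4, while align_offset=0 keeps the full align_mul of 16.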
The offset X is guaranteed 1942 * to satisfy X % align == 0. 1943 */ 1944static inline unsigned 1945nir_intrinsic_align(const nir_intrinsic_instr *intrin) 1946{ 1947 const unsigned align_mul = nir_intrinsic_align_mul(intrin); 1948 const unsigned align_offset = nir_intrinsic_align_offset(intrin); 1949 assert(align_offset < align_mul); 1950 return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; 1951} 1952 1953static inline bool 1954nir_intrinsic_has_align(const nir_intrinsic_instr *intrin) 1955{ 1956 return nir_intrinsic_has_align_mul(intrin) && 1957 nir_intrinsic_has_align_offset(intrin); 1958} 1959 1960unsigned 1961nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); 1962 1963/* Converts a image_deref_* intrinsic into a image_* one */ 1964void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 1965 nir_ssa_def *handle, bool bindless); 1966 1967/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */ 1968static inline bool 1969nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 1970{ 1971 if (nir_intrinsic_has_access(instr) && 1972 nir_intrinsic_access(instr) & ACCESS_VOLATILE) 1973 return false; 1974 if (instr->intrinsic == nir_intrinsic_load_deref) { 1975 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 1976 return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) || 1977 (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER); 1978 } else if (instr->intrinsic == nir_intrinsic_load_ssbo || 1979 instr->intrinsic == nir_intrinsic_bindless_image_load || 1980 instr->intrinsic == nir_intrinsic_image_deref_load || 1981 instr->intrinsic == nir_intrinsic_image_load) { 1982 return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; 1983 } else { 1984 const nir_intrinsic_info *info = 1985 &nir_intrinsic_infos[instr->intrinsic]; 1986 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 1987 (info->flags & NIR_INTRINSIC_CAN_REORDER); 1988 } 1989} 1990 1991bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr); 1992 1993/** Texture instruction source type */ 1994typedef enum { 1995 /** Texture coordinate 1996 * 1997 * Must have nir_tex_instr::coord_components components. 1998 */ 1999 nir_tex_src_coord, 2000 2001 /** Projector 2002 * 2003 * The texture coordinate (except for the array component, if any) is 2004 * divided by this value before LOD computation and sampling. 2005 * 2006 * Must be a float scalar. 2007 */ 2008 nir_tex_src_projector, 2009 2010 /** Shadow comparator 2011 * 2012 * For shadow sampling, the fetched texel values are compared against the 2013 * shadow comparator using the compare op specified by the sampler object 2014 * and converted to 1.0 if the comparison succeeds and 0.0 if it fails. 2015 * Interpolation happens after this conversion so the actual result may be 2016 * anywhere in the range [0.0, 1.0]. 2017 * 2018 * Only valid if nir_tex_instr::is_shadow and must be a float scalar. 2019 */ 2020 nir_tex_src_comparator, 2021 2022 /** Coordinate offset 2023 * 2024 * An integer value that is added to the texel address before sampling. 2025 * This is only allowed with operations that take an explicit LOD as it is 2026 * applied in integer texel space after LOD selection and not normalized 2027 * coordinate space. 2028 */ 2029 nir_tex_src_offset, 2030 2031 /** LOD bias 2032 * 2033 * This value is added to the computed LOD before mip-mapping. 
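 * For example, a GLSL texture(sampler, coord, bias) call is represented as
 * nir_texop_txb with this source.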
2034 */ 2035 nir_tex_src_bias, 2036 2037 /** Explicit LOD */ 2038 nir_tex_src_lod, 2039 2040 /** Min LOD 2041 * 2042 * The computed LOD is clamped to be at least as large as min_lod before 2043 * mip-mapping. 2044 */ 2045 nir_tex_src_min_lod, 2046 2047 /** MSAA sample index */ 2048 nir_tex_src_ms_index, 2049 2050 /** Intel-specific MSAA compression data */ 2051 nir_tex_src_ms_mcs_intel, 2052 2053 /** Explicit horizontal (X-major) coordinate derivative */ 2054 nir_tex_src_ddx, 2055 2056 /** Explicit vertical (Y-major) coordinate derivative */ 2057 nir_tex_src_ddy, 2058 2059 /** Texture variable dereference */ 2060 nir_tex_src_texture_deref, 2061 2062 /** Sampler variable dereference */ 2063 nir_tex_src_sampler_deref, 2064 2065 /** Texture index offset 2066 * 2067 * This is added to nir_tex_instr::texture_index. Unless 2068 * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be 2069 * dynamically uniform. 2070 */ 2071 nir_tex_src_texture_offset, 2072 2073 /** Dynamically uniform sampler index offset 2074 * 2075 * This is added to nir_tex_instr::sampler_index. Unless 2076 * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be 2077 * dynamically uniform. This should not be present until GLSL ES 3.20, GLSL 2078 * 4.00, or ARB_gpu_shader5, because in ES 3.10 and GL 3.30 samplers said 2079 * "When aggregated into arrays within a shader, samplers can only be indexed 2080 * with a constant integral expression." 2081 */ 2082 nir_tex_src_sampler_offset, 2083 2084 /** Bindless texture handle 2085 * 2086 * This is, unfortunately, a bit overloaded at the moment. There are 2087 * generally two types of bindless handles: 2088 * 2089 * 1. For GL_ARB_bindless bindless handles. These are part of the 2090 * GL/Gallium-level API and are always a 64-bit integer. 2091 * 2092 * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to 2093 * these. Also, these are used by many Vulkan drivers to implement 2094 * descriptor sets, especially for UPDATE_AFTER_BIND descriptors. 2095 * The details of hardware handles (bit size, format, etc.) is 2096 * HW-specific. 2097 * 2098 * Because of this overloading and the resulting ambiguity, we currently 2099 * don't validate anything for these. 2100 */ 2101 nir_tex_src_texture_handle, 2102 2103 /** Bindless sampler handle 2104 * 2105 * See nir_tex_src_texture_handle, 2106 */ 2107 nir_tex_src_sampler_handle, 2108 2109 /** Plane index for multi-plane YCbCr textures */ 2110 nir_tex_src_plane, 2111 2112 /** 2113 * Backend-specific vec4 tex src argument. 2114 * 2115 * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs) 2116 * apply to the packing of the tex srcs. This lowering must only happen 2117 * after nir_lower_tex(). 2118 * 2119 * The nir_tex_instr_src_type() of this argument is float, so no lowering 2120 * will happen if nir_lower_int_to_float is used. 2121 */ 2122 nir_tex_src_backend1, 2123 2124 /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. 
*/ 2125 nir_tex_src_backend2, 2126 2127 nir_num_tex_src_types 2128} nir_tex_src_type; 2129 2130/** A texture instruction source */ 2131typedef struct { 2132 /** Base source */ 2133 nir_src src; 2134 2135 /** Type of this source */ 2136 nir_tex_src_type src_type; 2137} nir_tex_src; 2138 2139/** Texture instruction opcode */ 2140typedef enum { 2141 nir_texop_tex, /**< Regular texture look-up */ 2142 nir_texop_txb, /**< Texture look-up with LOD bias */ 2143 nir_texop_txl, /**< Texture look-up with explicit LOD */ 2144 nir_texop_txd, /**< Texture look-up with partial derivatives */ 2145 nir_texop_txf, /**< Texel fetch with explicit LOD */ 2146 nir_texop_txf_ms, /**< Multisample texture fetch */ 2147 nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 2148 nir_texop_txf_ms_mcs_intel, /**< Multisample compression value fetch */ 2149 nir_texop_txs, /**< Texture size */ 2150 nir_texop_lod, /**< Texture lod query */ 2151 nir_texop_tg4, /**< Texture gather */ 2152 nir_texop_query_levels, /**< Texture levels query */ 2153 nir_texop_texture_samples, /**< Texture samples query */ 2154 nir_texop_samples_identical, /**< Query whether all samples are definitely 2155 * identical. 2156 */ 2157 nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ 2158 nir_texop_fragment_fetch_amd, /**< Multisample fragment color texture fetch */ 2159 nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */ 2160} nir_texop; 2161 2162/** Represents a texture instruction */ 2163typedef struct { 2164 /** Base instruction */ 2165 nir_instr instr; 2166 2167 /** Dimensionality of the texture operation 2168 * 2169 * This will typically match the dimensionality of the texture deref type 2170 * if a nir_tex_src_texture_deref is present. However, it may not if 2171 * texture lowering has occurred. 2172 */ 2173 enum glsl_sampler_dim sampler_dim; 2174 2175 /** ALU type of the destination 2176 * 2177 * This is the canonical sampled type for this texture operation and may 2178 * not exactly match the sampled type of the deref type when a 2179 * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of 2180 * the texture deref will be GLSL_TYPE_VOID and this is allowed to be 2181 * anything. With SPIR-V, the signedness of integer types is allowed to 2182 * differ. For all APIs, the bit size may differ if the driver has done 2183 * any sort of mediump or similar lowering since texture types always have 2184 * 32-bit sampled types. 2185 */ 2186 nir_alu_type dest_type; 2187 2188 /** Texture opcode */ 2189 nir_texop op; 2190 2191 /** Destination */ 2192 nir_dest dest; 2193 2194 /** Array of sources 2195 * 2196 * This array has nir_tex_instr::num_srcs elements 2197 */ 2198 nir_tex_src *src; 2199 2200 /** Number of sources */ 2201 unsigned num_srcs; 2202 2203 /** Number of components in the coordinate, if any */ 2204 unsigned coord_components; 2205 2206 /** True if the texture instruction acts on an array texture */ 2207 bool is_array; 2208 2209 /** True if the texture instruction performs a shadow comparison 2210 * 2211 * If this is true, the texture instruction must have a 2212 * nir_tex_src_comparator. 2213 */ 2214 bool is_shadow; 2215 2216 /** 2217 * If is_shadow is true, whether this is the old-style shadow that outputs 2218 * 4 components or the new-style shadow that outputs 1 component. 2219 */ 2220 bool is_new_style_shadow; 2221 2222 /** 2223 * True if this texture instruction should return a sparse residency code. 
2224 * The code is in the last component of the result. 2225 */ 2226 bool is_sparse; 2227 2228 /** nir_texop_tg4 component selector 2229 * 2230 * This determines which RGBA component is gathered. 2231 */ 2232 unsigned component : 2; 2233 2234 /** Validation needs to know this for gradient component count */ 2235 unsigned array_is_lowered_cube : 1; 2236 2237 /** Gather offsets */ 2238 int8_t tg4_offsets[4][2]; 2239 2240 /** True if the texture index or handle is not dynamically uniform */ 2241 bool texture_non_uniform; 2242 2243 /** True if the sampler index or handle is not dynamically uniform. 2244 * 2245 * This may be set when VK_EXT_descriptor_indexing is supported and the 2246 * appropriate capability is enabled. 2247 * 2248 * This should always be false in GLSL (GLSL ES 3.20 says "When aggregated 2249 * into arrays within a shader, opaque types can only be indexed with a 2250 * dynamically uniform integral expression", and GLSL 4.60 says "When 2251 * aggregated into arrays within a shader, [texture, sampler, and 2252 * samplerShadow] types can only be indexed with a dynamically uniform 2253 * expression, or texture lookup will result in undefined values."). 2254 */ 2255 bool sampler_non_uniform; 2256 2257 /** The texture index 2258 * 2259 * If this texture instruction has a nir_tex_src_texture_offset source, 2260 * then the texture index is given by texture_index + texture_offset. 2261 */ 2262 unsigned texture_index; 2263 2264 /** The sampler index 2265 * 2266 * The following operations do not require a sampler and, as such, this 2267 * field should be ignored: 2268 * - nir_texop_txf 2269 * - nir_texop_txf_ms 2270 * - nir_texop_txs 2271 * - nir_texop_query_levels 2272 * - nir_texop_texture_samples 2273 * - nir_texop_samples_identical 2274 * 2275 * If this texture instruction has a nir_tex_src_sampler_offset source, 2276 * then the sampler index is given by sampler_index + sampler_offset. 2277 */ 2278 unsigned sampler_index; 2279} nir_tex_instr; 2280 2281/** 2282 * Returns true if the texture operation requires a sampler as a general rule 2283 * 2284 * Note that the specific hw/driver backend could require to a sampler 2285 * object/configuration packet in any case, for some other reason. 2286 * 2287 * @see nir_tex_instr::sampler_index. 2288 */ 2289bool nir_tex_instr_need_sampler(const nir_tex_instr *instr); 2290 2291/** Returns the number of components returned by this nir_tex_instr 2292 * 2293 * Useful for code building texture instructions when you don't want to think 2294 * about how many components a particular texture op returns. This does not 2295 * include the sparse residency code. 2296 */ 2297unsigned 2298nir_tex_instr_result_size(const nir_tex_instr *instr); 2299 2300/** 2301 * Returns the destination size of this nir_tex_instr including the sparse 2302 * residency code, if any. 2303 */ 2304static inline unsigned 2305nir_tex_instr_dest_size(const nir_tex_instr *instr) 2306{ 2307 /* One more component is needed for the residency code. */ 2308 return nir_tex_instr_result_size(instr) + instr->is_sparse; 2309} 2310 2311/** 2312 * Returns true if this texture operation queries something about the texture 2313 * rather than actually sampling it. 2314 */ 2315bool 2316nir_tex_instr_is_query(const nir_tex_instr *instr); 2317 2318/** Returns true if this texture instruction does implicit derivatives 2319 * 2320 * This is important as there are extra control-flow rules around derivatives 2321 * and texture instructions which perform them implicitly. 
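 * For example, nir_texop_tex and nir_texop_txb compute implicit derivatives
 * and are subject to those rules, while nir_texop_txl and nir_texop_txf are
 * not.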
2322 */ 2323bool 2324nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr); 2325 2326/** Returns the ALU type of the given texture instruction source */ 2327nir_alu_type 2328nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src); 2329 2330/** 2331 * Returns the number of components required by the given texture instruction 2332 * source 2333 */ 2334unsigned 2335nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src); 2336 2337/** 2338 * Returns the index of the texture instruction source with the given 2339 * nir_tex_src_type or -1 if no such source exists. 2340 */ 2341static inline int 2342nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type) 2343{ 2344 for (unsigned i = 0; i < instr->num_srcs; i++) 2345 if (instr->src[i].src_type == type) 2346 return (int) i; 2347 2348 return -1; 2349} 2350 2351/** Adds a source to a texture instruction */ 2352void nir_tex_instr_add_src(nir_tex_instr *tex, 2353 nir_tex_src_type src_type, 2354 nir_src src); 2355 2356/** Removes a source from a texture instruction */ 2357void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 2358 2359bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex); 2360 2361typedef struct { 2362 nir_instr instr; 2363 2364 nir_ssa_def def; 2365 2366 nir_const_value value[]; 2367} nir_load_const_instr; 2368 2369typedef enum { 2370 /** Return from a function 2371 * 2372 * This instruction is a classic function return. It jumps to 2373 * nir_function_impl::end_block. No return value is provided in this 2374 * instruction. Instead, the function is expected to write any return 2375 * data to a deref passed in from the caller. 2376 */ 2377 nir_jump_return, 2378 2379 /** Immediately exit the current shader 2380 * 2381 * This instruction is roughly the equivalent of C's "exit()" in that it 2382 * immediately terminates the current shader invocation. From a CFG 2383 * perspective, it looks like a jump to nir_function_impl::end_block but 2384 * it actually jumps to the end block of the shader entrypoint. A halt 2385 * instruction in the shader entrypoint itself is semantically identical 2386 * to a return. 2387 * 2388 * For shaders with built-in I/O, any outputs written prior to a halt 2389 * instruction remain written and any outputs not written prior to the 2390 * halt have undefined values. It does NOT cause an implicit discard of 2391 * written results. If one wants discard results in a fragment shader, 2392 * for instance, a discard or demote intrinsic is required. 2393 */ 2394 nir_jump_halt, 2395 2396 /** Break out of the inner-most loop 2397 * 2398 * This has the same semantics as C's "break" statement. 2399 */ 2400 nir_jump_break, 2401 2402 /** Jump back to the top of the inner-most loop 2403 * 2404 * This has the same semantics as C's "continue" statement assuming that a 2405 * NIR loop is implemented as "while (1) { body }". 2406 */ 2407 nir_jump_continue, 2408 2409 /** Jumps for unstructured CFG. 2410 * 2411 * As within an unstructured CFG we can't rely on block ordering we need to 2412 * place explicit jumps at the end of every block. 
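 * nir_jump_goto is an unconditional jump to nir_jump_instr::target, while
 * nir_jump_goto_if jumps to either target or else_target depending on
 * condition.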
2413 */ 2414 nir_jump_goto, 2415 nir_jump_goto_if, 2416} nir_jump_type; 2417 2418typedef struct { 2419 nir_instr instr; 2420 nir_jump_type type; 2421 nir_src condition; 2422 struct nir_block *target; 2423 struct nir_block *else_target; 2424} nir_jump_instr; 2425 2426/* creates a new SSA variable in an undefined state */ 2427 2428typedef struct { 2429 nir_instr instr; 2430 nir_ssa_def def; 2431} nir_ssa_undef_instr; 2432 2433typedef struct { 2434 struct exec_node node; 2435 2436 /* The predecessor block corresponding to this source */ 2437 struct nir_block *pred; 2438 2439 nir_src src; 2440} nir_phi_src; 2441 2442#define nir_foreach_phi_src(phi_src, phi) \ 2443 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 2444#define nir_foreach_phi_src_safe(phi_src, phi) \ 2445 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 2446 2447typedef struct { 2448 nir_instr instr; 2449 2450 struct exec_list srcs; /** < list of nir_phi_src */ 2451 2452 nir_dest dest; 2453} nir_phi_instr; 2454 2455static inline nir_phi_src * 2456nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block) 2457{ 2458 nir_foreach_phi_src(src, phi) { 2459 if (src->pred == block) 2460 return src; 2461 } 2462 2463 assert(!"Block is not a predecessor of phi."); 2464 return NULL; 2465} 2466 2467typedef struct { 2468 struct exec_node node; 2469 nir_src src; 2470 nir_dest dest; 2471} nir_parallel_copy_entry; 2472 2473#define nir_foreach_parallel_copy_entry(entry, pcopy) \ 2474 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 2475 2476typedef struct { 2477 nir_instr instr; 2478 2479 /* A list of nir_parallel_copy_entrys. The sources of all of the 2480 * entries are copied to the corresponding destinations "in parallel". 2481 * In other words, if we have two entries: a -> b and b -> a, the values 2482 * get swapped. 
2483 */ 2484 struct exec_list entries; 2485} nir_parallel_copy_instr; 2486 2487NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 2488 type, nir_instr_type_alu) 2489NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 2490 type, nir_instr_type_deref) 2491NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 2492 type, nir_instr_type_call) 2493NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 2494 type, nir_instr_type_jump) 2495NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 2496 type, nir_instr_type_tex) 2497NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 2498 type, nir_instr_type_intrinsic) 2499NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 2500 type, nir_instr_type_load_const) 2501NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 2502 type, nir_instr_type_ssa_undef) 2503NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 2504 type, nir_instr_type_phi) 2505NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 2506 nir_parallel_copy_instr, instr, 2507 type, nir_instr_type_parallel_copy) 2508 2509 2510#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \ 2511static inline type \ 2512nir_src_comp_as_##suffix(nir_src src, unsigned comp) \ 2513{ \ 2514 assert(nir_src_is_const(src)); \ 2515 nir_load_const_instr *load = \ 2516 nir_instr_as_load_const(src.ssa->parent_instr); \ 2517 assert(comp < load->def.num_components); \ 2518 return nir_const_value_as_##suffix(load->value[comp], \ 2519 load->def.bit_size); \ 2520} \ 2521 \ 2522static inline type \ 2523nir_src_as_##suffix(nir_src src) \ 2524{ \ 2525 assert(nir_src_num_components(src) == 1); \ 2526 return nir_src_comp_as_##suffix(src, 0); \ 2527} 2528 2529NIR_DEFINE_SRC_AS_CONST(int64_t, int) 2530NIR_DEFINE_SRC_AS_CONST(uint64_t, uint) 2531NIR_DEFINE_SRC_AS_CONST(bool, bool) 2532NIR_DEFINE_SRC_AS_CONST(double, float) 2533 2534#undef NIR_DEFINE_SRC_AS_CONST 2535 2536 2537typedef struct { 2538 nir_ssa_def *def; 2539 unsigned comp; 2540} nir_ssa_scalar; 2541 2542static inline bool 2543nir_ssa_scalar_is_const(nir_ssa_scalar s) 2544{ 2545 return s.def->parent_instr->type == nir_instr_type_load_const; 2546} 2547 2548static inline nir_const_value 2549nir_ssa_scalar_as_const_value(nir_ssa_scalar s) 2550{ 2551 assert(s.comp < s.def->num_components); 2552 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 2553 return load->value[s.comp]; 2554} 2555 2556#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 2557static inline type \ 2558nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ 2559{ \ 2560 return nir_const_value_as_##suffix( \ 2561 nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ 2562} 2563 2564NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 2565NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 2566NIR_DEFINE_SCALAR_AS_CONST(bool, bool) 2567NIR_DEFINE_SCALAR_AS_CONST(double, float) 2568 2569#undef NIR_DEFINE_SCALAR_AS_CONST 2570 2571static inline bool 2572nir_ssa_scalar_is_alu(nir_ssa_scalar s) 2573{ 2574 return s.def->parent_instr->type == nir_instr_type_alu; 2575} 2576 2577static inline nir_op 2578nir_ssa_scalar_alu_op(nir_ssa_scalar s) 2579{ 2580 return nir_instr_as_alu(s.def->parent_instr)->op; 2581} 2582 2583static inline nir_ssa_scalar 2584nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 2585{ 2586 nir_ssa_scalar out = { NULL, 0 }; 2587 2588 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 2589 assert(alu_src_idx < 
nir_op_infos[alu->op].num_inputs); 2590 2591 /* Our component must be written */ 2592 assert(s.comp < s.def->num_components); 2593 assert(alu->dest.write_mask & (1u << s.comp)); 2594 2595 assert(alu->src[alu_src_idx].src.is_ssa); 2596 out.def = alu->src[alu_src_idx].src.ssa; 2597 2598 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 2599 /* The ALU src is unsized so the source component follows the 2600 * destination component. 2601 */ 2602 out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2603 } else { 2604 /* This is a sized source so all source components work together to 2605 * produce all the destination components. Since we need to return a 2606 * scalar, this only works if the source is a scalar. 2607 */ 2608 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2609 out.comp = alu->src[alu_src_idx].swizzle[0]; 2610 } 2611 assert(out.comp < out.def->num_components); 2612 2613 return out; 2614} 2615 2616nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s); 2617 2618static inline nir_ssa_scalar 2619nir_get_ssa_scalar(nir_ssa_def *def, unsigned channel) 2620{ 2621 nir_ssa_scalar s = { def, channel }; 2622 return s; 2623} 2624 2625/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */ 2626static inline nir_ssa_scalar 2627nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel) 2628{ 2629 return nir_ssa_scalar_chase_movs(nir_get_ssa_scalar(def, channel)); 2630} 2631 2632 2633typedef struct { 2634 bool success; 2635 2636 nir_variable *var; 2637 unsigned desc_set; 2638 unsigned binding; 2639 unsigned num_indices; 2640 nir_src indices[4]; 2641 bool read_first_invocation; 2642} nir_binding; 2643 2644nir_binding nir_chase_binding(nir_src rsrc); 2645nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding); 2646 2647 2648/* 2649 * Control flow 2650 * 2651 * Control flow consists of a tree of control flow nodes, which include 2652 * if-statements and loops. The leaves of the tree are basic blocks, lists of 2653 * instructions that always run start-to-finish. Each basic block also keeps 2654 * track of its successors (blocks which may run immediately after the current 2655 * block) and predecessors (blocks which could have run immediately before the 2656 * current block). Each function also has a start block and an end block which 2657 * all return statements point to (which is always empty). Together, all the 2658 * blocks with their predecessors and successors make up the control flow 2659 * graph (CFG) of the function. There are helpers that modify the tree of 2660 * control flow nodes while modifying the CFG appropriately; these should be 2661 * used instead of modifying the tree directly. 2662 */ 2663 2664typedef enum { 2665 nir_cf_node_block, 2666 nir_cf_node_if, 2667 nir_cf_node_loop, 2668 nir_cf_node_function 2669} nir_cf_node_type; 2670 2671typedef struct nir_cf_node { 2672 struct exec_node node; 2673 nir_cf_node_type type; 2674 struct nir_cf_node *parent; 2675} nir_cf_node; 2676 2677typedef struct nir_block { 2678 nir_cf_node cf_node; 2679 2680 struct exec_list instr_list; /** < list of nir_instr */ 2681 2682 /** generic block index; generated by nir_index_blocks */ 2683 unsigned index; 2684 2685 /* 2686 * Each block can only have up to 2 successors, so we put them in a simple 2687 * array - no need for anything more complicated. 
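 * For example, the block immediately before a nir_if has two successors (the
 * first block of the then-list and the first block of the else-list), while a
 * block ending in a return has only the function's end_block as its successor.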
2688 */ 2689 struct nir_block *successors[2]; 2690 2691 /* Set of nir_block predecessors in the CFG */ 2692 struct set *predecessors; 2693 2694 /* 2695 * this node's immediate dominator in the dominance tree - set to NULL for 2696 * the start block. 2697 */ 2698 struct nir_block *imm_dom; 2699 2700 /* This node's children in the dominance tree */ 2701 unsigned num_dom_children; 2702 struct nir_block **dom_children; 2703 2704 /* Set of nir_blocks on the dominance frontier of this block */ 2705 struct set *dom_frontier; 2706 2707 /* 2708 * These two indices have the property that dom_{pre,post}_index for each 2709 * child of this block in the dominance tree will always be between 2710 * dom_pre_index and dom_post_index for this block, which makes testing if 2711 * a given block is dominated by another block an O(1) operation. 2712 */ 2713 uint32_t dom_pre_index, dom_post_index; 2714 2715 /** 2716 * Value just before the first nir_instr->index in the block, but after 2717 * end_ip that of any predecessor block. 2718 */ 2719 uint32_t start_ip; 2720 /** 2721 * Value just after the last nir_instr->index in the block, but before the 2722 * start_ip of any successor block. 2723 */ 2724 uint32_t end_ip; 2725 2726 /* SSA def live in and out for this block; used for liveness analysis. 2727 * Indexed by ssa_def->index 2728 */ 2729 BITSET_WORD *live_in; 2730 BITSET_WORD *live_out; 2731} nir_block; 2732 2733static inline bool 2734nir_block_is_reachable(nir_block *b) 2735{ 2736 /* See also nir_block_dominates */ 2737 return b->dom_post_index != 0; 2738} 2739 2740static inline nir_instr * 2741nir_block_first_instr(nir_block *block) 2742{ 2743 struct exec_node *head = exec_list_get_head(&block->instr_list); 2744 return exec_node_data(nir_instr, head, node); 2745} 2746 2747static inline nir_instr * 2748nir_block_last_instr(nir_block *block) 2749{ 2750 struct exec_node *tail = exec_list_get_tail(&block->instr_list); 2751 return exec_node_data(nir_instr, tail, node); 2752} 2753 2754static inline bool 2755nir_block_ends_in_jump(nir_block *block) 2756{ 2757 return !exec_list_is_empty(&block->instr_list) && 2758 nir_block_last_instr(block)->type == nir_instr_type_jump; 2759} 2760 2761static inline bool 2762nir_block_ends_in_return_or_halt(nir_block *block) 2763{ 2764 if (exec_list_is_empty(&block->instr_list)) 2765 return false; 2766 2767 nir_instr *instr = nir_block_last_instr(block); 2768 if (instr->type != nir_instr_type_jump) 2769 return false; 2770 2771 nir_jump_instr *jump_instr = nir_instr_as_jump(instr); 2772 return jump_instr->type == nir_jump_return || 2773 jump_instr->type == nir_jump_halt; 2774} 2775 2776static inline bool 2777nir_block_ends_in_break(nir_block *block) 2778{ 2779 if (exec_list_is_empty(&block->instr_list)) 2780 return false; 2781 2782 nir_instr *instr = nir_block_last_instr(block); 2783 return instr->type == nir_instr_type_jump && 2784 nir_instr_as_jump(instr)->type == nir_jump_break; 2785} 2786 2787#define nir_foreach_instr(instr, block) \ 2788 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 2789#define nir_foreach_instr_reverse(instr, block) \ 2790 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 2791#define nir_foreach_instr_safe(instr, block) \ 2792 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 2793#define nir_foreach_instr_reverse_safe(instr, block) \ 2794 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 2795 2796static inline nir_phi_instr * 2797nir_block_last_phi_instr(nir_block 
*block) 2798{ 2799 nir_phi_instr *last_phi = NULL; 2800 nir_foreach_instr(instr, block) { 2801 if (instr->type == nir_instr_type_phi) 2802 last_phi = nir_instr_as_phi(instr); 2803 else 2804 return last_phi; 2805 } 2806 return last_phi; 2807} 2808 2809typedef enum { 2810 nir_selection_control_none = 0x0, 2811 nir_selection_control_flatten = 0x1, 2812 nir_selection_control_dont_flatten = 0x2, 2813} nir_selection_control; 2814 2815typedef struct nir_if { 2816 nir_cf_node cf_node; 2817 nir_src condition; 2818 nir_selection_control control; 2819 2820 struct exec_list then_list; /** < list of nir_cf_node */ 2821 struct exec_list else_list; /** < list of nir_cf_node */ 2822} nir_if; 2823 2824typedef struct { 2825 nir_if *nif; 2826 2827 /** Instruction that generates nif::condition. */ 2828 nir_instr *conditional_instr; 2829 2830 /** Block within ::nif that has the break instruction. */ 2831 nir_block *break_block; 2832 2833 /** Last block for the then- or else-path that does not contain the break. */ 2834 nir_block *continue_from_block; 2835 2836 /** True when ::break_block is in the else-path of ::nif. */ 2837 bool continue_from_then; 2838 bool induction_rhs; 2839 2840 /* This is true if the terminators exact trip count is unknown. For 2841 * example: 2842 * 2843 * for (int i = 0; i < imin(x, 4); i++) 2844 * ... 2845 * 2846 * Here loop analysis would have set a max_trip_count of 4 however we dont 2847 * know for sure that this is the exact trip count. 2848 */ 2849 bool exact_trip_count_unknown; 2850 2851 struct list_head loop_terminator_link; 2852} nir_loop_terminator; 2853 2854typedef struct { 2855 /* Induction variable. */ 2856 nir_ssa_def *def; 2857 2858 /* Init statement with only uniform. */ 2859 nir_src *init_src; 2860 2861 /* Update statement with only uniform. */ 2862 nir_alu_src *update_src; 2863} nir_loop_induction_variable; 2864 2865typedef struct { 2866 /* Estimated cost (in number of instructions) of the loop */ 2867 unsigned instr_cost; 2868 2869 /* Guessed trip count based on array indexing */ 2870 unsigned guessed_trip_count; 2871 2872 /* Maximum number of times the loop is run (if known) */ 2873 unsigned max_trip_count; 2874 2875 /* Do we know the exact number of times the loop will be run */ 2876 bool exact_trip_count_known; 2877 2878 /* Unroll the loop regardless of its size */ 2879 bool force_unroll; 2880 2881 /* Does the loop contain complex loop terminators, continues or other 2882 * complex behaviours? If this is true we can't rely on 2883 * loop_terminator_list to be complete or accurate. 2884 */ 2885 bool complex_loop; 2886 2887 nir_loop_terminator *limiting_terminator; 2888 2889 /* A list of loop_terminators terminating this loop. 
*/ 2890 struct list_head loop_terminator_list; 2891 2892 /* array of induction variables for this loop */ 2893 nir_loop_induction_variable *induction_vars; 2894 unsigned num_induction_vars; 2895} nir_loop_info; 2896 2897typedef enum { 2898 nir_loop_control_none = 0x0, 2899 nir_loop_control_unroll = 0x1, 2900 nir_loop_control_dont_unroll = 0x2, 2901} nir_loop_control; 2902 2903typedef struct { 2904 nir_cf_node cf_node; 2905 2906 struct exec_list body; /** < list of nir_cf_node */ 2907 2908 nir_loop_info *info; 2909 nir_loop_control control; 2910 bool partially_unrolled; 2911 bool divergent; 2912} nir_loop; 2913 2914/** 2915 * Various bits of metadata that can may be created or required by 2916 * optimization and analysis passes 2917 */ 2918typedef enum { 2919 nir_metadata_none = 0x0, 2920 2921 /** Indicates that nir_block::index values are valid. 2922 * 2923 * The start block has index 0 and they increase through a natural walk of 2924 * the CFG. nir_function_impl::num_blocks is the number of blocks and 2925 * every block index is in the range [0, nir_function_impl::num_blocks]. 2926 * 2927 * A pass can preserve this metadata type if it doesn't touch the CFG. 2928 */ 2929 nir_metadata_block_index = 0x1, 2930 2931 /** Indicates that block dominance information is valid 2932 * 2933 * This includes: 2934 * 2935 * - nir_block::num_dom_children 2936 * - nir_block::dom_children 2937 * - nir_block::dom_frontier 2938 * - nir_block::dom_pre_index 2939 * - nir_block::dom_post_index 2940 * 2941 * A pass can preserve this metadata type if it doesn't touch the CFG. 2942 */ 2943 nir_metadata_dominance = 0x2, 2944 2945 /** Indicates that SSA def data-flow liveness information is valid 2946 * 2947 * This includes: 2948 * 2949 * - nir_block::live_in 2950 * - nir_block::live_out 2951 * 2952 * A pass can preserve this metadata type if it never adds or removes any 2953 * SSA defs or uses of SSA defs (most passes shouldn't preserve this 2954 * metadata type). 2955 */ 2956 nir_metadata_live_ssa_defs = 0x4, 2957 2958 /** A dummy metadata value to track when a pass forgot to call 2959 * nir_metadata_preserve. 2960 * 2961 * A pass should always clear this value even if it doesn't make any 2962 * progress to indicate that it thought about preserving metadata. 2963 */ 2964 nir_metadata_not_properly_reset = 0x8, 2965 2966 /** Indicates that loop analysis information is valid. 2967 * 2968 * This includes everything pointed to by nir_loop::info. 2969 * 2970 * A pass can preserve this metadata type if it is guaranteed to not affect 2971 * any loop metadata. However, since loop metadata includes things like 2972 * loop counts which depend on arithmetic in the loop, this is very hard to 2973 * determine. Most passes shouldn't preserve this metadata type. 2974 */ 2975 nir_metadata_loop_analysis = 0x10, 2976 2977 /** Indicates that nir_instr::index values are valid. 2978 * 2979 * The start instruction has index 0 and they increase through a natural 2980 * walk of instructions in blocks in the CFG. The indices my have holes 2981 * after passes such as DCE. 2982 * 2983 * A pass can preserve this metadata type if it never adds or moves any 2984 * instructions (most passes shouldn't preserve this metadata type), but 2985 * can preserve it if it only removes instructions. 2986 */ 2987 nir_metadata_instr_index = 0x20, 2988 2989 /** All metadata 2990 * 2991 * This includes all nir_metadata flags except not_properly_reset. 
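 * A pass that modifies instructions but leaves the CFG intact typically
 * calls
 *
 *    nir_metadata_preserve(impl, nir_metadata_block_index |
 *                                nir_metadata_dominance);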
Passes 2992 * which do not change the shader in any way should call 2993 * 2994 * nir_metadata_preserve(impl, nir_metadata_all); 2995 */ 2996 nir_metadata_all = ~nir_metadata_not_properly_reset, 2997} nir_metadata; 2998MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata) 2999 3000typedef struct { 3001 nir_cf_node cf_node; 3002 3003 /** pointer to the function of which this is an implementation */ 3004 struct nir_function *function; 3005 3006 /** 3007 * For entrypoints, a pointer to a nir_function_impl which runs before 3008 * it, once per draw or dispatch, communicating via store_preamble and 3009 * load_preamble intrinsics. If NULL then there is no preamble. 3010 */ 3011 struct nir_function *preamble; 3012 3013 struct exec_list body; /** < list of nir_cf_node */ 3014 3015 nir_block *end_block; 3016 3017 /** list for all local variables in the function */ 3018 struct exec_list locals; 3019 3020 /** list of local registers in the function */ 3021 struct exec_list registers; 3022 3023 /** next available local register index */ 3024 unsigned reg_alloc; 3025 3026 /** next available SSA value index */ 3027 unsigned ssa_alloc; 3028 3029 /* total number of basic blocks, only valid when block_index_dirty = false */ 3030 unsigned num_blocks; 3031 3032 /** True if this nir_function_impl uses structured control-flow 3033 * 3034 * Structured nir_function_impls have different validation rules. 3035 */ 3036 bool structured; 3037 3038 nir_metadata valid_metadata; 3039} nir_function_impl; 3040 3041#define nir_foreach_function_temp_variable(var, impl) \ 3042 foreach_list_typed(nir_variable, var, node, &(impl)->locals) 3043 3044#define nir_foreach_function_temp_variable_safe(var, impl) \ 3045 foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals) 3046 3047ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 3048nir_start_block(nir_function_impl *impl) 3049{ 3050 return (nir_block *) impl->body.head_sentinel.next; 3051} 3052 3053ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 3054nir_impl_last_block(nir_function_impl *impl) 3055{ 3056 return (nir_block *) impl->body.tail_sentinel.prev; 3057} 3058 3059static inline nir_cf_node * 3060nir_cf_node_next(nir_cf_node *node) 3061{ 3062 struct exec_node *next = exec_node_get_next(&node->node); 3063 if (exec_node_is_tail_sentinel(next)) 3064 return NULL; 3065 else 3066 return exec_node_data(nir_cf_node, next, node); 3067} 3068 3069static inline nir_cf_node * 3070nir_cf_node_prev(nir_cf_node *node) 3071{ 3072 struct exec_node *prev = exec_node_get_prev(&node->node); 3073 if (exec_node_is_head_sentinel(prev)) 3074 return NULL; 3075 else 3076 return exec_node_data(nir_cf_node, prev, node); 3077} 3078 3079static inline bool 3080nir_cf_node_is_first(const nir_cf_node *node) 3081{ 3082 return exec_node_is_head_sentinel(node->node.prev); 3083} 3084 3085static inline bool 3086nir_cf_node_is_last(const nir_cf_node *node) 3087{ 3088 return exec_node_is_tail_sentinel(node->node.next); 3089} 3090 3091NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 3092 type, nir_cf_node_block) 3093NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 3094 type, nir_cf_node_if) 3095NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 3096 type, nir_cf_node_loop) 3097NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 3098 nir_function_impl, cf_node, type, nir_cf_node_function) 3099 3100static inline nir_block * 3101nir_if_first_then_block(nir_if *if_stmt) 3102{ 3103 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 
3104 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3105} 3106 3107static inline nir_block * 3108nir_if_last_then_block(nir_if *if_stmt) 3109{ 3110 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 3111 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3112} 3113 3114static inline nir_block * 3115nir_if_first_else_block(nir_if *if_stmt) 3116{ 3117 struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 3118 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3119} 3120 3121static inline nir_block * 3122nir_if_last_else_block(nir_if *if_stmt) 3123{ 3124 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 3125 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3126} 3127 3128static inline nir_block * 3129nir_loop_first_block(nir_loop *loop) 3130{ 3131 struct exec_node *head = exec_list_get_head(&loop->body); 3132 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3133} 3134 3135static inline nir_block * 3136nir_loop_last_block(nir_loop *loop) 3137{ 3138 struct exec_node *tail = exec_list_get_tail(&loop->body); 3139 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3140} 3141 3142/** 3143 * Return true if this list of cf_nodes contains a single empty block. 3144 */ 3145static inline bool 3146nir_cf_list_is_empty_block(struct exec_list *cf_list) 3147{ 3148 if (exec_list_is_singular(cf_list)) { 3149 struct exec_node *head = exec_list_get_head(cf_list); 3150 nir_block *block = 3151 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3152 return exec_list_is_empty(&block->instr_list); 3153 } 3154 return false; 3155} 3156 3157typedef struct { 3158 uint8_t num_components; 3159 uint8_t bit_size; 3160} nir_parameter; 3161 3162typedef struct nir_printf_info { 3163 unsigned num_args; 3164 unsigned *arg_sizes; 3165 unsigned string_size; 3166 char *strings; 3167} nir_printf_info; 3168 3169typedef struct nir_function { 3170 struct exec_node node; 3171 3172 const char *name; 3173 struct nir_shader *shader; 3174 3175 unsigned num_params; 3176 nir_parameter *params; 3177 3178 /** The implementation of this function. 3179 * 3180 * If the function is only declared and not implemented, this is NULL. 
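 * An implementation can be attached later with nir_function_impl_create().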
3181 */ 3182 nir_function_impl *impl; 3183 3184 bool is_entrypoint; 3185 bool is_preamble; 3186} nir_function; 3187 3188typedef enum { 3189 nir_lower_imul64 = (1 << 0), 3190 nir_lower_isign64 = (1 << 1), 3191 /** Lower all int64 modulus and division opcodes */ 3192 nir_lower_divmod64 = (1 << 2), 3193 /** Lower all 64-bit umul_high and imul_high opcodes */ 3194 nir_lower_imul_high64 = (1 << 3), 3195 nir_lower_mov64 = (1 << 4), 3196 nir_lower_icmp64 = (1 << 5), 3197 nir_lower_iadd64 = (1 << 6), 3198 nir_lower_iabs64 = (1 << 7), 3199 nir_lower_ineg64 = (1 << 8), 3200 nir_lower_logic64 = (1 << 9), 3201 nir_lower_minmax64 = (1 << 10), 3202 nir_lower_shift64 = (1 << 11), 3203 nir_lower_imul_2x32_64 = (1 << 12), 3204 nir_lower_extract64 = (1 << 13), 3205 nir_lower_ufind_msb64 = (1 << 14), 3206 nir_lower_bit_count64 = (1 << 15), 3207 nir_lower_subgroup_shuffle64 = (1 << 16), 3208 nir_lower_scan_reduce_bitwise64 = (1 << 17), 3209 nir_lower_scan_reduce_iadd64 = (1 << 18), 3210 nir_lower_vote_ieq64 = (1 << 19), 3211 nir_lower_usub_sat64 = (1 << 20), 3212 nir_lower_iadd_sat64 = (1 << 21), 3213} nir_lower_int64_options; 3214 3215typedef enum { 3216 nir_lower_drcp = (1 << 0), 3217 nir_lower_dsqrt = (1 << 1), 3218 nir_lower_drsq = (1 << 2), 3219 nir_lower_dtrunc = (1 << 3), 3220 nir_lower_dfloor = (1 << 4), 3221 nir_lower_dceil = (1 << 5), 3222 nir_lower_dfract = (1 << 6), 3223 nir_lower_dround_even = (1 << 7), 3224 nir_lower_dmod = (1 << 8), 3225 nir_lower_dsub = (1 << 9), 3226 nir_lower_ddiv = (1 << 10), 3227 nir_lower_fp64_full_software = (1 << 11), 3228} nir_lower_doubles_options; 3229 3230typedef enum { 3231 nir_divergence_single_prim_per_subgroup = (1 << 0), 3232 nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), 3233 nir_divergence_single_patch_per_tes_subgroup = (1 << 2), 3234 nir_divergence_view_index_uniform = (1 << 3), 3235 nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4), 3236 nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5), 3237} nir_divergence_options; 3238 3239typedef enum { 3240 nir_pack_varying_interp_mode_none = (1 << 0), 3241 nir_pack_varying_interp_mode_smooth = (1 << 1), 3242 nir_pack_varying_interp_mode_flat = (1 << 2), 3243 nir_pack_varying_interp_mode_noperspective = (1 << 3), 3244 nir_pack_varying_interp_loc_sample = (1 << 16), 3245 nir_pack_varying_interp_loc_centroid = (1 << 17), 3246 nir_pack_varying_interp_loc_center = (1 << 18), 3247} nir_pack_varying_options; 3248 3249/** An instruction filtering callback 3250 * 3251 * Returns true if the instruction should be processed and false otherwise. 3252 */ 3253typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); 3254 3255/** A vectorization width callback 3256 * 3257 * Returns the maximum vectorization width per instruction. 3258 * 0, if the instruction must not be modified. 3259 * 3260 * The vectorization width must be a power of 2. 3261 */ 3262typedef uint8_t (*nir_vectorize_cb)(const nir_instr *, const void *); 3263 3264typedef struct nir_shader_compiler_options { 3265 bool lower_fdiv; 3266 bool lower_ffma16; 3267 bool lower_ffma32; 3268 bool lower_ffma64; 3269 bool fuse_ffma16; 3270 bool fuse_ffma32; 3271 bool fuse_ffma64; 3272 bool lower_flrp16; 3273 bool lower_flrp32; 3274 /** Lowers flrp when it does not support doubles */ 3275 bool lower_flrp64; 3276 bool lower_fpow; 3277 bool lower_fsat; 3278 bool lower_fsqrt; 3279 bool lower_sincos; 3280 bool lower_fmod; 3281 /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. 
*/ 3282 bool lower_bitfield_extract; 3283 /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ 3284 bool lower_bitfield_extract_to_shifts; 3285 /** Lowers bitfield_insert to bfi/bfm */ 3286 bool lower_bitfield_insert; 3287 /** Lowers bitfield_insert to compares, and shifts. */ 3288 bool lower_bitfield_insert_to_shifts; 3289 /** Lowers bitfield_insert to bfm/bitfield_select. */ 3290 bool lower_bitfield_insert_to_bitfield_select; 3291 /** Lowers bitfield_reverse to shifts. */ 3292 bool lower_bitfield_reverse; 3293 /** Lowers bit_count to shifts. */ 3294 bool lower_bit_count; 3295 /** Lowers ifind_msb to compare and ufind_msb */ 3296 bool lower_ifind_msb; 3297 /** Lowers ifind_msb and ufind_msb to reverse variants */ 3298 bool lower_find_msb_to_reverse; 3299 /** Lowers find_lsb to ufind_msb and logic ops */ 3300 bool lower_find_lsb; 3301 bool lower_uadd_carry; 3302 bool lower_usub_borrow; 3303 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 3304 bool lower_mul_high; 3305 /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */ 3306 bool lower_fneg; 3307 /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */ 3308 bool lower_ineg; 3309 /** lowers fisnormal to alu ops. */ 3310 bool lower_fisnormal; 3311 3312 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ 3313 bool lower_scmp; 3314 3315 /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ 3316 bool lower_vector_cmp; 3317 3318 /** enable rules to avoid bit ops */ 3319 bool lower_bitops; 3320 3321 /** enables rules to lower isign to imin+imax */ 3322 bool lower_isign; 3323 3324 /** enables rules to lower fsign to fsub and flt */ 3325 bool lower_fsign; 3326 3327 /** enables rules to lower iabs to ineg+imax */ 3328 bool lower_iabs; 3329 3330 /** enable rules that avoid generating umax from signed integer ops */ 3331 bool lower_umax; 3332 3333 /** enable rules that avoid generating umin from signed integer ops */ 3334 bool lower_umin; 3335 3336 /* lower fdph to fdot4 */ 3337 bool lower_fdph; 3338 3339 /** lower fdot to fmul and fsum/fadd. */ 3340 bool lower_fdot; 3341 3342 /* Does the native fdot instruction replicate its result for four 3343 * components? If so, then opt_algebraic_late will turn all fdotN 3344 * instructions into fdotN_replicated instructions. 3345 */ 3346 bool fdot_replicates; 3347 3348 /** lowers ffloor to fsub+ffract: */ 3349 bool lower_ffloor; 3350 3351 /** lowers ffract to fsub+ffloor: */ 3352 bool lower_ffract; 3353 3354 /** lowers fceil to fneg+ffloor+fneg: */ 3355 bool lower_fceil; 3356 3357 bool lower_ftrunc; 3358 3359 /** Lowers fround_even to ffract+feq+csel. 3360 * 3361 * Not correct in that it doesn't correctly handle the "_even" part of the 3362 * rounding, but good enough for DX9 array indexing handling on DX9-class 3363 * hardware. 
3364 */ 3365 bool lower_fround_even; 3366 3367 bool lower_ldexp; 3368 3369 bool lower_pack_half_2x16; 3370 bool lower_pack_unorm_2x16; 3371 bool lower_pack_snorm_2x16; 3372 bool lower_pack_unorm_4x8; 3373 bool lower_pack_snorm_4x8; 3374 bool lower_pack_64_2x32; 3375 bool lower_pack_64_4x16; 3376 bool lower_pack_32_2x16; 3377 bool lower_pack_64_2x32_split; 3378 bool lower_pack_32_2x16_split; 3379 bool lower_unpack_half_2x16; 3380 bool lower_unpack_unorm_2x16; 3381 bool lower_unpack_snorm_2x16; 3382 bool lower_unpack_unorm_4x8; 3383 bool lower_unpack_snorm_4x8; 3384 bool lower_unpack_64_2x32_split; 3385 bool lower_unpack_32_2x16_split; 3386 3387 bool lower_pack_split; 3388 3389 bool lower_extract_byte; 3390 bool lower_extract_word; 3391 bool lower_insert_byte; 3392 bool lower_insert_word; 3393 3394 bool lower_all_io_to_temps; 3395 bool lower_all_io_to_elements; 3396 3397 /* Indicates that the driver only has zero-based vertex id */ 3398 bool vertex_id_zero_based; 3399 3400 /** 3401 * If enabled, gl_BaseVertex will be lowered as: 3402 * is_indexed_draw (~0/0) & firstvertex 3403 */ 3404 bool lower_base_vertex; 3405 3406 /** 3407 * If enabled, gl_HelperInvocation will be lowered as: 3408 * 3409 * !((1 << sample_id) & sample_mask_in)) 3410 * 3411 * This depends on some possibly hw implementation details, which may 3412 * not be true for all hw. In particular that the FS is only executed 3413 * for covered samples or for helper invocations. So, do not blindly 3414 * enable this option. 3415 * 3416 * Note: See also issue #22 in ARB_shader_image_load_store 3417 */ 3418 bool lower_helper_invocation; 3419 3420 /** 3421 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 3422 * 3423 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 3424 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 3425 */ 3426 bool optimize_sample_mask_in; 3427 3428 bool lower_cs_local_index_to_id; 3429 bool lower_cs_local_id_to_index; 3430 3431 /* Prevents lowering global_invocation_id to be in terms of workgroup_id */ 3432 bool has_cs_global_id; 3433 3434 bool lower_device_index_to_zero; 3435 3436 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord. 3437 * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN 3438 * is GL_LOWER_LEFT. 3439 */ 3440 bool lower_wpos_pntc; 3441 3442 /** 3443 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be 3444 * lowered to simple arithmetic. 3445 * 3446 * If this flag is set, the lowering will be applied to all bit-sizes of 3447 * these instructions. 3448 * 3449 * \sa ::lower_hadd64 3450 */ 3451 bool lower_hadd; 3452 3453 /** 3454 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions 3455 * should be lowered to simple arithmetic. 3456 * 3457 * If this flag is set, the lowering will be applied to only 64-bit 3458 * versions of these instructions. 3459 * 3460 * \sa ::lower_hadd 3461 */ 3462 bool lower_hadd64; 3463 3464 /** 3465 * Set if nir_op_uadd_sat should be lowered to simple arithmetic. 3466 * 3467 * If this flag is set, the lowering will be applied to all bit-sizes of 3468 * these instructions. 3469 */ 3470 bool lower_uadd_sat; 3471 3472 /** 3473 * Set if nir_op_usub_sat should be lowered to simple arithmetic. 3474 * 3475 * If this flag is set, the lowering will be applied to all bit-sizes of 3476 * these instructions. 3477 */ 3478 bool lower_usub_sat; 3479 3480 /** 3481 * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple 3482 * arithmetic. 
3483 * 3484 * If this flag is set, the lowering will be applied to all bit-sizes of 3485 * these instructions. 3486 */ 3487 bool lower_iadd_sat; 3488 3489 /** 3490 * Set if imul_32x16 and umul_32x16 should be lowered to simple 3491 * arithmetic. 3492 */ 3493 bool lower_mul_32x16; 3494 3495 /** 3496 * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's 3497 * for IO purposes and would prefer loads/stores be vectorized. 3498 */ 3499 bool vectorize_io; 3500 bool lower_to_scalar; 3501 nir_instr_filter_cb lower_to_scalar_filter; 3502 3503 /** 3504 * Disables potentially harmful algebraic transformations for architectures 3505 * with SIMD-within-a-register semantics. 3506 * 3507 * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize() 3508 * with a suitable callback function. 3509 */ 3510 bool vectorize_vec2_16bit; 3511 3512 /** 3513 * Should the linker unify inputs_read/outputs_written between adjacent 3514 * shader stages which are linked into a single program? 3515 */ 3516 bool unify_interfaces; 3517 3518 /** 3519 * Should nir_lower_io() create load_interpolated_input intrinsics? 3520 * 3521 * If not, it generates regular load_input intrinsics and interpolation 3522 * information must be inferred from the list of input nir_variables. 3523 */ 3524 bool use_interpolated_input_intrinsics; 3525 3526 3527 /** 3528 * Whether nir_lower_io() will lower interpolateAt functions to 3529 * load_interpolated_input intrinsics. 3530 * 3531 * Unlike use_interpolated_input_intrinsics this will only lower these 3532 * functions and leave input load intrinsics untouched. 3533 */ 3534 bool lower_interpolate_at; 3535 3536 /* Lowers when 32x32->64 bit multiplication is not supported */ 3537 bool lower_mul_2x32_64; 3538 3539 /* Lowers when rotate instruction is not supported */ 3540 bool lower_rotate; 3541 3542 /** Backend supports ternary addition */ 3543 bool has_iadd3; 3544 3545 /** 3546 * Backend supports imul24, and would like to use it (when possible) 3547 * for address/offset calculation. If true, driver should call 3548 * nir_lower_amul(). (If not set, amul will automatically be lowered 3549 * to imul.) 3550 */ 3551 bool has_imul24; 3552 3553 /** Backend supports umul24, if not set umul24 will automatically be lowered 3554 * to imul with masked inputs */ 3555 bool has_umul24; 3556 3557 /** Backend supports umad24, if not set umad24 will automatically be lowered 3558 * to imul with masked inputs and iadd */ 3559 bool has_umad24; 3560 3561 /* Backend supports fused comapre against zero and csel */ 3562 bool has_fused_comp_and_csel; 3563 3564 /** Backend supports fsub, if not set fsub will automatically be lowered to 3565 * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3566 bool has_fsub; 3567 3568 /** Backend supports isub, if not set isub will automatically be lowered to 3569 * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3570 bool has_isub; 3571 3572 /** Backend supports pack_32_4x8 or pack_32_4x8_split. */ 3573 bool has_pack_32_4x8; 3574 3575 /** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants 3576 * for rect texture lowering. */ 3577 bool has_txs; 3578 3579 /** Backend supports sdot_4x8 opcodes. */ 3580 bool has_sdot_4x8; 3581 3582 /** Backend supports udot_4x8 opcodes. */ 3583 bool has_udot_4x8; 3584 3585 /** Backend supports sudot_4x8 opcodes. */ 3586 bool has_sudot_4x8; 3587 3588 /** Backend supports sdot_2x16 and udot_2x16 opcodes. 
*/ 3589 bool has_dot_2x16; 3590 3591 /* Whether to generate only scoped_barrier intrinsics instead of the set of 3592 * memory and control barrier intrinsics based on GLSL. 3593 */ 3594 bool use_scoped_barrier; 3595 3596 /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ 3597 bool has_fmulz; 3598 3599 /** 3600 * Is this the Intel vec4 backend? 3601 * 3602 * Used to inhibit algebraic optimizations that are known to be harmful on 3603 * the Intel vec4 backend. This is generally applicable to any 3604 * optimization that might cause more immediate values to be used in 3605 * 3-source (e.g., ffma and flrp) instructions. 3606 */ 3607 bool intel_vec4; 3608 3609 /** 3610 * For most Intel GPUs, all ternary operations such as FMA and BFE cannot 3611 * have immediates, so two to three instructions may eventually be needed. 3612 */ 3613 bool avoid_ternary_with_two_constants; 3614 3615 /** Whether 8-bit ALU is supported. */ 3616 bool support_8bit_alu; 3617 3618 /** Whether 16-bit ALU is supported. */ 3619 bool support_16bit_alu; 3620 3621 unsigned max_unroll_iterations; 3622 unsigned max_unroll_iterations_aggressive; 3623 3624 bool lower_uniforms_to_ubo; 3625 3626 /* If the precision is ignored, backends that don't handle 3627 * different precisions when passing data between stages and use 3628 * vectorized IO can pack more varyings when linking. */ 3629 bool linker_ignore_precision; 3630 3631 /* Specifies if indirect sampler array access will trigger forced loop 3632 * unrolling. 3633 */ 3634 bool force_indirect_unrolling_sampler; 3635 3636 /* Some older drivers don't support GLSL versions with the concept of flat 3637 * varyings and also don't support integers. This setting helps us avoid 3638 * marking varyings as flat and potentially having them changed to ints via 3639 * varying packing. 3640 */ 3641 bool no_integers; 3642 3643 /** 3644 * Specifies which type of indirectly accessed variables should force 3645 * loop unrolling. 3646 */ 3647 nir_variable_mode force_indirect_unrolling; 3648 3649 nir_lower_int64_options lower_int64_options; 3650 nir_lower_doubles_options lower_doubles_options; 3651 nir_divergence_options divergence_analysis_options; 3652 3653 /** 3654 * Support pack varyings with different interpolation location 3655 * (center, centroid, sample) and mode (flat, noperspective, smooth) 3656 * into same slot. 3657 */ 3658 nir_pack_varying_options pack_varying_options; 3659 3660 /** 3661 * Lower load_deref/store_deref of inputs and outputs into 3662 * load_input/store_input intrinsics. This is used by nir_lower_io_passes. 3663 */ 3664 bool lower_io_variables; 3665 3666 /** 3667 * Lower color inputs to load_colorN that are kind of like system values 3668 * if lower_io_variables is also set. shader_info will contain 3669 * the interpolation settings. This is used by nir_lower_io_passes. 3670 */ 3671 bool lower_fs_color_inputs; 3672 3673 /** 3674 * The masks of shader stages that support indirect indexing with 3675 * load_input and store_output intrinsics. It's used when 3676 * lower_io_variables is true. This is used by nir_lower_io_passes. 3677 */ 3678 uint8_t support_indirect_inputs; 3679 uint8_t support_indirect_outputs; 3680 3681 /** 3682 * Remove varying loaded from uniform, let fragment shader load the 3683 * uniform directly. GPU passing varying by memory can benifit from it 3684 * for sure; but GPU passing varying by on chip resource may not. 
3685 * Because it saves on chip resource but may increase memory pressure when 3686 * fragment task is far more than vertex one, so better left it disabled. 3687 */ 3688 bool lower_varying_from_uniform; 3689} nir_shader_compiler_options; 3690 3691typedef struct nir_shader { 3692 /** list of uniforms (nir_variable) */ 3693 struct exec_list variables; 3694 3695 /** Set of driver-specific options for the shader. 3696 * 3697 * The memory for the options is expected to be kept in a single static 3698 * copy by the driver. 3699 */ 3700 const struct nir_shader_compiler_options *options; 3701 3702 /** Various bits of compile-time information about a given shader */ 3703 struct shader_info info; 3704 3705 struct exec_list functions; /** < list of nir_function */ 3706 3707 struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. */ 3708 3709 /** 3710 * The size of the variable space for load_input_*, load_uniform_*, etc. 3711 * intrinsics. This is in back-end specific units which is likely one of 3712 * bytes, dwords, or vec4s depending on context and back-end. 3713 */ 3714 unsigned num_inputs, num_uniforms, num_outputs; 3715 3716 /** Size in bytes of required implicitly bound global memory */ 3717 unsigned global_mem_size; 3718 3719 /** Size in bytes of required scratch space */ 3720 unsigned scratch_size; 3721 3722 /** Constant data associated with this shader. 3723 * 3724 * Constant data is loaded through load_constant intrinsics (as compared to 3725 * the NIR load_const instructions which have the constant value inlined 3726 * into them). This is usually generated by nir_opt_large_constants (so 3727 * shaders don't have to load_const into a temporary array when they want 3728 * to indirect on a const array). 3729 */ 3730 void *constant_data; 3731 /** Size of the constant data associated with the shader, in bytes */ 3732 unsigned constant_data_size; 3733 3734 struct nir_xfb_info *xfb_info; 3735 3736 unsigned printf_info_count; 3737 nir_printf_info *printf_info; 3738} nir_shader; 3739 3740#define nir_foreach_function(func, shader) \ 3741 foreach_list_typed(nir_function, func, node, &(shader)->functions) 3742 3743static inline nir_function_impl * 3744nir_shader_get_entrypoint(const nir_shader *shader) 3745{ 3746 nir_function *func = NULL; 3747 3748 nir_foreach_function(function, shader) { 3749 assert(func == NULL); 3750 if (function->is_entrypoint) { 3751 func = function; 3752#ifndef NDEBUG 3753 break; 3754#endif 3755 } 3756 } 3757 3758 if (!func) 3759 return NULL; 3760 3761 assert(func->num_params == 0); 3762 assert(func->impl); 3763 return func->impl; 3764} 3765 3766void nir_remove_non_entrypoints(nir_shader *shader); 3767 3768nir_shader *nir_shader_create(void *mem_ctx, 3769 gl_shader_stage stage, 3770 const nir_shader_compiler_options *options, 3771 shader_info *si); 3772 3773nir_register *nir_local_reg_create(nir_function_impl *impl); 3774 3775void nir_reg_remove(nir_register *reg); 3776 3777/** Adds a variable to the appropriate list in nir_shader */ 3778void nir_shader_add_variable(nir_shader *shader, nir_variable *var); 3779 3780static inline void 3781nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 3782{ 3783 assert(var->data.mode == nir_var_function_temp); 3784 exec_list_push_tail(&impl->locals, &var->node); 3785} 3786 3787/** creates a variable, sets a few defaults, and adds it to the list */ 3788nir_variable *nir_variable_create(nir_shader *shader, 3789 nir_variable_mode mode, 3790 const struct glsl_type *type, 3791 
const char *name); 3792/** creates a local variable and adds it to the list */ 3793nir_variable *nir_local_variable_create(nir_function_impl *impl, 3794 const struct glsl_type *type, 3795 const char *name); 3796 3797nir_variable *nir_find_variable_with_location(nir_shader *shader, 3798 nir_variable_mode mode, 3799 unsigned location); 3800 3801nir_variable *nir_find_variable_with_driver_location(nir_shader *shader, 3802 nir_variable_mode mode, 3803 unsigned location); 3804 3805void nir_sort_variables_with_modes(nir_shader *shader, 3806 int (*compar)(const nir_variable *, 3807 const nir_variable *), 3808 nir_variable_mode modes); 3809 3810/** creates a function and adds it to the shader's list of functions */ 3811nir_function *nir_function_create(nir_shader *shader, const char *name); 3812 3813nir_function_impl *nir_function_impl_create(nir_function *func); 3814/** creates a function_impl that isn't tied to any particular function */ 3815nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 3816 3817nir_block *nir_block_create(nir_shader *shader); 3818nir_if *nir_if_create(nir_shader *shader); 3819nir_loop *nir_loop_create(nir_shader *shader); 3820 3821nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 3822 3823/** requests that the given pieces of metadata be generated */ 3824void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 3825/** dirties all but the preserved metadata */ 3826void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 3827/** Preserves all metadata for the given shader */ 3828void nir_shader_preserve_all_metadata(nir_shader *shader); 3829 3830/** creates an instruction with default swizzle/writemask/etc. with NULL registers */ 3831nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 3832 3833nir_deref_instr *nir_deref_instr_create(nir_shader *shader, 3834 nir_deref_type deref_type); 3835 3836nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 3837 3838nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 3839 unsigned num_components, 3840 unsigned bit_size); 3841 3842nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 3843 nir_intrinsic_op op); 3844 3845nir_call_instr *nir_call_instr_create(nir_shader *shader, 3846 nir_function *callee); 3847 3848/** Creates a NIR texture instruction */ 3849nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 3850 3851nir_phi_instr *nir_phi_instr_create(nir_shader *shader); 3852nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src); 3853 3854nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 3855 3856nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 3857 unsigned num_components, 3858 unsigned bit_size); 3859 3860nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 3861 3862/** 3863 * NIR Cursors and Instruction Insertion API 3864 * @{ 3865 * 3866 * A tiny struct representing a point to insert/extract instructions or 3867 * control flow nodes. Helps reduce the combinatorial explosion of possible 3868 * points to insert/extract. 
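 *
 * A cursor can name essentially any insertion point.  For example (an
 * illustrative sketch, not a complete pass):
 *
 *    nir_cursor c0 = nir_before_block(nir_start_block(impl));
 *    nir_cursor c1 = nir_after_instr(instr);
 *    nir_cursor c2 = nir_after_block_before_jump(block);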
3869 * 3870 * \sa nir_control_flow.h 3871 */ 3872typedef enum { 3873 nir_cursor_before_block, 3874 nir_cursor_after_block, 3875 nir_cursor_before_instr, 3876 nir_cursor_after_instr, 3877} nir_cursor_option; 3878 3879typedef struct { 3880 nir_cursor_option option; 3881 union { 3882 nir_block *block; 3883 nir_instr *instr; 3884 }; 3885} nir_cursor; 3886 3887static inline nir_block * 3888nir_cursor_current_block(nir_cursor cursor) 3889{ 3890 if (cursor.option == nir_cursor_before_instr || 3891 cursor.option == nir_cursor_after_instr) { 3892 return cursor.instr->block; 3893 } else { 3894 return cursor.block; 3895 } 3896} 3897 3898bool nir_cursors_equal(nir_cursor a, nir_cursor b); 3899 3900static inline nir_cursor 3901nir_before_block(nir_block *block) 3902{ 3903 nir_cursor cursor; 3904 cursor.option = nir_cursor_before_block; 3905 cursor.block = block; 3906 return cursor; 3907} 3908 3909static inline nir_cursor 3910nir_after_block(nir_block *block) 3911{ 3912 nir_cursor cursor; 3913 cursor.option = nir_cursor_after_block; 3914 cursor.block = block; 3915 return cursor; 3916} 3917 3918static inline nir_cursor 3919nir_before_instr(nir_instr *instr) 3920{ 3921 nir_cursor cursor; 3922 cursor.option = nir_cursor_before_instr; 3923 cursor.instr = instr; 3924 return cursor; 3925} 3926 3927static inline nir_cursor 3928nir_after_instr(nir_instr *instr) 3929{ 3930 nir_cursor cursor; 3931 cursor.option = nir_cursor_after_instr; 3932 cursor.instr = instr; 3933 return cursor; 3934} 3935 3936static inline nir_cursor 3937nir_before_block_after_phis(nir_block *block) 3938{ 3939 nir_phi_instr *last_phi = nir_block_last_phi_instr(block); 3940 if (last_phi) 3941 return nir_after_instr(&last_phi->instr); 3942 else 3943 return nir_before_block(block); 3944} 3945 3946static inline nir_cursor 3947nir_after_block_before_jump(nir_block *block) 3948{ 3949 nir_instr *last_instr = nir_block_last_instr(block); 3950 if (last_instr && last_instr->type == nir_instr_type_jump) { 3951 return nir_before_instr(last_instr); 3952 } else { 3953 return nir_after_block(block); 3954 } 3955} 3956 3957static inline nir_cursor 3958nir_before_src(nir_src *src, bool is_if_condition) 3959{ 3960 if (is_if_condition) { 3961 nir_block *prev_block = 3962 nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node)); 3963 assert(!nir_block_ends_in_jump(prev_block)); 3964 return nir_after_block(prev_block); 3965 } else if (src->parent_instr->type == nir_instr_type_phi) { 3966#ifndef NDEBUG 3967 nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr); 3968 bool found = false; 3969 nir_foreach_phi_src(phi_src, cond_phi) { 3970 if (phi_src->src.ssa == src->ssa) { 3971 found = true; 3972 break; 3973 } 3974 } 3975 assert(found); 3976#endif 3977 /* The list_entry() macro is a generic container-of macro, it just happens 3978 * to have a more specific name. 
3979 */ 3980 nir_phi_src *phi_src = list_entry(src, nir_phi_src, src); 3981 return nir_after_block_before_jump(phi_src->pred); 3982 } else { 3983 return nir_before_instr(src->parent_instr); 3984 } 3985} 3986 3987static inline nir_cursor 3988nir_before_cf_node(nir_cf_node *node) 3989{ 3990 if (node->type == nir_cf_node_block) 3991 return nir_before_block(nir_cf_node_as_block(node)); 3992 3993 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 3994} 3995 3996static inline nir_cursor 3997nir_after_cf_node(nir_cf_node *node) 3998{ 3999 if (node->type == nir_cf_node_block) 4000 return nir_after_block(nir_cf_node_as_block(node)); 4001 4002 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 4003} 4004 4005static inline nir_cursor 4006nir_after_phis(nir_block *block) 4007{ 4008 nir_foreach_instr(instr, block) { 4009 if (instr->type != nir_instr_type_phi) 4010 return nir_before_instr(instr); 4011 } 4012 return nir_after_block(block); 4013} 4014 4015static inline nir_cursor 4016nir_after_instr_and_phis(nir_instr *instr) 4017{ 4018 if (instr->type == nir_instr_type_phi) 4019 return nir_after_phis(instr->block); 4020 else 4021 return nir_after_instr(instr); 4022} 4023 4024static inline nir_cursor 4025nir_after_cf_node_and_phis(nir_cf_node *node) 4026{ 4027 if (node->type == nir_cf_node_block) 4028 return nir_after_block(nir_cf_node_as_block(node)); 4029 4030 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 4031 4032 return nir_after_phis(block); 4033} 4034 4035static inline nir_cursor 4036nir_before_cf_list(struct exec_list *cf_list) 4037{ 4038 nir_cf_node *first_node = exec_node_data(nir_cf_node, 4039 exec_list_get_head(cf_list), node); 4040 return nir_before_cf_node(first_node); 4041} 4042 4043static inline nir_cursor 4044nir_after_cf_list(struct exec_list *cf_list) 4045{ 4046 nir_cf_node *last_node = exec_node_data(nir_cf_node, 4047 exec_list_get_tail(cf_list), node); 4048 return nir_after_cf_node(last_node); 4049} 4050 4051/** 4052 * Insert a NIR instruction at the given cursor. 4053 * 4054 * Note: This does not update the cursor. 
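 *
 * For instance, reusing one cursor for two insertions (a sketch with
 * made-up instruction names):
 *
 *    nir_cursor c = nir_before_instr(target);
 *    nir_instr_insert(c, &a->instr);
 *    nir_instr_insert(c, &b->instr);
 *
 * leaves "a" before "b", both immediately before "target", because "c"
 * still means "before target" after each call.  Passes that emit longer
 * sequences usually go through nir_builder, which tracks and advances its
 * own cursor.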
4055 */ 4056void nir_instr_insert(nir_cursor cursor, nir_instr *instr); 4057 4058bool nir_instr_move(nir_cursor cursor, nir_instr *instr); 4059 4060static inline void 4061nir_instr_insert_before(nir_instr *instr, nir_instr *before) 4062{ 4063 nir_instr_insert(nir_before_instr(instr), before); 4064} 4065 4066static inline void 4067nir_instr_insert_after(nir_instr *instr, nir_instr *after) 4068{ 4069 nir_instr_insert(nir_after_instr(instr), after); 4070} 4071 4072static inline void 4073nir_instr_insert_before_block(nir_block *block, nir_instr *before) 4074{ 4075 nir_instr_insert(nir_before_block(block), before); 4076} 4077 4078static inline void 4079nir_instr_insert_after_block(nir_block *block, nir_instr *after) 4080{ 4081 nir_instr_insert(nir_after_block(block), after); 4082} 4083 4084static inline void 4085nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 4086{ 4087 nir_instr_insert(nir_before_cf_node(node), before); 4088} 4089 4090static inline void 4091nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 4092{ 4093 nir_instr_insert(nir_after_cf_node(node), after); 4094} 4095 4096static inline void 4097nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 4098{ 4099 nir_instr_insert(nir_before_cf_list(list), before); 4100} 4101 4102static inline void 4103nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 4104{ 4105 nir_instr_insert(nir_after_cf_list(list), after); 4106} 4107 4108void nir_instr_remove_v(nir_instr *instr); 4109void nir_instr_free(nir_instr *instr); 4110void nir_instr_free_list(struct exec_list *list); 4111 4112static inline nir_cursor 4113nir_instr_remove(nir_instr *instr) 4114{ 4115 nir_cursor cursor; 4116 nir_instr *prev = nir_instr_prev(instr); 4117 if (prev) { 4118 cursor = nir_after_instr(prev); 4119 } else { 4120 cursor = nir_before_block(instr->block); 4121 } 4122 nir_instr_remove_v(instr); 4123 return cursor; 4124} 4125 4126nir_cursor nir_instr_free_and_dce(nir_instr *instr); 4127 4128/** @} */ 4129 4130nir_ssa_def *nir_instr_ssa_def(nir_instr *instr); 4131bool nir_instr_def_is_register(nir_instr *instr); 4132 4133typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); 4134typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); 4135typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 4136bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, 4137 void *state); 4138static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); 4139static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 4140bool nir_foreach_phi_src_leaving_block(nir_block *instr, 4141 nir_foreach_src_cb cb, 4142 void *state); 4143 4144nir_const_value *nir_src_as_const_value(nir_src src); 4145 4146#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \ 4147static inline c_type * \ 4148nir_src_as_ ## name (nir_src src) \ 4149{ \ 4150 return src.is_ssa && src.ssa->parent_instr->type == type_enum \ 4151 ? 
cast_macro(src.ssa->parent_instr) : NULL; \ 4152} 4153 4154NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 4155NIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 4156 nir_instr_type_intrinsic, nir_instr_as_intrinsic) 4157NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 4158 4159bool nir_src_is_always_uniform(nir_src src); 4160bool nir_srcs_equal(nir_src src1, nir_src src2); 4161bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 4162 4163static inline void 4164nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr, 4165 nir_src *src, nir_ssa_def *new_ssa) 4166{ 4167 assert(src->parent_instr == instr); 4168 assert(src->is_ssa && src->ssa); 4169 list_del(&src->use_link); 4170 src->ssa = new_ssa; 4171 list_addtail(&src->use_link, &new_ssa->uses); 4172} 4173 4174void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 4175void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 4176 4177static inline void 4178nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt, 4179 nir_src *src, nir_ssa_def *new_ssa) 4180{ 4181 assert(src->parent_if == if_stmt); 4182 assert(src->is_ssa && src->ssa); 4183 list_del(&src->use_link); 4184 src->ssa = new_ssa; 4185 list_addtail(&src->use_link, &new_ssa->if_uses); 4186} 4187 4188void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 4189void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 4190 nir_dest new_dest); 4191 4192void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 4193 unsigned num_components, unsigned bit_size, 4194 const char *name); 4195void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 4196 unsigned num_components, unsigned bit_size); 4197static inline void 4198nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 4199 const struct glsl_type *type, 4200 const char *name) 4201{ 4202 assert(glsl_type_is_vector_or_scalar(type)); 4203 nir_ssa_dest_init(instr, dest, glsl_get_components(type), 4204 glsl_get_bit_size(type), name); 4205} 4206void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa); 4207void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src); 4208void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa, 4209 nir_instr *after_me); 4210 4211nir_component_mask_t nir_src_components_read(const nir_src *src); 4212nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 4213 4214static inline bool 4215nir_ssa_def_is_unused(nir_ssa_def *ssa) 4216{ 4217 return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses); 4218} 4219 4220 4221/** Returns the next block, disregarding structure 4222 * 4223 * The ordering is deterministic but has no guarantees beyond that. In 4224 * particular, it is not guaranteed to be dominance-preserving. 
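 *
 * This is normally consumed through the nir_foreach_block_unstructured()
 * macros defined below.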
4225 */ 4226nir_block *nir_block_unstructured_next(nir_block *block); 4227nir_block *nir_unstructured_start_block(nir_function_impl *impl); 4228 4229#define nir_foreach_block_unstructured(block, impl) \ 4230 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ 4231 block = nir_block_unstructured_next(block)) 4232 4233#define nir_foreach_block_unstructured_safe(block, impl) \ 4234 for (nir_block *block = nir_unstructured_start_block(impl), \ 4235 *next = nir_block_unstructured_next(block); \ 4236 block != NULL; \ 4237 block = next, next = nir_block_unstructured_next(block)) 4238 4239/* 4240 * finds the next basic block in source-code order, returns NULL if there is 4241 * none 4242 */ 4243 4244nir_block *nir_block_cf_tree_next(nir_block *block); 4245 4246/* Performs the opposite of nir_block_cf_tree_next() */ 4247 4248nir_block *nir_block_cf_tree_prev(nir_block *block); 4249 4250/* Gets the first block in a CF node in source-code order */ 4251 4252nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 4253 4254/* Gets the last block in a CF node in source-code order */ 4255 4256nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 4257 4258/* Gets the next block after a CF node in source-code order */ 4259 4260nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 4261 4262/* Macros for loops that visit blocks in source-code order */ 4263 4264#define nir_foreach_block(block, impl) \ 4265 for (nir_block *block = nir_start_block(impl); block != NULL; \ 4266 block = nir_block_cf_tree_next(block)) 4267 4268#define nir_foreach_block_safe(block, impl) \ 4269 for (nir_block *block = nir_start_block(impl), \ 4270 *next = nir_block_cf_tree_next(block); \ 4271 block != NULL; \ 4272 block = next, next = nir_block_cf_tree_next(block)) 4273 4274#define nir_foreach_block_reverse(block, impl) \ 4275 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 4276 block = nir_block_cf_tree_prev(block)) 4277 4278#define nir_foreach_block_reverse_safe(block, impl) \ 4279 for (nir_block *block = nir_impl_last_block(impl), \ 4280 *prev = nir_block_cf_tree_prev(block); \ 4281 block != NULL; \ 4282 block = prev, prev = nir_block_cf_tree_prev(block)) 4283 4284#define nir_foreach_block_in_cf_node(block, node) \ 4285 for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 4286 block != nir_cf_node_cf_tree_next(node); \ 4287 block = nir_block_cf_tree_next(block)) 4288 4289/* If the following CF node is an if, this function returns that if. 4290 * Otherwise, it returns NULL. 
4291 */ 4292nir_if *nir_block_get_following_if(nir_block *block); 4293 4294nir_loop *nir_block_get_following_loop(nir_block *block); 4295 4296nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx); 4297 4298void nir_index_local_regs(nir_function_impl *impl); 4299void nir_index_ssa_defs(nir_function_impl *impl); 4300unsigned nir_index_instrs(nir_function_impl *impl); 4301 4302void nir_index_blocks(nir_function_impl *impl); 4303 4304unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); 4305unsigned nir_function_impl_index_vars(nir_function_impl *impl); 4306 4307void nir_print_shader(nir_shader *shader, FILE *fp); 4308void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 4309void nir_print_instr(const nir_instr *instr, FILE *fp); 4310void nir_print_deref(const nir_deref_instr *deref, FILE *fp); 4311void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations); 4312#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL) 4313#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL) 4314#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL) 4315#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations) 4316 4317char *nir_shader_as_str(nir_shader *nir, void *mem_ctx); 4318char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx); 4319 4320/** Shallow clone of a single instruction. */ 4321nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 4322 4323/** Clone a single instruction, including a remap table to rewrite sources. */ 4324nir_instr *nir_instr_clone_deep(nir_shader *s, const nir_instr *orig, 4325 struct hash_table *remap_table); 4326 4327/** Shallow clone of a single ALU instruction. */ 4328nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig); 4329 4330nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 4331nir_function_impl *nir_function_impl_clone(nir_shader *shader, 4332 const nir_function_impl *fi); 4333nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 4334nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 4335 4336void nir_shader_replace(nir_shader *dest, nir_shader *src); 4337 4338void nir_shader_serialize_deserialize(nir_shader *s); 4339 4340#ifndef NDEBUG 4341void nir_validate_shader(nir_shader *shader, const char *when); 4342void nir_validate_ssa_dominance(nir_shader *shader, const char *when); 4343void nir_metadata_set_validation_flag(nir_shader *shader); 4344void nir_metadata_check_validation_flag(nir_shader *shader); 4345 4346static inline bool 4347should_skip_nir(const char *name) 4348{ 4349 static const char *list = NULL; 4350 if (!list) { 4351 /* Comma separated list of names to skip. 
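       * e.g. NIR_SKIP=nir_opt_algebraic,nir_opt_cse; the entries are matched
       * against the pass names used with NIR_PASS() and NIR_SKIP().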
*/ 4352 list = getenv("NIR_SKIP"); 4353 if (!list) 4354 list = ""; 4355 } 4356 4357 if (!list[0]) 4358 return false; 4359 4360 return comma_separated_list_contains(list, name); 4361} 4362 4363static inline bool 4364should_print_nir(nir_shader *shader) 4365{ 4366 if (shader->info.internal || 4367 shader->info.stage < 0 || 4368 shader->info.stage > MESA_SHADER_KERNEL) 4369 return false; 4370 4371 return unlikely(nir_debug_print_shader[shader->info.stage]); 4372} 4373#else 4374static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4375static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4376static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 4377static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 4378static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; } 4379static inline bool should_print_nir(UNUSED nir_shader *shader) { return false; } 4380#endif /* NDEBUG */ 4381 4382#define _PASS(pass, nir, do_pass) do { \ 4383 if (should_skip_nir(#pass)) { \ 4384 printf("skipping %s\n", #pass); \ 4385 break; \ 4386 } \ 4387 do_pass \ 4388 if (NIR_DEBUG(CLONE)) { \ 4389 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 4390 nir_shader_replace(nir, clone); \ 4391 } \ 4392 if (NIR_DEBUG(SERIALIZE)) { \ 4393 nir_shader_serialize_deserialize(nir); \ 4394 } \ 4395} while (0) 4396 4397#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \ 4398 nir_metadata_set_validation_flag(nir); \ 4399 if (should_print_nir(nir)) \ 4400 printf("%s\n", #pass); \ 4401 if (pass(nir, ##__VA_ARGS__)) { \ 4402 nir_validate_shader(nir, "after " #pass " in " __FILE__); \ 4403 UNUSED bool _; \ 4404 progress = true; \ 4405 if (should_print_nir(nir)) \ 4406 nir_print_shader(nir, stdout); \ 4407 nir_metadata_check_validation_flag(nir); \ 4408 } \ 4409) 4410 4411#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \ 4412 if (should_print_nir(nir)) \ 4413 printf("%s\n", #pass); \ 4414 pass(nir, ##__VA_ARGS__); \ 4415 nir_validate_shader(nir, "after " #pass " in " __FILE__); \ 4416 if (should_print_nir(nir)) \ 4417 nir_print_shader(nir, stdout); \ 4418) 4419 4420#define NIR_SKIP(name) should_skip_nir(#name) 4421 4422/** An instruction filtering callback with writemask 4423 * 4424 * Returns true if the instruction should be processed with the associated 4425 * writemask and false otherwise. 4426 */ 4427typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *, 4428 unsigned writemask, const void *); 4429 4430/** A simple instruction lowering callback 4431 * 4432 * Many instruction lowering passes can be written as a simple function which 4433 * takes an instruction as its input and returns a sequence of instructions 4434 * that implement the consumed instruction. This function type represents 4435 * such a lowering function. When called, a function with this prototype 4436 * should either return NULL indicating that no lowering needs to be done or 4437 * emit a sequence of instructions using the provided builder (whose cursor 4438 * will already be placed after the instruction to be lowered) and return the 4439 * resulting nir_ssa_def. 
4440 */ 4441typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *, 4442 nir_instr *, void *); 4443 4444/** 4445 * Special return value for nir_lower_instr_cb when some progress occurred 4446 * (like changing an input to the instr) that didn't result in a replacement 4447 * SSA def being generated. 4448 */ 4449#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1) 4450 4451/** 4452 * Special return value for nir_lower_instr_cb when some progress occurred 4453 * and the current instruction, which produces no output (like a store), 4454 * should simply be removed. 4455 */ 4456 4457#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2) 4458 4459/** Iterate over all the instructions in a nir_function_impl and lower them 4460 * using the provided callbacks 4461 * 4462 * This function implements the guts of a standard lowering pass for you. It 4463 * iterates over all of the instructions in a nir_function_impl and calls the 4464 * filter callback on each one. If the filter callback returns true, it then 4465 * calls the lowering callback on the instruction. (Splitting it this way 4466 * allows us to avoid some save/restore work for instructions we know won't be 4467 * lowered.) If the instruction is dead after the lowering is complete, it 4468 * will be removed. If new instructions are added, the lowering callback will 4469 * also be called on them in case multiple lowerings are required. 4470 * 4471 * If the callback indicates that the original instruction is replaced (either 4472 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the 4473 * instruction is removed along with any now-dead SSA defs it used. 4474 * 4475 * The metadata for the nir_function_impl will also be updated. If any blocks 4476 * are added (they cannot be removed), dominance and block indices will be 4477 * invalidated.
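 *
 * A minimal sketch of a pass built on this helper (the pass and its
 * callbacks below are illustrative, not part of NIR):
 *
 *    static bool
 *    is_ineg(const nir_instr *instr, UNUSED const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_ineg;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_ineg(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       nir_ssa_def *x = nir_ssa_for_alu_src(b, alu, 0);
 *       return nir_isub(b, nir_imm_intN_t(b, 0, x->bit_size), x);
 *    }
 *
 *    bool
 *    my_lower_ineg(nir_shader *s)
 *    {
 *       return nir_shader_lower_instructions(s, is_ineg, lower_ineg, NULL);
 *    }
 *
 * which a driver would then typically run as
 * NIR_PASS(progress, s, my_lower_ineg);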
4478 */ 4479bool nir_function_impl_lower_instructions(nir_function_impl *impl, 4480 nir_instr_filter_cb filter, 4481 nir_lower_instr_cb lower, 4482 void *cb_data); 4483bool nir_shader_lower_instructions(nir_shader *shader, 4484 nir_instr_filter_cb filter, 4485 nir_lower_instr_cb lower, 4486 void *cb_data); 4487 4488void nir_calc_dominance_impl(nir_function_impl *impl); 4489void nir_calc_dominance(nir_shader *shader); 4490 4491nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 4492bool nir_block_dominates(nir_block *parent, nir_block *child); 4493bool nir_block_is_unreachable(nir_block *block); 4494 4495void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 4496void nir_dump_dom_tree(nir_shader *shader, FILE *fp); 4497 4498void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 4499void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 4500 4501void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 4502void nir_dump_cfg(nir_shader *shader, FILE *fp); 4503 4504void nir_gs_count_vertices_and_primitives(const nir_shader *shader, 4505 int *out_vtxcnt, 4506 int *out_prmcnt, 4507 unsigned num_streams); 4508 4509typedef enum { 4510 nir_group_all, 4511 nir_group_same_resource_only, 4512} nir_load_grouping; 4513 4514void nir_group_loads(nir_shader *shader, nir_load_grouping grouping, 4515 unsigned max_distance); 4516 4517bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 4518bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 4519bool nir_split_var_copies(nir_shader *shader); 4520bool nir_split_per_member_structs(nir_shader *shader); 4521bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 4522 4523bool nir_lower_returns_impl(nir_function_impl *impl); 4524bool nir_lower_returns(nir_shader *shader); 4525 4526void nir_inline_function_impl(struct nir_builder *b, 4527 const nir_function_impl *impl, 4528 nir_ssa_def **params, 4529 struct hash_table *shader_var_remap); 4530bool nir_inline_functions(nir_shader *shader); 4531 4532void nir_find_inlinable_uniforms(nir_shader *shader); 4533void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms, 4534 const uint32_t *uniform_values, 4535 const uint16_t *uniform_dw_offsets); 4536 4537bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim); 4538 4539void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 4540void nir_lower_deref_copy_instr(struct nir_builder *b, 4541 nir_intrinsic_instr *copy); 4542bool nir_lower_var_copies(nir_shader *shader); 4543 4544bool nir_opt_memcpy(nir_shader *shader); 4545bool nir_lower_memcpy(nir_shader *shader); 4546 4547void nir_fixup_deref_modes(nir_shader *shader); 4548 4549bool nir_lower_global_vars_to_local(nir_shader *shader); 4550 4551typedef enum { 4552 nir_lower_direct_array_deref_of_vec_load = (1 << 0), 4553 nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 4554 nir_lower_direct_array_deref_of_vec_store = (1 << 2), 4555 nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 4556} nir_lower_array_deref_of_vec_options; 4557 4558bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 4559 nir_lower_array_deref_of_vec_options options); 4560 4561bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes, 4562 uint32_t max_lower_array_len); 4563 4564bool nir_lower_indirect_var_derefs(nir_shader *shader, 4565 const struct set *vars); 4566 4567bool nir_lower_locals_to_regs(nir_shader *shader); 4568 4569void 
nir_lower_io_to_temporaries(nir_shader *shader, 4570 nir_function_impl *entrypoint, 4571 bool outputs, bool inputs); 4572 4573bool nir_lower_vars_to_scratch(nir_shader *shader, 4574 nir_variable_mode modes, 4575 int size_threshold, 4576 glsl_type_size_align_func size_align); 4577 4578void nir_lower_clip_halfz(nir_shader *shader); 4579 4580void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 4581 4582void nir_gather_ssa_types(nir_function_impl *impl, 4583 BITSET_WORD *float_types, 4584 BITSET_WORD *int_types); 4585 4586void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 4587 unsigned *size, 4588 int (*type_size)(const struct glsl_type *, bool)); 4589 4590/* Some helpers to do very simple linking */ 4591bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 4592bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, 4593 uint64_t *used_by_other_stage, 4594 uint64_t *used_by_other_stage_patches); 4595void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 4596 bool default_to_smooth_interp); 4597void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 4598bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 4599void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer); 4600 4601bool nir_slot_is_sysval_output(gl_varying_slot slot); 4602bool nir_slot_is_varying(gl_varying_slot slot); 4603bool nir_slot_is_sysval_output_and_varying(gl_varying_slot slot); 4604void nir_remove_varying(nir_intrinsic_instr *intr); 4605void nir_remove_sysval_output(nir_intrinsic_instr *intr); 4606 4607bool nir_lower_amul(nir_shader *shader, 4608 int (*type_size)(const struct glsl_type *, bool)); 4609 4610bool nir_lower_ubo_vec4(nir_shader *shader); 4611 4612void nir_assign_io_var_locations(nir_shader *shader, 4613 nir_variable_mode mode, 4614 unsigned *size, 4615 gl_shader_stage stage); 4616 4617typedef struct { 4618 uint8_t num_linked_io_vars; 4619 uint8_t num_linked_patch_io_vars; 4620} nir_linked_io_var_info; 4621 4622nir_linked_io_var_info 4623nir_assign_linked_io_var_locations(nir_shader *producer, 4624 nir_shader *consumer); 4625 4626typedef enum { 4627 /* If set, this causes all 64-bit IO operations to be lowered on-the-fly 4628 * to 32-bit operations. This is only valid for nir_var_shader_in/out 4629 * modes. 4630 */ 4631 nir_lower_io_lower_64bit_to_32 = (1 << 0), 4632 4633 /* If set, this forces all non-flat fragment shader inputs to be 4634 * interpolated as if with the "sample" qualifier. This requires 4635 * nir_shader_compiler_options::use_interpolated_input_intrinsics. 
4636 */ 4637 nir_lower_io_force_sample_interpolation = (1 << 1), 4638} nir_lower_io_options; 4639bool nir_lower_io(nir_shader *shader, 4640 nir_variable_mode modes, 4641 int (*type_size)(const struct glsl_type *, bool), 4642 nir_lower_io_options); 4643 4644bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes); 4645 4646void 4647nir_lower_io_passes(nir_shader *nir); 4648 4649bool nir_io_add_intrinsic_xfb_info(nir_shader *nir); 4650 4651bool 4652nir_lower_vars_to_explicit_types(nir_shader *shader, 4653 nir_variable_mode modes, 4654 glsl_type_size_align_func type_info); 4655void 4656nir_gather_explicit_io_initializers(nir_shader *shader, 4657 void *dst, size_t dst_size, 4658 nir_variable_mode mode); 4659 4660bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes); 4661 4662typedef enum { 4663 /** 4664 * An address format which is a simple 32-bit global GPU address. 4665 */ 4666 nir_address_format_32bit_global, 4667 4668 /** 4669 * An address format which is a simple 64-bit global GPU address. 4670 */ 4671 nir_address_format_64bit_global, 4672 4673 /** 4674 * An address format which is a 64-bit global GPU address encoded as a 4675 * 2x32-bit vector. 4676 */ 4677 nir_address_format_2x32bit_global, 4678 4679 /** 4680 * An address format which is a 64-bit global base address and a 32-bit 4681 * offset. 4682 * 4683 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4684 * address stored with the low bits in .x and high bits in .y, .z is 4685 * undefined, and .w is an offset. This is intended to match 4686 * 64bit_bounded_global but without the bounds checking. 4687 */ 4688 nir_address_format_64bit_global_32bit_offset, 4689 4690 /** 4691 * An address format which is a bounds-checked 64-bit global GPU address. 4692 * 4693 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4694 * address stored with the low bits in .x and high bits in .y, .z is a 4695 * size, and .w is an offset. When the final I/O operation is lowered, .w 4696 * is checked against .z and the operation is predicated on the result. 4697 */ 4698 nir_address_format_64bit_bounded_global, 4699 4700 /** 4701 * An address format which is comprised of a vec2 where the first 4702 * component is a buffer index and the second is an offset. 4703 */ 4704 nir_address_format_32bit_index_offset, 4705 4706 /** 4707 * An address format which is a 64-bit value, where the high 32 bits 4708 * are a buffer index, and the low 32 bits are an offset. 4709 */ 4710 nir_address_format_32bit_index_offset_pack64, 4711 4712 /** 4713 * An address format which is comprised of a vec3 where the first two 4714 * components specify the buffer and the third is an offset. 4715 */ 4716 nir_address_format_vec2_index_32bit_offset, 4717 4718 /** 4719 * An address format which represents generic pointers with a 62-bit 4720 * pointer and a 2-bit enum in the top two bits. The top two bits have 4721 * the following meanings: 4722 * 4723 * - 0x0: Global memory 4724 * - 0x1: Shared memory 4725 * - 0x2: Scratch memory 4726 * - 0x3: Global memory 4727 * 4728 * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of 4729 * addresses. Valid global memory addresses may naturally have either 0 or 4730 * ~0 as their high bits. 4731 * 4732 * Shared and scratch pointers are represented as 32-bit offsets with the 4733 * top 32 bits only being used for the enum. This allows us to avoid 4734 * 64-bit address calculations in a bunch of cases. 
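 *
 * In other words, the mode is addr >> 62 and, for shared and scratch
 * pointers, the usable offset is just the low 32 bits of the address.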
4735 */ 4736 nir_address_format_62bit_generic, 4737 4738 /** 4739 * An address format which is a simple 32-bit offset. 4740 */ 4741 nir_address_format_32bit_offset, 4742 4743 /** 4744 * An address format which is a simple 32-bit offset cast to 64-bit. 4745 */ 4746 nir_address_format_32bit_offset_as_64bit, 4747 4748 /** 4749 * An address format representing a purely logical addressing model. In 4750 * this model, all deref chains must be complete from the dereference 4751 * operation to the variable. Cast derefs are not allowed. These 4752 * addresses will be 32-bit scalars but the format is immaterial because 4753 * you can always chase the chain. 4754 */ 4755 nir_address_format_logical, 4756} nir_address_format; 4757 4758unsigned 4759nir_address_format_bit_size(nir_address_format addr_format); 4760 4761unsigned 4762nir_address_format_num_components(nir_address_format addr_format); 4763 4764static inline const struct glsl_type * 4765nir_address_format_to_glsl_type(nir_address_format addr_format) 4766{ 4767 unsigned bit_size = nir_address_format_bit_size(addr_format); 4768 assert(bit_size == 32 || bit_size == 64); 4769 return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 4770 nir_address_format_num_components(addr_format)); 4771} 4772 4773const nir_const_value *nir_address_format_null_value(nir_address_format addr_format); 4774 4775nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4776 nir_address_format addr_format); 4777 4778nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4779 nir_address_format addr_format); 4780 4781nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 4782 nir_deref_instr *deref, 4783 nir_ssa_def *base_addr, 4784 nir_address_format addr_format); 4785 4786bool nir_get_explicit_deref_align(nir_deref_instr *deref, 4787 bool default_to_type_align, 4788 uint32_t *align_mul, 4789 uint32_t *align_offset); 4790 4791void nir_lower_explicit_io_instr(struct nir_builder *b, 4792 nir_intrinsic_instr *io_instr, 4793 nir_ssa_def *addr, 4794 nir_address_format addr_format); 4795 4796bool nir_lower_explicit_io(nir_shader *shader, 4797 nir_variable_mode modes, 4798 nir_address_format); 4799 4800bool 4801nir_lower_shader_calls(nir_shader *shader, 4802 nir_address_format address_format, 4803 unsigned stack_alignment, 4804 nir_shader ***resume_shaders_out, 4805 uint32_t *num_resume_shaders_out, 4806 void *mem_ctx); 4807 4808nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 4809nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr); 4810nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); 4811 4812bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage); 4813 4814bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 4815bool nir_lower_regs_to_ssa(nir_shader *shader); 4816bool nir_lower_vars_to_ssa(nir_shader *shader); 4817 4818bool nir_remove_dead_derefs(nir_shader *shader); 4819bool nir_remove_dead_derefs_impl(nir_function_impl *impl); 4820 4821typedef struct nir_remove_dead_variables_options { 4822 bool (*can_remove_var)(nir_variable *var, void *data); 4823 void *can_remove_var_data; 4824} nir_remove_dead_variables_options; 4825 4826bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, 4827 const nir_remove_dead_variables_options *options); 4828 4829bool nir_lower_variable_initializers(nir_shader *shader, 4830 nir_variable_mode modes); 4831bool 
nir_zero_initialize_shared_memory(nir_shader *shader, 4832 const unsigned shared_size, 4833 const unsigned chunk_size); 4834 4835bool nir_move_vec_src_uses_to_dest(nir_shader *shader); 4836bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb, 4837 const void *_data); 4838void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 4839 bool alpha_to_one, 4840 const gl_state_index16 *alpha_ref_state_tokens); 4841bool nir_lower_alu(nir_shader *shader); 4842 4843bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, 4844 bool always_precise); 4845 4846bool nir_scale_fdiv(nir_shader *shader); 4847 4848bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 4849bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data); 4850bool nir_lower_bool_to_bitsize(nir_shader *shader); 4851bool nir_lower_bool_to_float(nir_shader *shader); 4852bool nir_lower_bool_to_int32(nir_shader *shader); 4853bool nir_opt_simplify_convert_alu_types(nir_shader *shader); 4854bool nir_lower_const_arrays_to_uniforms(nir_shader *shader, 4855 unsigned max_uniform_components); 4856bool nir_lower_convert_alu_types(nir_shader *shader, 4857 bool (*should_lower)(nir_intrinsic_instr *)); 4858bool nir_lower_constant_convert_alu_types(nir_shader *shader); 4859bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); 4860bool nir_lower_int_to_float(nir_shader *shader); 4861bool nir_lower_load_const_to_scalar(nir_shader *shader); 4862bool nir_lower_read_invocation_to_scalar(nir_shader *shader); 4863bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all); 4864void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 4865void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 4866 bool outputs_only); 4867void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 4868bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 4869bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); 4870bool nir_vectorize_tess_levels(nir_shader *shader); 4871 4872bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs); 4873bool nir_lower_fragcoord_wtrans(nir_shader *shader); 4874void nir_lower_viewport_transform(nir_shader *shader); 4875bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4); 4876 4877bool nir_lower_is_helper_invocation(nir_shader *shader); 4878 4879bool nir_lower_single_sampled(nir_shader *shader); 4880 4881typedef struct nir_lower_subgroups_options { 4882 uint8_t subgroup_size; 4883 uint8_t ballot_bit_size; 4884 uint8_t ballot_components; 4885 bool lower_to_scalar:1; 4886 bool lower_vote_trivial:1; 4887 bool lower_vote_eq:1; 4888 bool lower_subgroup_masks:1; 4889 bool lower_relative_shuffle:1; 4890 bool lower_shuffle_to_32bit:1; 4891 bool lower_shuffle_to_swizzle_amd:1; 4892 bool lower_shuffle:1; 4893 bool lower_quad:1; 4894 bool lower_quad_broadcast_dynamic:1; 4895 bool lower_quad_broadcast_dynamic_to_const:1; 4896 bool lower_elect:1; 4897 bool lower_read_invocation_to_cond:1; 4898} nir_lower_subgroups_options; 4899 4900bool nir_lower_subgroups(nir_shader *shader, 4901 const nir_lower_subgroups_options *options); 4902 4903bool nir_lower_system_values(nir_shader *shader); 4904 4905typedef struct nir_lower_compute_system_values_options { 4906 bool has_base_global_invocation_id:1; 4907 bool has_base_workgroup_id:1; 4908 bool shuffle_local_ids_for_quad_derivatives:1; 4909 bool 
lower_local_invocation_index:1; 4910 bool lower_cs_local_id_to_index:1; 4911 bool lower_workgroup_id_to_index:1; 4912} nir_lower_compute_system_values_options; 4913 4914bool nir_lower_compute_system_values(nir_shader *shader, 4915 const nir_lower_compute_system_values_options *options); 4916 4917struct nir_lower_sysvals_to_varyings_options { 4918 bool frag_coord:1; 4919 bool front_face:1; 4920 bool point_coord:1; 4921}; 4922 4923bool 4924nir_lower_sysvals_to_varyings(nir_shader *shader, 4925 const struct nir_lower_sysvals_to_varyings_options *options); 4926 4927enum PACKED nir_lower_tex_packing { 4928 /** No packing */ 4929 nir_lower_tex_packing_none = 0, 4930 /** 4931 * The sampler returns up to 2 32-bit words of half floats or 16-bit signed 4932 * or unsigned ints based on the sampler type 4933 */ 4934 nir_lower_tex_packing_16, 4935 /** The sampler returns 1 32-bit word of 4x8 unorm */ 4936 nir_lower_tex_packing_8, 4937}; 4938 4939typedef struct nir_lower_tex_options { 4940 /** 4941 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 4942 * sampler types a texture projector is lowered. 4943 */ 4944 unsigned lower_txp; 4945 4946 /** 4947 * If true, lower texture projector for any array sampler dims 4948 */ 4949 bool lower_txp_array; 4950 4951 /** 4952 * If true, lower away nir_tex_src_offset for all texelfetch instructions. 4953 */ 4954 bool lower_txf_offset; 4955 4956 /** 4957 * If true, lower away nir_tex_src_offset for all rect textures. 4958 */ 4959 bool lower_rect_offset; 4960 4961 /** 4962 * If not NULL, this filter will return true for tex instructions that 4963 * should lower away nir_tex_src_offset. 4964 */ 4965 nir_instr_filter_cb lower_offset_filter; 4966 4967 /** 4968 * If true, lower rect textures to 2D, using txs to fetch the 4969 * texture dimensions and dividing the texture coords by the 4970 * texture dims to normalize. 4971 */ 4972 bool lower_rect; 4973 4974 /** 4975 * If true, convert yuv to rgb. 4976 */ 4977 unsigned lower_y_uv_external; 4978 unsigned lower_y_u_v_external; 4979 unsigned lower_yx_xuxv_external; 4980 unsigned lower_xy_uxvx_external; 4981 unsigned lower_ayuv_external; 4982 unsigned lower_xyuv_external; 4983 unsigned lower_yuv_external; 4984 unsigned lower_yu_yv_external; 4985 unsigned lower_y41x_external; 4986 unsigned bt709_external; 4987 unsigned bt2020_external; 4988 unsigned yuv_full_range_external; 4989 4990 /** 4991 * To emulate certain texture wrap modes, this can be used 4992 * to saturate the specified tex coord to [0.0, 1.0]. The 4993 * bits are according to sampler #, ie. if, for example: 4994 * 4995 * (conf->saturate_s & (1 << n)) 4996 * 4997 * is true, then the s coord for sampler n is saturated. 4998 * 4999 * Note that clamping must happen *after* projector lowering 5000 * so any projected texture sample instruction with a clamped 5001 * coordinate gets automatically lowered, regardless of the 5002 * 'lower_txp' setting. 5003 */ 5004 unsigned saturate_s; 5005 unsigned saturate_t; 5006 unsigned saturate_r; 5007 5008 /* Bitmask of textures that need swizzling. 5009 * 5010 * If (swizzle_result & (1 << texture_index)), then the swizzle in 5011 * swizzles[texture_index] is applied to the result of the texturing 5012 * operation. 5013 */ 5014 unsigned swizzle_result; 5015 5016 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 5017 * while 4 and 5 represent 0 and 1 respectively. 5018 * 5019 * Indexed by texture-id. 
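    *
    * For example, swizzles[i] = { 0, 0, 0, 5 } broadcasts .x into .xyz and
    * forces .w to 1.0, a typical way of exposing a single-channel texture
    * as a legacy luminance format.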
5020 */ 5021 uint8_t swizzles[32][4]; 5022 5023 /* Can be used to scale sampled values in range required by the 5024 * format. 5025 * 5026 * Indexed by texture-id. 5027 */ 5028 float scale_factors[32]; 5029 5030 /** 5031 * Bitmap of textures that need srgb to linear conversion. If 5032 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 5033 * of the texture are lowered to linear. 5034 */ 5035 unsigned lower_srgb; 5036 5037 /** 5038 * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 5039 */ 5040 bool lower_txd_cube_map; 5041 5042 /** 5043 * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 5044 */ 5045 bool lower_txd_3d; 5046 5047 /** 5048 * If true, lower nir_texop_txd any array surfaces with nir_texop_txl. 5049 */ 5050 bool lower_txd_array; 5051 5052 /** 5053 * If true, lower nir_texop_txd on shadow samplers (except cube maps) 5054 * with nir_texop_txl. Notice that cube map shadow samplers are lowered 5055 * with lower_txd_cube_map. 5056 */ 5057 bool lower_txd_shadow; 5058 5059 /** 5060 * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 5061 * Implies lower_txd_cube_map and lower_txd_shadow. 5062 */ 5063 bool lower_txd; 5064 5065 /** 5066 * If true, lower nir_texop_txb that try to use shadow compare and min_lod 5067 * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 5068 */ 5069 bool lower_txb_shadow_clamp; 5070 5071 /** 5072 * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 5073 * with nir_texop_txl. This includes cube maps. 5074 */ 5075 bool lower_txd_shadow_clamp; 5076 5077 /** 5078 * If true, lower nir_texop_txd on when it uses both offset and min_lod 5079 * with nir_texop_txl. This includes cube maps. 5080 */ 5081 bool lower_txd_offset_clamp; 5082 5083 /** 5084 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 5085 * sampler is bindless. 5086 */ 5087 bool lower_txd_clamp_bindless_sampler; 5088 5089 /** 5090 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 5091 * sampler index is not statically determinable to be less than 16. 5092 */ 5093 bool lower_txd_clamp_if_sampler_index_not_lt_16; 5094 5095 /** 5096 * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with 5097 * 0-lod followed by a nir_ishr. 5098 */ 5099 bool lower_txs_lod; 5100 5101 /** 5102 * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a 5103 * 2D array type followed by a nir_idiv by 6. 5104 */ 5105 bool lower_txs_cube_array; 5106 5107 /** 5108 * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 5109 * mixed-up tg4 locations. 5110 */ 5111 bool lower_tg4_broadcom_swizzle; 5112 5113 /** 5114 * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 5115 */ 5116 bool lower_tg4_offsets; 5117 5118 /** 5119 * Lower txf_ms to fragment_mask_fetch and fragment_fetch and samples_identical to 5120 * fragment_mask_fetch. 5121 */ 5122 bool lower_to_fragment_fetch_amd; 5123 5124 /** 5125 * To lower packed sampler return formats. 5126 * 5127 * Indexed by sampler-id. 5128 */ 5129 enum nir_lower_tex_packing lower_tex_packing[32]; 5130 5131 /** 5132 * If true, lower nir_texop_lod to return -FLT_MAX if the sum of the 5133 * absolute values of derivatives is 0 for all coordinates. 5134 */ 5135 bool lower_lod_zero_width; 5136 5137 /* Turns nir_op_tex and other ops with an implicit derivative, in stages 5138 * without implicit derivatives (like the vertex shader) to have an explicit 5139 * LOD with a value of 0. 
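    * (e.g. a nir_texop_tex in a vertex shader is rewritten to nir_texop_txl
    * with an LOD of 0).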
5140 */ 5141 bool lower_invalid_implicit_lod; 5142 5143 /* If true, round the layer component of the coordinates source to the nearest 5144 * integer for all array ops. 5145 */ 5146 bool lower_array_layer_round_even; 5147 5148 /** 5149 * Payload data to be sent to callback / filter functions. 5150 */ 5151 void *callback_data; 5152} nir_lower_tex_options; 5153 5154/** Lowers complex texture instructions to simpler ones */ 5155bool nir_lower_tex(nir_shader *shader, 5156 const nir_lower_tex_options *options); 5157 5158 5159typedef struct nir_lower_tex_shadow_swizzle { 5160 unsigned swizzle_r:3; 5161 unsigned swizzle_g:3; 5162 unsigned swizzle_b:3; 5163 unsigned swizzle_a:3; 5164} nir_lower_tex_shadow_swizzle; 5165 5166bool 5167nir_lower_tex_shadow(nir_shader *s, 5168 unsigned n_states, 5169 enum compare_func *compare_func, 5170 nir_lower_tex_shadow_swizzle *tex_swizzles); 5171 5172typedef struct nir_lower_image_options { 5173 /** 5174 * If true, lower cube size operations. 5175 */ 5176 bool lower_cube_size; 5177} nir_lower_image_options; 5178 5179bool nir_lower_image(nir_shader *nir, 5180 const nir_lower_image_options *options); 5181 5182bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable); 5183 5184enum nir_lower_non_uniform_access_type { 5185 nir_lower_non_uniform_ubo_access = (1 << 0), 5186 nir_lower_non_uniform_ssbo_access = (1 << 1), 5187 nir_lower_non_uniform_texture_access = (1 << 2), 5188 nir_lower_non_uniform_image_access = (1 << 3), 5189}; 5190 5191/* Given the nir_src used for the resource, return the channels which might be non-uniform. */ 5192typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *); 5193 5194typedef struct nir_lower_non_uniform_access_options { 5195 enum nir_lower_non_uniform_access_type types; 5196 nir_lower_non_uniform_access_callback callback; 5197 void *callback_data; 5198} nir_lower_non_uniform_access_options; 5199 5200bool nir_lower_non_uniform_access(nir_shader *shader, 5201 const nir_lower_non_uniform_access_options *options); 5202 5203typedef struct { 5204 /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV() 5205 * is used. It is the faster of the two but it is not exact in some cases 5206 * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432). 5207 * 5208 * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and 5209 * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more 5210 * instructions than the nv50 path and many of them are integer 5211 * multiplications, so it is probably slower. It should always return the 5212 * correct result, though. 
typedef struct {
   /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
    * is used.  It is the faster of the two, but it is not exact in some cases
    * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
    *
    * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
    * AMDGPUTargetLowering::LowerSDIVREM() is used.  It requires more
    * instructions than the nv50 path and many of them are integer
    * multiplications, so it is probably slower.  It should always return the
    * correct result, though.
    */
   bool imprecise_32bit_lowering;

   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
    * division lowering.
    */
   bool allow_fp16;
} nir_lower_idiv_options;

bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
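
/* Example (illustrative sketch): a backend without a native 32-bit integer
 * divide that wants exact results could request the precise (AMDGPU-style)
 * lowering:
 *
 *    nir_lower_idiv_options idiv_opts = {
 *       .imprecise_32bit_lowering = false,
 *       .allow_fp16 = false,
 *    };
 *    nir_lower_idiv(shader, &idiv_opts);
 */
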
typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   bool use_view_id_for_layer;
} nir_input_attachment_options;

bool nir_lower_input_attachments(nir_shader *shader,
                                 const nir_input_attachment_options *options);

bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
                       bool use_vars,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array);
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);

void nir_lower_point_size_mov(nir_shader *shader,
                              const gl_state_index16 *pointsize_state_tokens);

bool nir_lower_frexp(nir_shader *nir);

void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);

bool nir_lower_clamp_color_outputs(nir_shader *shader);

bool nir_lower_flatshade(nir_shader *shader);

void nir_lower_passthrough_edgeflags(nir_shader *shader);
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                              const gl_state_index16 *uniform_state_tokens);

typedef struct nir_lower_wpos_ytransform_options {
   gl_state_index16 state_tokens[STATE_LENGTH];
   bool fs_coord_origin_upper_left :1;
   bool fs_coord_origin_lower_left :1;
   bool fs_coord_pixel_center_integer :1;
   bool fs_coord_pixel_center_half_integer :1;
} nir_lower_wpos_ytransform_options;

bool nir_lower_wpos_ytransform(nir_shader *shader,
                               const nir_lower_wpos_ytransform_options *options);
bool nir_lower_wpos_center(nir_shader *shader);

bool nir_lower_pntc_ytransform(nir_shader *shader,
                               const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);

bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);

bool nir_lower_fb_read(nir_shader *shader);

typedef struct nir_lower_drawpixels_options {
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps :1;
   bool scale_and_bias :1;
} nir_lower_drawpixels_options;

void nir_lower_drawpixels(nir_shader *shader,
                          const nir_lower_drawpixels_options *options);

typedef struct nir_lower_bitmap_options {
   unsigned sampler;
   bool swizzle_xxxx;
} nir_lower_bitmap_options;

void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);

bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned offset_align_state);

typedef enum {
   nir_lower_int_source_mods = 1 << 0,
   nir_lower_fabs_source_mods = 1 << 1,
   nir_lower_fneg_source_mods = 1 << 2,
   nir_lower_64bit_source_mods = 1 << 3,
   nir_lower_triop_abs = 1 << 4,
   nir_lower_all_source_mods = (1 << 5) - 1
} nir_lower_to_source_mods_flags;

#define nir_lower_float_source_mods (nir_lower_fabs_source_mods | nir_lower_fneg_source_mods)

bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);

typedef enum {
   nir_lower_gs_intrinsics_per_stream = 1 << 0,
   nir_lower_gs_intrinsics_count_primitives = 1 << 1,
   nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
   nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
} nir_lower_gs_intrinsics_flags;

bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);

typedef struct {
   bool payload_to_shared_for_atomics : 1;
} nir_lower_task_shader_options;

bool nir_lower_task_shader(nir_shader *shader, nir_lower_task_shader_options options);

typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);

bool nir_lower_bit_size(nir_shader *shader,
                        nir_lower_bit_size_callback callback,
                        void *callback_data);
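
/* Example (illustrative sketch): a nir_lower_bit_size_callback asking for
 * 8-bit and 16-bit ALU instructions to be carried out at 32 bits; returning
 * 0 leaves an instruction untouched.  The callback name is hypothetical.
 *
 *    static unsigned
 *    lower_small_alu_to_32(const nir_instr *instr, void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
 *       return (bit_size == 8 || bit_size == 16) ? 32 : 0;
 *    }
 *
 *    nir_lower_bit_size(shader, lower_small_alu_to_32, NULL);
 */
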
bool nir_lower_64bit_phis(nir_shader *shader);

bool nir_split_64bit_vec3_and_vec4(nir_shader *shader);

nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
bool nir_lower_int64(nir_shader *shader);

nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                       nir_lower_doubles_options options);
bool nir_lower_pack(nir_shader *shader);

bool nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes);
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          uint64_t varying_mask, bool use_16bit_slots);
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          nir_alu_type types);
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);

struct nir_fold_tex_srcs_options {
   unsigned sampler_dims;
   unsigned src_types;
};

struct nir_fold_16bit_tex_image_options {
   nir_rounding_mode rounding_mode;
   bool fold_tex_dest;
   bool fold_image_load_store_data;
   unsigned fold_srcs_options_count;
   struct nir_fold_tex_srcs_options *fold_srcs_options;
};

bool nir_fold_16bit_tex_image(nir_shader *nir,
                              struct nir_fold_16bit_tex_image_options *options);

typedef struct {
   bool legalize_type;         /* whether this src should be legalized */
   uint8_t bit_size;           /* bit_size to enforce */
   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];

bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
                                     nir_tex_src_type_constraints constraints);

bool nir_lower_point_size(nir_shader *shader, float min, float max);

void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
                                bool point_coord_is_sysval, bool yinvert);

typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid = (1 << 3),
   nir_lower_interpolation_pixel = (1 << 4),
   nir_lower_interpolation_sample = (1 << 5),
} nir_lower_interpolation_options;

bool nir_lower_interpolation(nir_shader *shader,
                             nir_lower_interpolation_options options);

bool nir_lower_discard_if(nir_shader *shader);

bool nir_lower_discard_or_demote(nir_shader *shader,
                                 bool force_correct_quad_ops_after_discard);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);

bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);

bool nir_lower_fp16_casts(nir_shader *shader);
bool nir_normalize_cubemap_coords(nir_shader *shader);

bool nir_shader_supports_implicit_lod(nir_shader *shader);

void nir_live_ssa_defs_impl(nir_function_impl *impl);

const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask,
                           bool force_unroll_sampler_indirect);

bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis(nir_shader *shader);
bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
bool nir_has_divergent_loop(nir_shader *shader);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers.  If false, convert all values (even those not involved in a phi
 * node) to registers.
 */
bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_ssbo(nir_shader *shader);

typedef struct nir_lower_printf_options {
   bool treat_doubles_as_floats : 1;
   unsigned max_buffer_size;
} nir_lower_printf_options;

bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

typedef struct nir_opt_access_options {
   bool is_vulkan;
   bool infer_non_readable;
} nir_opt_access_options;

bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);

/* Try to combine a and b into a.  Return true if combination was possible,
 * which will result in b being removed by the pass.  Return false if
 * combination wasn't possible.
 */
typedef bool (*nir_combine_memory_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_memory_barriers(nir_shader *shader,
                                     nir_combine_memory_barrier_cb combine_cb,
                                     void *data);
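
/* Example (illustrative sketch): a conservative callback that never merges
 * two barriers; a real backend would inspect the two intrinsics and, when
 * legal, fold b's semantics into a before returning true.  The callback name
 * is hypothetical.
 *
 *    static bool
 *    never_combine_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                           void *data)
 *    {
 *       return false;
 *    }
 *
 *    nir_opt_combine_memory_barriers(shader, never_combine_barriers, NULL);
 */
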
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

bool nir_copy_prop_impl(nir_function_impl *impl);
bool nir_copy_prop(nir_shader *shader);

bool nir_opt_copy_prop_vars(nir_shader *shader);

bool nir_opt_cse(nir_shader *shader);

bool nir_opt_dce(nir_shader *shader);

bool nir_opt_dead_cf(nir_shader *shader);

bool nir_opt_dead_write_vars(nir_shader *shader);

bool nir_opt_deref_impl(nir_function_impl *impl);
bool nir_opt_deref(nir_shader *shader);

bool nir_opt_find_array_copies(nir_shader *shader);

bool nir_opt_fragdepth(nir_shader *shader);

bool nir_opt_gcm(nir_shader *shader, bool value_number);

bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

typedef enum {
   nir_opt_if_aggressive_last_continue = (1 << 0),
   nir_opt_if_optimize_phi_true_false = (1 << 1),
} nir_opt_if_options;

bool nir_opt_if(nir_shader *shader, nir_opt_if_options options);

bool nir_opt_intrinsics(nir_shader *shader);

bool nir_opt_large_constants(nir_shader *shader,
                             glsl_type_size_align_func size_align,
                             unsigned threshold);

bool nir_opt_loop_unroll(nir_shader *shader);

typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo = (1 << 1),
   nir_move_load_input = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies = (1 << 4),
   nir_move_load_ssbo = (1 << 5),
   nir_move_load_uniform = (1 << 6),
} nir_move_options;

bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

bool nir_opt_sink(nir_shader *shader, nir_move_options options);

bool nir_opt_move(nir_shader *shader, nir_move_options options);

typedef struct {
   /** nir_load_uniform max base offset */
   uint32_t uniform_max;

   /** nir_load_ubo_vec4 max base offset */
   uint32_t ubo_vec4_max;

   /** nir_var_mem_shared max base offset */
   uint32_t shared_max;

   /** nir_load/store_buffer_amd max base offset */
   uint32_t buffer_max;
} nir_opt_offsets_options;

bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);

bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok);

bool nir_opt_rematerialize_compares(nir_shader *shader);

bool nir_opt_remove_phis(nir_shader *shader);
bool nir_opt_remove_phis_block(nir_block *block);

bool nir_opt_phi_precision(nir_shader *shader);

bool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store);

bool nir_opt_shrink_vectors(nir_shader *shader);

bool nir_opt_trivial_continues(nir_shader *shader);

bool nir_opt_undef(nir_shader *shader);

bool nir_lower_undef_to_zero(nir_shader *shader);

bool nir_opt_uniform_atomics(nir_shader *shader);

bool nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter,
                       void *data);

bool nir_opt_conditional_discard(nir_shader *shader);
bool nir_opt_move_discards_to_top(nir_shader *shader);

bool nir_opt_ray_queries(nir_shader *shader);

typedef bool
(*nir_should_vectorize_mem_func)(unsigned align_mul,
                                 unsigned align_offset,
                                 unsigned bit_size,
                                 unsigned num_components,
                                 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                 void *data);

typedef struct {
   nir_should_vectorize_mem_func callback;
   nir_variable_mode modes;
   nir_variable_mode robust_modes;
   void *cb_data;
   bool has_shared2_amd;
} nir_load_store_vectorize_options;

bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
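
/* Example (illustrative sketch): a nir_should_vectorize_mem_func for hardware
 * that can handle up to vec4, 32-bit accesses at 4-byte alignment.  The
 * callback name and the exact limits are assumptions, not a real backend's
 * rules.
 *
 *    static bool
 *    mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
 *                     unsigned bit_size, unsigned num_components,
 *                     nir_intrinsic_instr *low, nir_intrinsic_instr *high,
 *                     void *data)
 *    {
 *       return num_components <= 4 && bit_size <= 32 &&
 *              align_mul >= 4 && align_offset % 4 == 0;
 *    }
 *
 *    nir_load_store_vectorize_options vec_opts = {
 *       .callback = mem_vectorize_cb,
 *       .modes = nir_var_mem_ssbo | nir_var_mem_ubo,
 *    };
 *    nir_opt_load_store_vectorize(shader, &vec_opts);
 */
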
void nir_sweep(nir_shader *shader);

void nir_remap_dual_slot_attributes(nir_shader *shader,
                                    uint64_t *dual_slot_inputs);
uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);

nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);

static inline bool
nir_variable_is_in_ubo(const nir_variable *var)
{
   return (var->data.mode == nir_var_mem_ubo &&
           var->interface_type != NULL);
}

static inline bool
nir_variable_is_in_ssbo(const nir_variable *var)
{
   return (var->data.mode == nir_var_mem_ssbo &&
           var->interface_type != NULL);
}

static inline bool
nir_variable_is_in_block(const nir_variable *var)
{
   return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
}

typedef struct nir_unsigned_upper_bound_config {
   unsigned min_subgroup_size;
   unsigned max_subgroup_size;
   unsigned max_workgroup_invocations;
   unsigned max_workgroup_count[3];
   unsigned max_workgroup_size[3];

   uint32_t vertex_attrib_max[32];
} nir_unsigned_upper_bound_config;

uint32_t
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
                         nir_ssa_scalar scalar,
                         const nir_unsigned_upper_bound_config *config);

bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
                            nir_ssa_scalar ssa, unsigned const_val,
                            const nir_unsigned_upper_bound_config *config);

typedef enum {
   nir_ray_query_value_intersection_type,
   nir_ray_query_value_intersection_t,
   nir_ray_query_value_intersection_instance_custom_index,
   nir_ray_query_value_intersection_instance_id,
   nir_ray_query_value_intersection_instance_sbt_index,
   nir_ray_query_value_intersection_geometry_index,
   nir_ray_query_value_intersection_primitive_index,
   nir_ray_query_value_intersection_barycentrics,
   nir_ray_query_value_intersection_front_face,
   nir_ray_query_value_intersection_object_ray_direction,
   nir_ray_query_value_intersection_object_ray_origin,
   nir_ray_query_value_intersection_object_to_world,
   nir_ray_query_value_intersection_world_to_object,
   nir_ray_query_value_intersection_candidate_aabb_opaque,
   nir_ray_query_value_tmin,
   nir_ray_query_value_flags,
   nir_ray_query_value_world_ray_direction,
   nir_ray_query_value_world_ray_origin,
} nir_ray_query_value;

typedef struct {
   /* True if gl_DrawID is considered uniform, i.e. if the preamble is run
    * at least once per "internal" draw rather than per user-visible draw.
    */
   bool drawid_uniform;

   /* True if the subgroup size is uniform. */
   bool subgroup_size_uniform;

   /* Size/align for load/store_preamble. */
   void (*def_size)(nir_ssa_def *def, unsigned *size, unsigned *align);

   /* Total available size for load/store_preamble storage, in units
    * determined by def_size.
    */
   unsigned preamble_storage_size;

   /* Give the cost for an instruction.  nir_opt_preamble will prioritize
    * instructions with higher costs.  Instructions with cost 0 may still be
    * lifted, but only when required to lift other instructions with non-0
    * cost (e.g. a load_const source of an expression).
    */
   float (*instr_cost_cb)(nir_instr *instr, const void *data);

   /* Give the cost of rewriting the instruction to use load_preamble.  This
    * may happen from inserting move instructions, etc.  If the benefit doesn't
    * exceed the cost here then we won't rewrite it.
    */
   float (*rewrite_cost_cb)(nir_ssa_def *def, const void *data);

   /* Instructions whose definitions should not be rewritten.  These could
    * still be moved to the preamble, but they shouldn't be the root of a
    * replacement expression.  Instructions with cost 0 and derefs are
    * automatically included by the pass.
    */
   nir_instr_filter_cb avoid_instr_cb;

   const void *cb_data;
} nir_opt_preamble_options;

bool
nir_opt_preamble(nir_shader *shader,
                 const nir_opt_preamble_options *options,
                 unsigned *size);
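
/* Example (illustrative sketch): minimal size/cost callbacks for
 * nir_opt_preamble that measure storage in 32-bit units and treat every
 * rewrite as equally cheap.  The names and numbers here are assumptions for
 * illustration only.
 *
 *    static void
 *    preamble_def_size(nir_ssa_def *def, unsigned *size, unsigned *align)
 *    {
 *       *size = def->num_components * DIV_ROUND_UP(def->bit_size, 32);
 *       *align = 1;
 *    }
 *
 *    static float
 *    preamble_rewrite_cost(nir_ssa_def *def, const void *data)
 *    {
 *       return 1.0f;
 *    }
 *
 *    nir_opt_preamble_options preamble_opts = {
 *       .def_size = preamble_def_size,
 *       .rewrite_cost_cb = preamble_rewrite_cost,
 *       .preamble_storage_size = 64,
 *    };
 *    unsigned preamble_size = 0;
 *    nir_opt_preamble(shader, &preamble_opts, &preamble_size);
 */
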
nir_function_impl *nir_shader_get_preamble(nir_shader *shader);

bool nir_lower_point_smooth(nir_shader *shader);
bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample);

#include "nir_inline_helpers.h"

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* NIR_H */