1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef __PAN_IR_H 25bf215546Sopenharmony_ci#define __PAN_IR_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <stdint.h> 28bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 29bf215546Sopenharmony_ci#include "util/u_dynarray.h" 30bf215546Sopenharmony_ci#include "util/hash_table.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci/* On Valhall, the driver gives the hardware a table of resource tables. 33bf215546Sopenharmony_ci * Resources are addressed as the index of the table together with the index of 34bf215546Sopenharmony_ci * the resource within the table. For simplicity, we put one type of resource 35bf215546Sopenharmony_ci * in each table and fix the numbering of the tables. 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * This numbering is arbitrary. It is a software ABI between the 38bf215546Sopenharmony_ci * Gallium driver and the Valhall compiler. 39bf215546Sopenharmony_ci */ 40bf215546Sopenharmony_cienum pan_resource_table { 41bf215546Sopenharmony_ci PAN_TABLE_UBO = 0, 42bf215546Sopenharmony_ci PAN_TABLE_ATTRIBUTE, 43bf215546Sopenharmony_ci PAN_TABLE_ATTRIBUTE_BUFFER, 44bf215546Sopenharmony_ci PAN_TABLE_SAMPLER, 45bf215546Sopenharmony_ci PAN_TABLE_TEXTURE, 46bf215546Sopenharmony_ci PAN_TABLE_IMAGE, 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci PAN_NUM_RESOURCE_TABLES 49bf215546Sopenharmony_ci}; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci/* Indices for named (non-XFB) varyings that are present. These are packed 52bf215546Sopenharmony_ci * tightly so they correspond to a bitfield present (P) indexed by (1 << 53bf215546Sopenharmony_ci * PAN_VARY_*). This has the nice property that you can lookup the buffer index 54bf215546Sopenharmony_ci * of a given special field given a shift S by: 55bf215546Sopenharmony_ci * 56bf215546Sopenharmony_ci * idx = popcount(P & ((1 << S) - 1)) 57bf215546Sopenharmony_ci * 58bf215546Sopenharmony_ci * That is... look at all of the varyings that come earlier and count them, the 59bf215546Sopenharmony_ci * count is the new index since plus one. Likewise, the total number of special 60bf215546Sopenharmony_ci * buffers required is simply popcount(P) 61bf215546Sopenharmony_ci */ 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cienum pan_special_varying { 64bf215546Sopenharmony_ci PAN_VARY_GENERAL = 0, 65bf215546Sopenharmony_ci PAN_VARY_POSITION = 1, 66bf215546Sopenharmony_ci PAN_VARY_PSIZ = 2, 67bf215546Sopenharmony_ci PAN_VARY_PNTCOORD = 3, 68bf215546Sopenharmony_ci PAN_VARY_FACE = 4, 69bf215546Sopenharmony_ci PAN_VARY_FRAGCOORD = 5, 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci /* Keep last */ 72bf215546Sopenharmony_ci PAN_VARY_MAX, 73bf215546Sopenharmony_ci}; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci/* Maximum number of attribute descriptors required for varyings. These include 76bf215546Sopenharmony_ci * up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL 77bf215546Sopenharmony_ci * special varying */ 78bf215546Sopenharmony_ci#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1) 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci/* Define the general compiler entry point */ 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci#define MAX_SYSVAL_COUNT 32 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal 85bf215546Sopenharmony_ci * their class for equal comparison */ 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type) 88bf215546Sopenharmony_ci#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff) 89bf215546Sopenharmony_ci#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16) 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci/* Define some common types. We start at one for easy indexing of hash 92bf215546Sopenharmony_ci * tables internal to the compiler */ 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cienum { 95bf215546Sopenharmony_ci PAN_SYSVAL_VIEWPORT_SCALE = 1, 96bf215546Sopenharmony_ci PAN_SYSVAL_VIEWPORT_OFFSET = 2, 97bf215546Sopenharmony_ci PAN_SYSVAL_TEXTURE_SIZE = 3, 98bf215546Sopenharmony_ci PAN_SYSVAL_SSBO = 4, 99bf215546Sopenharmony_ci PAN_SYSVAL_NUM_WORK_GROUPS = 5, 100bf215546Sopenharmony_ci PAN_SYSVAL_SAMPLER = 7, 101bf215546Sopenharmony_ci PAN_SYSVAL_LOCAL_GROUP_SIZE = 8, 102bf215546Sopenharmony_ci PAN_SYSVAL_WORK_DIM = 9, 103bf215546Sopenharmony_ci PAN_SYSVAL_IMAGE_SIZE = 10, 104bf215546Sopenharmony_ci PAN_SYSVAL_SAMPLE_POSITIONS = 11, 105bf215546Sopenharmony_ci PAN_SYSVAL_MULTISAMPLED = 12, 106bf215546Sopenharmony_ci PAN_SYSVAL_RT_CONVERSION = 13, 107bf215546Sopenharmony_ci PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14, 108bf215546Sopenharmony_ci PAN_SYSVAL_DRAWID = 15, 109bf215546Sopenharmony_ci PAN_SYSVAL_BLEND_CONSTANTS = 16, 110bf215546Sopenharmony_ci PAN_SYSVAL_XFB = 17, 111bf215546Sopenharmony_ci PAN_SYSVAL_NUM_VERTICES = 18, 112bf215546Sopenharmony_ci}; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \ 115bf215546Sopenharmony_ci ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0)) 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f) 118bf215546Sopenharmony_ci#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3) 119bf215546Sopenharmony_ci#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9)) 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be 122bf215546Sopenharmony_ci * consistent with the blob so we can compare traces easier. */ 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_cienum { 125bf215546Sopenharmony_ci PAN_VERTEX_ID = 16, 126bf215546Sopenharmony_ci PAN_INSTANCE_ID = 17, 127bf215546Sopenharmony_ci PAN_MAX_ATTRIBUTE 128bf215546Sopenharmony_ci}; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistruct panfrost_sysvals { 131bf215546Sopenharmony_ci /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ 132bf215546Sopenharmony_ci unsigned sysvals[MAX_SYSVAL_COUNT]; 133bf215546Sopenharmony_ci unsigned sysval_count; 134bf215546Sopenharmony_ci}; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each. 137bf215546Sopenharmony_ci * In practice, the maximum number of FAU slots is limited by implementation. 138bf215546Sopenharmony_ci * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the 139bf215546Sopenharmony_ci * maximum number of 32-bit words is 128, since there are 2 words per FAU slot. 140bf215546Sopenharmony_ci * 141bf215546Sopenharmony_ci * Midgard can push at most 92 words, so this bound suffices. The Midgard 142bf215546Sopenharmony_ci * compiler pushes less than this, as Midgard uses register-mapped uniforms 143bf215546Sopenharmony_ci * instead of FAU, preventing large numbers of uniforms to be pushed for 144bf215546Sopenharmony_ci * nontrivial programs. 145bf215546Sopenharmony_ci */ 146bf215546Sopenharmony_ci#define PAN_MAX_PUSH 128 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so 149bf215546Sopenharmony_ci * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_cistruct panfrost_ubo_word { 152bf215546Sopenharmony_ci uint16_t ubo; 153bf215546Sopenharmony_ci uint16_t offset; 154bf215546Sopenharmony_ci}; 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistruct panfrost_ubo_push { 157bf215546Sopenharmony_ci unsigned count; 158bf215546Sopenharmony_ci struct panfrost_ubo_word words[PAN_MAX_PUSH]; 159bf215546Sopenharmony_ci}; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci/* Helper for searching the above. Note this is O(N) to the number of pushed 162bf215546Sopenharmony_ci * constants, do not run in the draw call hot path */ 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ciunsigned 165bf215546Sopenharmony_cipan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_cistruct hash_table_u64 * 168bf215546Sopenharmony_cipanfrost_init_sysvals(struct panfrost_sysvals *sysvals, 169bf215546Sopenharmony_ci struct panfrost_sysvals *fixed_sysvals, 170bf215546Sopenharmony_ci void *memctx); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ciunsigned 173bf215546Sopenharmony_cipan_lookup_sysval(struct hash_table_u64 *sysval_to_id, 174bf215546Sopenharmony_ci struct panfrost_sysvals *sysvals, 175bf215546Sopenharmony_ci int sysval); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ciint 178bf215546Sopenharmony_cipanfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cistruct panfrost_compile_inputs { 181bf215546Sopenharmony_ci unsigned gpu_id; 182bf215546Sopenharmony_ci bool is_blend, is_blit; 183bf215546Sopenharmony_ci struct { 184bf215546Sopenharmony_ci unsigned rt; 185bf215546Sopenharmony_ci unsigned nr_samples; 186bf215546Sopenharmony_ci uint64_t bifrost_blend_desc; 187bf215546Sopenharmony_ci } blend; 188bf215546Sopenharmony_ci int fixed_sysval_ubo; 189bf215546Sopenharmony_ci struct panfrost_sysvals *fixed_sysval_layout; 190bf215546Sopenharmony_ci bool shaderdb; 191bf215546Sopenharmony_ci bool no_idvs; 192bf215546Sopenharmony_ci bool no_ubo_to_push; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci enum pipe_format rt_formats[8]; 195bf215546Sopenharmony_ci uint8_t raw_fmt_mask; 196bf215546Sopenharmony_ci unsigned nr_cbufs; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci /* Used on Valhall. 199bf215546Sopenharmony_ci * 200bf215546Sopenharmony_ci * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0) 201bf215546Sopenharmony_ci * written by the previous stage (fragment shader) or written by this 202bf215546Sopenharmony_ci * stage (vertex shader). Bits are slots from gl_varying_slot. 203bf215546Sopenharmony_ci * 204bf215546Sopenharmony_ci * For modern APIs (GLES or VK), this should be 0. 205bf215546Sopenharmony_ci */ 206bf215546Sopenharmony_ci uint32_t fixed_varying_mask; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci union { 209bf215546Sopenharmony_ci struct { 210bf215546Sopenharmony_ci bool static_rt_conv; 211bf215546Sopenharmony_ci uint32_t rt_conv[8]; 212bf215546Sopenharmony_ci } bifrost; 213bf215546Sopenharmony_ci }; 214bf215546Sopenharmony_ci}; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_cistruct pan_shader_varying { 217bf215546Sopenharmony_ci gl_varying_slot location; 218bf215546Sopenharmony_ci enum pipe_format format; 219bf215546Sopenharmony_ci}; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_cistruct bifrost_shader_blend_info { 222bf215546Sopenharmony_ci nir_alu_type type; 223bf215546Sopenharmony_ci uint32_t return_offset; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* mali_bifrost_register_file_format corresponding to nir_alu_type */ 226bf215546Sopenharmony_ci unsigned format; 227bf215546Sopenharmony_ci}; 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci/* 230bf215546Sopenharmony_ci * Unpacked form of a v7 message preload descriptor, produced by the compiler's 231bf215546Sopenharmony_ci * message preload optimization. By splitting out this struct, the compiler does 232bf215546Sopenharmony_ci * not need to know about data structure packing, avoiding a dependency on 233bf215546Sopenharmony_ci * GenXML. 234bf215546Sopenharmony_ci */ 235bf215546Sopenharmony_cistruct bifrost_message_preload { 236bf215546Sopenharmony_ci /* Whether to preload this message */ 237bf215546Sopenharmony_ci bool enabled; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci /* Varying to load from */ 240bf215546Sopenharmony_ci unsigned varying_index; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci /* Register type, FP32 otherwise */ 243bf215546Sopenharmony_ci bool fp16; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci /* Number of components, ignored if texturing */ 246bf215546Sopenharmony_ci unsigned num_components; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci /* If texture is set, performs a texture instruction according to 249bf215546Sopenharmony_ci * texture_index, skip, and zero_lod. If texture is unset, only the 250bf215546Sopenharmony_ci * varying load is performed. 251bf215546Sopenharmony_ci */ 252bf215546Sopenharmony_ci bool texture, skip, zero_lod; 253bf215546Sopenharmony_ci unsigned texture_index; 254bf215546Sopenharmony_ci}; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cistruct bifrost_shader_info { 257bf215546Sopenharmony_ci struct bifrost_shader_blend_info blend[8]; 258bf215546Sopenharmony_ci nir_alu_type blend_src1_type; 259bf215546Sopenharmony_ci bool wait_6, wait_7; 260bf215546Sopenharmony_ci struct bifrost_message_preload messages[2]; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* Whether any flat varyings are loaded. This may disable optimizations 263bf215546Sopenharmony_ci * that change the provoking vertex, since that would load incorrect 264bf215546Sopenharmony_ci * values for flat varyings. 265bf215546Sopenharmony_ci */ 266bf215546Sopenharmony_ci bool uses_flat_shading; 267bf215546Sopenharmony_ci}; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_cistruct midgard_shader_info { 270bf215546Sopenharmony_ci unsigned first_tag; 271bf215546Sopenharmony_ci}; 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistruct pan_shader_info { 274bf215546Sopenharmony_ci gl_shader_stage stage; 275bf215546Sopenharmony_ci unsigned work_reg_count; 276bf215546Sopenharmony_ci unsigned tls_size; 277bf215546Sopenharmony_ci unsigned wls_size; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci /* Bit mask of preloaded registers */ 280bf215546Sopenharmony_ci uint64_t preload; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci union { 283bf215546Sopenharmony_ci struct { 284bf215546Sopenharmony_ci bool reads_frag_coord; 285bf215546Sopenharmony_ci bool reads_point_coord; 286bf215546Sopenharmony_ci bool reads_face; 287bf215546Sopenharmony_ci bool can_discard; 288bf215546Sopenharmony_ci bool writes_depth; 289bf215546Sopenharmony_ci bool writes_stencil; 290bf215546Sopenharmony_ci bool writes_coverage; 291bf215546Sopenharmony_ci bool sidefx; 292bf215546Sopenharmony_ci bool sample_shading; 293bf215546Sopenharmony_ci bool early_fragment_tests; 294bf215546Sopenharmony_ci bool can_early_z, can_fpk; 295bf215546Sopenharmony_ci BITSET_WORD outputs_read; 296bf215546Sopenharmony_ci BITSET_WORD outputs_written; 297bf215546Sopenharmony_ci } fs; 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci struct { 300bf215546Sopenharmony_ci bool writes_point_size; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci /* If the primary shader writes point size, the Valhall 303bf215546Sopenharmony_ci * driver may need a variant that does not write point 304bf215546Sopenharmony_ci * size. Offset to such a shader in the program binary. 305bf215546Sopenharmony_ci * 306bf215546Sopenharmony_ci * Zero if no such variant is required. 307bf215546Sopenharmony_ci * 308bf215546Sopenharmony_ci * Only used with IDVS on Valhall. 309bf215546Sopenharmony_ci */ 310bf215546Sopenharmony_ci unsigned no_psiz_offset; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci /* Set if Index-Driven Vertex Shading is in use */ 313bf215546Sopenharmony_ci bool idvs; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* If IDVS is used, whether a varying shader is used */ 316bf215546Sopenharmony_ci bool secondary_enable; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* If a varying shader is used, the varying shader's 319bf215546Sopenharmony_ci * offset in the program binary 320bf215546Sopenharmony_ci */ 321bf215546Sopenharmony_ci unsigned secondary_offset; 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci /* If IDVS is in use, number of work registers used by 324bf215546Sopenharmony_ci * the varying shader 325bf215546Sopenharmony_ci */ 326bf215546Sopenharmony_ci unsigned secondary_work_reg_count; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* If IDVS is in use, bit mask of preloaded registers 329bf215546Sopenharmony_ci * used by the varying shader 330bf215546Sopenharmony_ci */ 331bf215546Sopenharmony_ci uint64_t secondary_preload; 332bf215546Sopenharmony_ci } vs; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci struct { 335bf215546Sopenharmony_ci /* Is it legal to merge workgroups? This is true if the 336bf215546Sopenharmony_ci * shader uses neither barriers nor shared memory. 337bf215546Sopenharmony_ci * 338bf215546Sopenharmony_ci * Used by the Valhall hardware. 339bf215546Sopenharmony_ci */ 340bf215546Sopenharmony_ci bool allow_merging_workgroups; 341bf215546Sopenharmony_ci } cs; 342bf215546Sopenharmony_ci }; 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci /* Does the shader contains a barrier? or (for fragment shaders) does it 345bf215546Sopenharmony_ci * require helper invocations, which demand the same ordering guarantees 346bf215546Sopenharmony_ci * of the hardware? These notions are unified in the hardware, so we 347bf215546Sopenharmony_ci * unify them here as well. 348bf215546Sopenharmony_ci */ 349bf215546Sopenharmony_ci bool contains_barrier; 350bf215546Sopenharmony_ci bool separable; 351bf215546Sopenharmony_ci bool writes_global; 352bf215546Sopenharmony_ci uint64_t outputs_written; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci unsigned sampler_count; 355bf215546Sopenharmony_ci unsigned texture_count; 356bf215546Sopenharmony_ci unsigned ubo_count; 357bf215546Sopenharmony_ci unsigned attributes_read_count; 358bf215546Sopenharmony_ci unsigned attribute_count; 359bf215546Sopenharmony_ci unsigned attributes_read; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci struct { 362bf215546Sopenharmony_ci unsigned input_count; 363bf215546Sopenharmony_ci struct pan_shader_varying input[PAN_MAX_VARYINGS]; 364bf215546Sopenharmony_ci unsigned output_count; 365bf215546Sopenharmony_ci struct pan_shader_varying output[PAN_MAX_VARYINGS]; 366bf215546Sopenharmony_ci } varyings; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci struct panfrost_sysvals sysvals; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access 371bf215546Sopenharmony_ci * Uniforms (Bifrost) */ 372bf215546Sopenharmony_ci struct panfrost_ubo_push push; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci uint32_t ubo_mask; 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci union { 377bf215546Sopenharmony_ci struct bifrost_shader_info bifrost; 378bf215546Sopenharmony_ci struct midgard_shader_info midgard; 379bf215546Sopenharmony_ci }; 380bf215546Sopenharmony_ci}; 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_citypedef struct pan_block { 383bf215546Sopenharmony_ci /* Link to next block. Must be first for mir_get_block */ 384bf215546Sopenharmony_ci struct list_head link; 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci /* List of instructions emitted for the current block */ 387bf215546Sopenharmony_ci struct list_head instructions; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci /* Index of the block in source order */ 390bf215546Sopenharmony_ci unsigned name; 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* Control flow graph */ 393bf215546Sopenharmony_ci struct pan_block *successors[2]; 394bf215546Sopenharmony_ci struct set *predecessors; 395bf215546Sopenharmony_ci bool unconditional_jumps; 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci /* In liveness analysis, these are live masks (per-component) for 398bf215546Sopenharmony_ci * indices for the block. Scalar compilers have the luxury of using 399bf215546Sopenharmony_ci * simple bit fields, but for us, liveness is a vector idea. */ 400bf215546Sopenharmony_ci uint16_t *live_in; 401bf215546Sopenharmony_ci uint16_t *live_out; 402bf215546Sopenharmony_ci} pan_block; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_cistruct pan_instruction { 405bf215546Sopenharmony_ci struct list_head link; 406bf215546Sopenharmony_ci}; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci#define pan_foreach_instr_in_block_rev(block, v) \ 409bf215546Sopenharmony_ci list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link) 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci#define pan_foreach_successor(blk, v) \ 412bf215546Sopenharmony_ci pan_block *v; \ 413bf215546Sopenharmony_ci pan_block **_v; \ 414bf215546Sopenharmony_ci for (_v = (pan_block **) &blk->successors[0], \ 415bf215546Sopenharmony_ci v = *_v; \ 416bf215546Sopenharmony_ci v != NULL && _v < (pan_block **) &blk->successors[2]; \ 417bf215546Sopenharmony_ci _v++, v = *_v) \ 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci#define pan_foreach_predecessor(blk, v) \ 420bf215546Sopenharmony_ci struct set_entry *_entry_##v; \ 421bf215546Sopenharmony_ci struct pan_block *v; \ 422bf215546Sopenharmony_ci for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \ 423bf215546Sopenharmony_ci v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \ 424bf215546Sopenharmony_ci _entry_##v != NULL; \ 425bf215546Sopenharmony_ci _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \ 426bf215546Sopenharmony_ci v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL)) 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_cistatic inline pan_block * 429bf215546Sopenharmony_cipan_exit_block(struct list_head *blocks) 430bf215546Sopenharmony_ci{ 431bf215546Sopenharmony_ci pan_block *last = list_last_entry(blocks, pan_block, link); 432bf215546Sopenharmony_ci assert(!last->successors[0] && !last->successors[1]); 433bf215546Sopenharmony_ci return last; 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_citypedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_civoid pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 439bf215546Sopenharmony_civoid pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask); 440bf215546Sopenharmony_cibool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max); 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_civoid pan_compute_liveness(struct list_head *blocks, 443bf215546Sopenharmony_ci unsigned temp_count, 444bf215546Sopenharmony_ci pan_liveness_update callback); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_civoid pan_free_liveness(struct list_head *blocks); 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ciuint16_t 449bf215546Sopenharmony_cipan_to_bytemask(unsigned bytes, unsigned mask); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_civoid pan_block_add_successor(pan_block *block, pan_block *successor); 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci/* IR indexing */ 454bf215546Sopenharmony_ci#define PAN_IS_REG (1) 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_cistatic inline unsigned 457bf215546Sopenharmony_cipan_ssa_index(nir_ssa_def *ssa) 458bf215546Sopenharmony_ci{ 459bf215546Sopenharmony_ci /* Off-by-one ensures BIR_NO_ARG is skipped */ 460bf215546Sopenharmony_ci return ((ssa->index + 1) << 1) | 0; 461bf215546Sopenharmony_ci} 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_cistatic inline unsigned 464bf215546Sopenharmony_cipan_src_index(nir_src *src) 465bf215546Sopenharmony_ci{ 466bf215546Sopenharmony_ci if (src->is_ssa) 467bf215546Sopenharmony_ci return pan_ssa_index(src->ssa); 468bf215546Sopenharmony_ci else { 469bf215546Sopenharmony_ci assert(!src->reg.indirect); 470bf215546Sopenharmony_ci return (src->reg.reg->index << 1) | PAN_IS_REG; 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci} 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_cistatic inline unsigned 475bf215546Sopenharmony_cipan_dest_index(nir_dest *dst) 476bf215546Sopenharmony_ci{ 477bf215546Sopenharmony_ci if (dst->is_ssa) 478bf215546Sopenharmony_ci return pan_ssa_index(&dst->ssa); 479bf215546Sopenharmony_ci else { 480bf215546Sopenharmony_ci assert(!dst->reg.indirect); 481bf215546Sopenharmony_ci return (dst->reg.reg->index << 1) | PAN_IS_REG; 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci} 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci/* IR printing helpers */ 486bf215546Sopenharmony_civoid pan_print_alu_type(nir_alu_type t, FILE *fp); 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci/* Until it can be upstreamed.. */ 489bf215546Sopenharmony_cibool pan_has_source_mod(nir_alu_src *src, nir_op op); 490bf215546Sopenharmony_cibool pan_has_dest_mod(nir_dest **dest, nir_op op); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci/* NIR passes to do some backend-specific lowering */ 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci#define PAN_WRITEOUT_C 1 495bf215546Sopenharmony_ci#define PAN_WRITEOUT_Z 2 496bf215546Sopenharmony_ci#define PAN_WRITEOUT_S 4 497bf215546Sopenharmony_ci#define PAN_WRITEOUT_2 8 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_cibool pan_nir_lower_zs_store(nir_shader *nir); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_cibool pan_nir_lower_64bit_intrin(nir_shader *shader); 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_cibool pan_lower_helper_invocation(nir_shader *shader); 504bf215546Sopenharmony_cibool pan_lower_sample_pos(nir_shader *shader); 505bf215546Sopenharmony_cibool pan_lower_xfb(nir_shader *nir); 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci/* 508bf215546Sopenharmony_ci * Helper returning the subgroup size. Generally, this is equal to the number of 509bf215546Sopenharmony_ci * threads in a warp. For Midgard (including warping models), this returns 1, as 510bf215546Sopenharmony_ci * subgroups are not supported. 511bf215546Sopenharmony_ci */ 512bf215546Sopenharmony_cistatic inline unsigned 513bf215546Sopenharmony_cipan_subgroup_size(unsigned arch) 514bf215546Sopenharmony_ci{ 515bf215546Sopenharmony_ci if (arch >= 9) 516bf215546Sopenharmony_ci return 16; 517bf215546Sopenharmony_ci else if (arch >= 7) 518bf215546Sopenharmony_ci return 8; 519bf215546Sopenharmony_ci else if (arch >= 6) 520bf215546Sopenharmony_ci return 4; 521bf215546Sopenharmony_ci else 522bf215546Sopenharmony_ci return 1; 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci#endif 526