1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#ifndef IR3_SHADER_H_ 28bf215546Sopenharmony_ci#define IR3_SHADER_H_ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include <stdio.h> 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include "c11/threads.h" 33bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 34bf215546Sopenharmony_ci#include "compiler/shader_enums.h" 35bf215546Sopenharmony_ci#include "util/bitscan.h" 36bf215546Sopenharmony_ci#include "util/disk_cache.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "ir3_compiler.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci/* driver param indices: */ 41bf215546Sopenharmony_cienum ir3_driver_param { 42bf215546Sopenharmony_ci /* compute shader driver params: */ 43bf215546Sopenharmony_ci IR3_DP_NUM_WORK_GROUPS_X = 0, 44bf215546Sopenharmony_ci IR3_DP_NUM_WORK_GROUPS_Y = 1, 45bf215546Sopenharmony_ci IR3_DP_NUM_WORK_GROUPS_Z = 2, 46bf215546Sopenharmony_ci IR3_DP_WORK_DIM = 3, 47bf215546Sopenharmony_ci IR3_DP_BASE_GROUP_X = 4, 48bf215546Sopenharmony_ci IR3_DP_BASE_GROUP_Y = 5, 49bf215546Sopenharmony_ci IR3_DP_BASE_GROUP_Z = 6, 50bf215546Sopenharmony_ci IR3_DP_CS_SUBGROUP_SIZE = 7, 51bf215546Sopenharmony_ci IR3_DP_LOCAL_GROUP_SIZE_X = 8, 52bf215546Sopenharmony_ci IR3_DP_LOCAL_GROUP_SIZE_Y = 9, 53bf215546Sopenharmony_ci IR3_DP_LOCAL_GROUP_SIZE_Z = 10, 54bf215546Sopenharmony_ci IR3_DP_SUBGROUP_ID_SHIFT = 11, 55bf215546Sopenharmony_ci IR3_DP_WORKGROUP_ID_X = 12, 56bf215546Sopenharmony_ci IR3_DP_WORKGROUP_ID_Y = 13, 57bf215546Sopenharmony_ci IR3_DP_WORKGROUP_ID_Z = 14, 58bf215546Sopenharmony_ci /* NOTE: gl_NumWorkGroups should be vec4 aligned because 59bf215546Sopenharmony_ci * glDispatchComputeIndirect() needs to load these from 60bf215546Sopenharmony_ci * the info->indirect buffer. Keep that in mind when/if 61bf215546Sopenharmony_ci * adding any addition CS driver params. 62bf215546Sopenharmony_ci */ 63bf215546Sopenharmony_ci IR3_DP_CS_COUNT = 16, /* must be aligned to vec4 */ 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci /* vertex shader driver params: */ 66bf215546Sopenharmony_ci IR3_DP_DRAWID = 0, 67bf215546Sopenharmony_ci IR3_DP_VTXID_BASE = 1, 68bf215546Sopenharmony_ci IR3_DP_INSTID_BASE = 2, 69bf215546Sopenharmony_ci IR3_DP_VTXCNT_MAX = 3, 70bf215546Sopenharmony_ci /* user-clip-plane components, up to 8x vec4's: */ 71bf215546Sopenharmony_ci IR3_DP_UCP0_X = 4, 72bf215546Sopenharmony_ci /* .... */ 73bf215546Sopenharmony_ci IR3_DP_UCP7_W = 35, 74bf215546Sopenharmony_ci IR3_DP_VS_COUNT = 36, /* must be aligned to vec4 */ 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci /* fragment shader driver params: */ 77bf215546Sopenharmony_ci IR3_DP_FS_SUBGROUP_SIZE = 0, 78bf215546Sopenharmony_ci}; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci#define IR3_MAX_SHADER_BUFFERS 32 81bf215546Sopenharmony_ci#define IR3_MAX_SHADER_IMAGES 32 82bf215546Sopenharmony_ci#define IR3_MAX_SO_BUFFERS 4 83bf215546Sopenharmony_ci#define IR3_MAX_SO_STREAMS 4 84bf215546Sopenharmony_ci#define IR3_MAX_SO_OUTPUTS 64 85bf215546Sopenharmony_ci#define IR3_MAX_UBO_PUSH_RANGES 32 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci/* mirrors SYSTEM_VALUE_BARYCENTRIC_ but starting from 0 */ 88bf215546Sopenharmony_cienum ir3_bary { 89bf215546Sopenharmony_ci IJ_PERSP_PIXEL, 90bf215546Sopenharmony_ci IJ_PERSP_SAMPLE, 91bf215546Sopenharmony_ci IJ_PERSP_CENTROID, 92bf215546Sopenharmony_ci IJ_PERSP_CENTER_RHW, 93bf215546Sopenharmony_ci IJ_LINEAR_PIXEL, 94bf215546Sopenharmony_ci IJ_LINEAR_CENTROID, 95bf215546Sopenharmony_ci IJ_LINEAR_SAMPLE, 96bf215546Sopenharmony_ci IJ_COUNT, 97bf215546Sopenharmony_ci}; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci/* Description of what wavesizes are allowed. */ 100bf215546Sopenharmony_cienum ir3_wavesize_option { 101bf215546Sopenharmony_ci IR3_SINGLE_ONLY, 102bf215546Sopenharmony_ci IR3_SINGLE_OR_DOUBLE, 103bf215546Sopenharmony_ci IR3_DOUBLE_ONLY, 104bf215546Sopenharmony_ci}; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci/** 107bf215546Sopenharmony_ci * Description of a lowered UBO. 108bf215546Sopenharmony_ci */ 109bf215546Sopenharmony_cistruct ir3_ubo_info { 110bf215546Sopenharmony_ci uint32_t block; /* Which constant block */ 111bf215546Sopenharmony_ci uint16_t bindless_base; /* For bindless, which base register is used */ 112bf215546Sopenharmony_ci bool bindless; 113bf215546Sopenharmony_ci}; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci/** 116bf215546Sopenharmony_ci * Description of a range of a lowered UBO access. 117bf215546Sopenharmony_ci * 118bf215546Sopenharmony_ci * Drivers should not assume that there are not multiple disjoint 119bf215546Sopenharmony_ci * lowered ranges of a single UBO. 120bf215546Sopenharmony_ci */ 121bf215546Sopenharmony_cistruct ir3_ubo_range { 122bf215546Sopenharmony_ci struct ir3_ubo_info ubo; 123bf215546Sopenharmony_ci uint32_t offset; /* start offset to push in the const register file */ 124bf215546Sopenharmony_ci uint32_t start, end; /* range of block that's actually used */ 125bf215546Sopenharmony_ci}; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_cistruct ir3_ubo_analysis_state { 128bf215546Sopenharmony_ci struct ir3_ubo_range range[IR3_MAX_UBO_PUSH_RANGES]; 129bf215546Sopenharmony_ci uint32_t num_enabled; 130bf215546Sopenharmony_ci uint32_t size; 131bf215546Sopenharmony_ci uint32_t 132bf215546Sopenharmony_ci cmdstream_size; /* for per-gen backend to stash required cmdstream size */ 133bf215546Sopenharmony_ci}; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci/** 136bf215546Sopenharmony_ci * Describes the layout of shader consts in the const register file. 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * Layout of constant registers, each section aligned to vec4. Note 139bf215546Sopenharmony_ci * that pointer size (ubo, etc) changes depending on generation. 140bf215546Sopenharmony_ci * 141bf215546Sopenharmony_ci * + user consts: only used for turnip push consts 142bf215546Sopenharmony_ci * + lowered UBO ranges 143bf215546Sopenharmony_ci * + preamble consts 144bf215546Sopenharmony_ci * + UBO addresses: turnip is bindless and these are wasted 145bf215546Sopenharmony_ci * + image dimensions: a5xx only; needed to calculate pixel offset, but only 146bf215546Sopenharmony_ci * for images that have image_{load,store,size,atomic*} intrinsics 147bf215546Sopenharmony_ci * + kernel params: cl only 148bf215546Sopenharmony_ci * + driver params: these are stage-dependent; see ir3_driver_param 149bf215546Sopenharmony_ci * + TFBO addresses: only for vs on a3xx/a4xx 150bf215546Sopenharmony_ci * + primitive params: these are stage-dependent 151bf215546Sopenharmony_ci * vs, gs: uvec4(primitive_stride, vertex_stride, 0, 0) 152bf215546Sopenharmony_ci * hs, ds: uvec4(primitive_stride, vertex_stride, 153bf215546Sopenharmony_ci * patch_stride, patch_vertices_in) 154bf215546Sopenharmony_ci * uvec4(tess_param_base, tess_factor_base) 155bf215546Sopenharmony_ci * + primitive map 156bf215546Sopenharmony_ci * + lowered immediates 157bf215546Sopenharmony_ci * 158bf215546Sopenharmony_ci * Immediates go last mostly because they are inserted in the CP pass 159bf215546Sopenharmony_ci * after the nir -> ir3 frontend. 160bf215546Sopenharmony_ci * 161bf215546Sopenharmony_ci * Note UBO size in bytes should be aligned to vec4 162bf215546Sopenharmony_ci */ 163bf215546Sopenharmony_cistruct ir3_const_state { 164bf215546Sopenharmony_ci unsigned num_ubos; 165bf215546Sopenharmony_ci unsigned num_driver_params; /* scalar */ 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* UBO that should be mapped to the NIR shader's constant_data (or -1). */ 168bf215546Sopenharmony_ci int32_t constant_data_ubo; 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci struct { 171bf215546Sopenharmony_ci /* user const start at zero */ 172bf215546Sopenharmony_ci unsigned ubo; 173bf215546Sopenharmony_ci unsigned image_dims; 174bf215546Sopenharmony_ci unsigned kernel_params; 175bf215546Sopenharmony_ci unsigned driver_param; 176bf215546Sopenharmony_ci unsigned tfbo; 177bf215546Sopenharmony_ci unsigned primitive_param; 178bf215546Sopenharmony_ci unsigned primitive_map; 179bf215546Sopenharmony_ci unsigned immediate; 180bf215546Sopenharmony_ci } offsets; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci struct { 183bf215546Sopenharmony_ci uint32_t mask; /* bitmask of images that have image_store */ 184bf215546Sopenharmony_ci uint32_t count; /* number of consts allocated */ 185bf215546Sopenharmony_ci /* three const allocated per image which has image_store: 186bf215546Sopenharmony_ci * + cpp (bytes per pixel) 187bf215546Sopenharmony_ci * + pitch (y pitch) 188bf215546Sopenharmony_ci * + array_pitch (z pitch) 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_ci uint32_t off[IR3_MAX_SHADER_IMAGES]; 191bf215546Sopenharmony_ci } image_dims; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci unsigned immediates_count; 194bf215546Sopenharmony_ci unsigned immediates_size; 195bf215546Sopenharmony_ci uint32_t *immediates; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci unsigned preamble_size; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* State of ubo access lowered to push consts: */ 200bf215546Sopenharmony_ci struct ir3_ubo_analysis_state ubo_state; 201bf215546Sopenharmony_ci bool shared_consts_enable; 202bf215546Sopenharmony_ci}; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci/** 205bf215546Sopenharmony_ci * A single output for vertex transform feedback. 206bf215546Sopenharmony_ci */ 207bf215546Sopenharmony_cistruct ir3_stream_output { 208bf215546Sopenharmony_ci unsigned register_index : 6; /**< 0 to 63 (OUT index) */ 209bf215546Sopenharmony_ci unsigned start_component : 2; /** 0 to 3 */ 210bf215546Sopenharmony_ci unsigned num_components : 3; /** 1 to 4 */ 211bf215546Sopenharmony_ci unsigned output_buffer : 3; /**< 0 to PIPE_MAX_SO_BUFFERS */ 212bf215546Sopenharmony_ci unsigned dst_offset : 16; /**< offset into the buffer in dwords */ 213bf215546Sopenharmony_ci unsigned stream : 2; /**< 0 to 3 */ 214bf215546Sopenharmony_ci}; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci/** 217bf215546Sopenharmony_ci * Stream output for vertex transform feedback. 218bf215546Sopenharmony_ci */ 219bf215546Sopenharmony_cistruct ir3_stream_output_info { 220bf215546Sopenharmony_ci unsigned num_outputs; 221bf215546Sopenharmony_ci /** stride for an entire vertex for each buffer in dwords */ 222bf215546Sopenharmony_ci uint16_t stride[IR3_MAX_SO_BUFFERS]; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci /* These correspond to the VPC_SO_STREAM_CNTL fields */ 225bf215546Sopenharmony_ci uint8_t streams_written; 226bf215546Sopenharmony_ci uint8_t buffer_to_stream[IR3_MAX_SO_BUFFERS]; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci /** 229bf215546Sopenharmony_ci * Array of stream outputs, in the order they are to be written in. 230bf215546Sopenharmony_ci * Selected components are tightly packed into the output buffer. 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_ci struct ir3_stream_output output[IR3_MAX_SO_OUTPUTS]; 233bf215546Sopenharmony_ci}; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci/** 236bf215546Sopenharmony_ci * Starting from a4xx, HW supports pre-dispatching texture sampling 237bf215546Sopenharmony_ci * instructions prior to scheduling a shader stage, when the 238bf215546Sopenharmony_ci * coordinate maps exactly to an output of the previous stage. 239bf215546Sopenharmony_ci */ 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci/** 242bf215546Sopenharmony_ci * There is a limit in the number of pre-dispatches allowed for any 243bf215546Sopenharmony_ci * given stage. 244bf215546Sopenharmony_ci */ 245bf215546Sopenharmony_ci#define IR3_MAX_SAMPLER_PREFETCH 4 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci/** 248bf215546Sopenharmony_ci * This is the output stream value for 'cmd', as used by blob. It may 249bf215546Sopenharmony_ci * encode the return type (in 3 bits) but it hasn't been verified yet. 250bf215546Sopenharmony_ci */ 251bf215546Sopenharmony_ci#define IR3_SAMPLER_PREFETCH_CMD 0x4 252bf215546Sopenharmony_ci#define IR3_SAMPLER_BINDLESS_PREFETCH_CMD 0x6 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci/** 255bf215546Sopenharmony_ci * Stream output for texture sampling pre-dispatches. 256bf215546Sopenharmony_ci */ 257bf215546Sopenharmony_cistruct ir3_sampler_prefetch { 258bf215546Sopenharmony_ci uint8_t src; 259bf215546Sopenharmony_ci uint8_t samp_id; 260bf215546Sopenharmony_ci uint8_t tex_id; 261bf215546Sopenharmony_ci uint16_t samp_bindless_id; 262bf215546Sopenharmony_ci uint16_t tex_bindless_id; 263bf215546Sopenharmony_ci uint8_t dst; 264bf215546Sopenharmony_ci uint8_t wrmask; 265bf215546Sopenharmony_ci uint8_t half_precision; 266bf215546Sopenharmony_ci uint8_t cmd; 267bf215546Sopenharmony_ci}; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci/* Configuration key used to identify a shader variant.. different 270bf215546Sopenharmony_ci * shader variants can be used to implement features not supported 271bf215546Sopenharmony_ci * in hw (two sided color), binning-pass vertex shader, etc. 272bf215546Sopenharmony_ci * 273bf215546Sopenharmony_ci * When adding to this struct, please update ir3_shader_variant()'s debug 274bf215546Sopenharmony_ci * output. 275bf215546Sopenharmony_ci */ 276bf215546Sopenharmony_cistruct ir3_shader_key { 277bf215546Sopenharmony_ci union { 278bf215546Sopenharmony_ci struct { 279bf215546Sopenharmony_ci /* 280bf215546Sopenharmony_ci * Combined Vertex/Fragment shader parameters: 281bf215546Sopenharmony_ci */ 282bf215546Sopenharmony_ci unsigned ucp_enables : 8; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci /* do we need to check {v,f}saturate_{s,t,r}? */ 285bf215546Sopenharmony_ci unsigned has_per_samp : 1; 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci /* 288bf215546Sopenharmony_ci * Fragment shader variant parameters: 289bf215546Sopenharmony_ci */ 290bf215546Sopenharmony_ci unsigned sample_shading : 1; 291bf215546Sopenharmony_ci unsigned msaa : 1; 292bf215546Sopenharmony_ci /* used when shader needs to handle flat varyings (a4xx) 293bf215546Sopenharmony_ci * for front/back color inputs to frag shader: 294bf215546Sopenharmony_ci */ 295bf215546Sopenharmony_ci unsigned rasterflat : 1; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci /* Indicates that this is a tessellation pipeline which requires a 298bf215546Sopenharmony_ci * whole different kind of vertex shader. In case of 299bf215546Sopenharmony_ci * tessellation, this field also tells us which kind of output 300bf215546Sopenharmony_ci * topology the TES uses, which the TCS needs to know. 301bf215546Sopenharmony_ci */ 302bf215546Sopenharmony_ci#define IR3_TESS_NONE 0 303bf215546Sopenharmony_ci#define IR3_TESS_QUADS 1 304bf215546Sopenharmony_ci#define IR3_TESS_TRIANGLES 2 305bf215546Sopenharmony_ci#define IR3_TESS_ISOLINES 3 306bf215546Sopenharmony_ci unsigned tessellation : 2; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci unsigned has_gs : 1; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci /* Whether stages after TCS read gl_PrimitiveID, used to determine 311bf215546Sopenharmony_ci * whether the TCS has to store it in the tess factor BO. 312bf215546Sopenharmony_ci */ 313bf215546Sopenharmony_ci unsigned tcs_store_primid : 1; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* Whether this variant sticks to the "safe" maximum constlen, 316bf215546Sopenharmony_ci * which guarantees that the combined stages will never go over 317bf215546Sopenharmony_ci * the limit: 318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci unsigned safe_constlen : 1; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci /* Whether gl_Layer must be forced to 0 because it isn't written. */ 322bf215546Sopenharmony_ci unsigned layer_zero : 1; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* Whether gl_ViewportIndex must be forced to 0 because it isn't 325bf215546Sopenharmony_ci * written. */ 326bf215546Sopenharmony_ci unsigned view_zero : 1; 327bf215546Sopenharmony_ci }; 328bf215546Sopenharmony_ci uint32_t global; 329bf215546Sopenharmony_ci }; 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci /* bitmask of ms shifts (a3xx) */ 332bf215546Sopenharmony_ci uint32_t vsamples, fsamples; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci /* bitmask of samplers which need astc srgb workaround (a4xx): */ 335bf215546Sopenharmony_ci uint16_t vastc_srgb, fastc_srgb; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* per-component (3-bit) swizzles of each sampler (a4xx tg4): */ 338bf215546Sopenharmony_ci uint16_t vsampler_swizzles[16]; 339bf215546Sopenharmony_ci uint16_t fsampler_swizzles[16]; 340bf215546Sopenharmony_ci}; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_cistatic inline unsigned 343bf215546Sopenharmony_ciir3_tess_mode(enum tess_primitive_mode tess_mode) 344bf215546Sopenharmony_ci{ 345bf215546Sopenharmony_ci switch (tess_mode) { 346bf215546Sopenharmony_ci case TESS_PRIMITIVE_ISOLINES: 347bf215546Sopenharmony_ci return IR3_TESS_ISOLINES; 348bf215546Sopenharmony_ci case TESS_PRIMITIVE_TRIANGLES: 349bf215546Sopenharmony_ci return IR3_TESS_TRIANGLES; 350bf215546Sopenharmony_ci case TESS_PRIMITIVE_QUADS: 351bf215546Sopenharmony_ci return IR3_TESS_QUADS; 352bf215546Sopenharmony_ci default: 353bf215546Sopenharmony_ci unreachable("bad tessmode"); 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci} 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_cistatic inline uint32_t 358bf215546Sopenharmony_ciir3_tess_factor_stride(unsigned patch_type) 359bf215546Sopenharmony_ci{ 360bf215546Sopenharmony_ci /* note: this matches the stride used by ir3's build_tessfactor_base */ 361bf215546Sopenharmony_ci switch (patch_type) { 362bf215546Sopenharmony_ci case IR3_TESS_ISOLINES: 363bf215546Sopenharmony_ci return 12; 364bf215546Sopenharmony_ci case IR3_TESS_TRIANGLES: 365bf215546Sopenharmony_ci return 20; 366bf215546Sopenharmony_ci case IR3_TESS_QUADS: 367bf215546Sopenharmony_ci return 28; 368bf215546Sopenharmony_ci default: 369bf215546Sopenharmony_ci unreachable("bad tessmode"); 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci} 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_cistatic inline bool 374bf215546Sopenharmony_ciir3_shader_key_equal(const struct ir3_shader_key *a, 375bf215546Sopenharmony_ci const struct ir3_shader_key *b) 376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci /* slow-path if we need to check {v,f}saturate_{s,t,r} */ 378bf215546Sopenharmony_ci if (a->has_per_samp || b->has_per_samp) 379bf215546Sopenharmony_ci return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; 380bf215546Sopenharmony_ci return a->global == b->global; 381bf215546Sopenharmony_ci} 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci/* will the two keys produce different lowering for a fragment shader? */ 384bf215546Sopenharmony_cistatic inline bool 385bf215546Sopenharmony_ciir3_shader_key_changes_fs(struct ir3_shader_key *key, 386bf215546Sopenharmony_ci struct ir3_shader_key *last_key) 387bf215546Sopenharmony_ci{ 388bf215546Sopenharmony_ci if (last_key->has_per_samp || key->has_per_samp) { 389bf215546Sopenharmony_ci if ((last_key->fsamples != key->fsamples) || 390bf215546Sopenharmony_ci (last_key->fastc_srgb != key->fastc_srgb) || 391bf215546Sopenharmony_ci memcmp(last_key->fsampler_swizzles, key->fsampler_swizzles, 392bf215546Sopenharmony_ci sizeof(key->fsampler_swizzles))) 393bf215546Sopenharmony_ci return true; 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci if (last_key->rasterflat != key->rasterflat) 397bf215546Sopenharmony_ci return true; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci if (last_key->layer_zero != key->layer_zero) 400bf215546Sopenharmony_ci return true; 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci if (last_key->ucp_enables != key->ucp_enables) 403bf215546Sopenharmony_ci return true; 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci if (last_key->safe_constlen != key->safe_constlen) 406bf215546Sopenharmony_ci return true; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci return false; 409bf215546Sopenharmony_ci} 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci/* will the two keys produce different lowering for a vertex shader? */ 412bf215546Sopenharmony_cistatic inline bool 413bf215546Sopenharmony_ciir3_shader_key_changes_vs(struct ir3_shader_key *key, 414bf215546Sopenharmony_ci struct ir3_shader_key *last_key) 415bf215546Sopenharmony_ci{ 416bf215546Sopenharmony_ci if (last_key->has_per_samp || key->has_per_samp) { 417bf215546Sopenharmony_ci if ((last_key->vsamples != key->vsamples) || 418bf215546Sopenharmony_ci (last_key->vastc_srgb != key->vastc_srgb) || 419bf215546Sopenharmony_ci memcmp(last_key->vsampler_swizzles, key->vsampler_swizzles, 420bf215546Sopenharmony_ci sizeof(key->vsampler_swizzles))) 421bf215546Sopenharmony_ci return true; 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci if (last_key->ucp_enables != key->ucp_enables) 425bf215546Sopenharmony_ci return true; 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci if (last_key->safe_constlen != key->safe_constlen) 428bf215546Sopenharmony_ci return true; 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci return false; 431bf215546Sopenharmony_ci} 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci/** 434bf215546Sopenharmony_ci * On a4xx+a5xx, Images share state with textures and SSBOs: 435bf215546Sopenharmony_ci * 436bf215546Sopenharmony_ci * + Uses texture (cat5) state/instruction (isam) to read 437bf215546Sopenharmony_ci * + Uses SSBO state and instructions (cat6) to write and for atomics 438bf215546Sopenharmony_ci * 439bf215546Sopenharmony_ci * Starting with a6xx, Images and SSBOs are basically the same thing, 440bf215546Sopenharmony_ci * with texture state and isam also used for SSBO reads. 441bf215546Sopenharmony_ci * 442bf215546Sopenharmony_ci * On top of that, gallium makes the SSBO (shader_buffers) state semi 443bf215546Sopenharmony_ci * sparse, with the first half of the state space used for atomic 444bf215546Sopenharmony_ci * counters lowered to atomic buffers. We could ignore this, but I 445bf215546Sopenharmony_ci * don't think we could *really* handle the case of a single shader 446bf215546Sopenharmony_ci * that used the max # of textures + images + SSBOs. And once we are 447bf215546Sopenharmony_ci * offsetting images by num_ssbos (or visa versa) to map them into 448bf215546Sopenharmony_ci * the same hardware state, the hardware state has become coupled to 449bf215546Sopenharmony_ci * the shader state, so at this point we might as well just use a 450bf215546Sopenharmony_ci * mapping table to remap things from image/SSBO idx to hw idx. 451bf215546Sopenharmony_ci * 452bf215546Sopenharmony_ci * To make things less (more?) confusing, for the hw "SSBO" state 453bf215546Sopenharmony_ci * (since it is really both SSBO and Image) I'll use the name "IBO" 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_cistruct ir3_ibo_mapping { 456bf215546Sopenharmony_ci#define IBO_INVALID 0xff 457bf215546Sopenharmony_ci /* Maps logical SSBO state to hw tex state: */ 458bf215546Sopenharmony_ci uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS]; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci /* Maps logical Image state to hw tex state: */ 461bf215546Sopenharmony_ci uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES]; 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci /* Maps hw state back to logical SSBO or Image state: 464bf215546Sopenharmony_ci * 465bf215546Sopenharmony_ci * note IBO_SSBO ORd into values to indicate that the 466bf215546Sopenharmony_ci * hw slot is used for SSBO state vs Image state. 467bf215546Sopenharmony_ci */ 468bf215546Sopenharmony_ci#define IBO_SSBO 0x80 469bf215546Sopenharmony_ci uint8_t tex_to_image[32]; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci /* including real textures */ 472bf215546Sopenharmony_ci uint8_t num_tex; 473bf215546Sopenharmony_ci /* the number of real textures, ie. image/ssbo start here */ 474bf215546Sopenharmony_ci uint8_t tex_base; 475bf215546Sopenharmony_ci}; 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_cistruct ir3_disasm_info { 478bf215546Sopenharmony_ci bool write_disasm; 479bf215546Sopenharmony_ci char *nir; 480bf215546Sopenharmony_ci char *disasm; 481bf215546Sopenharmony_ci}; 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci/* Represents half register in regid */ 484bf215546Sopenharmony_ci#define HALF_REG_ID 0x100 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci/** 487bf215546Sopenharmony_ci * Shader variant which contains the actual hw shader instructions, 488bf215546Sopenharmony_ci * and necessary info for shader state setup. 489bf215546Sopenharmony_ci */ 490bf215546Sopenharmony_cistruct ir3_shader_variant { 491bf215546Sopenharmony_ci struct fd_bo *bo; 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci /* variant id (for debug) */ 494bf215546Sopenharmony_ci uint32_t id; 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci /* id of the shader the variant came from (for debug) */ 497bf215546Sopenharmony_ci uint32_t shader_id; 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci struct ir3_shader_key key; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci /* vertex shaders can have an extra version for hwbinning pass, 502bf215546Sopenharmony_ci * which is pointed to by so->binning: 503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci bool binning_pass; 505bf215546Sopenharmony_ci // union { 506bf215546Sopenharmony_ci struct ir3_shader_variant *binning; 507bf215546Sopenharmony_ci struct ir3_shader_variant *nonbinning; 508bf215546Sopenharmony_ci // }; 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci struct ir3 *ir; /* freed after assembling machine instructions */ 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci /* shader variants form a linked list: */ 513bf215546Sopenharmony_ci struct ir3_shader_variant *next; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci /* replicated here to avoid passing extra ptrs everywhere: */ 516bf215546Sopenharmony_ci gl_shader_stage type; 517bf215546Sopenharmony_ci struct ir3_compiler *compiler; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci char *name; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci /* variant's copy of nir->constant_data (since we don't track the NIR in 522bf215546Sopenharmony_ci * the variant, and shader->nir is before the opt pass). Moves to v->bin 523bf215546Sopenharmony_ci * after assembly. 524bf215546Sopenharmony_ci */ 525bf215546Sopenharmony_ci void *constant_data; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci /* 528bf215546Sopenharmony_ci * Below here is serialized when written to disk cache: 529bf215546Sopenharmony_ci */ 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci /* The actual binary shader instructions, size given by info.sizedwords: */ 532bf215546Sopenharmony_ci uint32_t *bin; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci struct ir3_const_state *const_state; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci /* 537bf215546Sopenharmony_ci * The following macros are used by the shader disk cache save/ 538bf215546Sopenharmony_ci * restore paths to serialize/deserialize the variant. Any 539bf215546Sopenharmony_ci * pointers that require special handling in store_variant() 540bf215546Sopenharmony_ci * and retrieve_variant() should go above here. 541bf215546Sopenharmony_ci */ 542bf215546Sopenharmony_ci#define VARIANT_CACHE_START offsetof(struct ir3_shader_variant, info) 543bf215546Sopenharmony_ci#define VARIANT_CACHE_PTR(v) (((char *)v) + VARIANT_CACHE_START) 544bf215546Sopenharmony_ci#define VARIANT_CACHE_SIZE \ 545bf215546Sopenharmony_ci (sizeof(struct ir3_shader_variant) - VARIANT_CACHE_START) 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci struct ir3_info info; 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci uint32_t constant_data_size; 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci /* Levels of nesting of flow control: 552bf215546Sopenharmony_ci */ 553bf215546Sopenharmony_ci unsigned branchstack; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci unsigned loops; 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci /* the instructions length is in units of instruction groups 558bf215546Sopenharmony_ci * (4 instructions for a3xx, 16 instructions for a4xx.. each 559bf215546Sopenharmony_ci * instruction is 2 dwords): 560bf215546Sopenharmony_ci */ 561bf215546Sopenharmony_ci unsigned instrlen; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* the constants length is in units of vec4's, and is the sum of 564bf215546Sopenharmony_ci * the uniforms and the built-in compiler constants 565bf215546Sopenharmony_ci */ 566bf215546Sopenharmony_ci unsigned constlen; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci /* The private memory size in bytes */ 569bf215546Sopenharmony_ci unsigned pvtmem_size; 570bf215546Sopenharmony_ci /* Whether we should use the new per-wave layout rather than per-fiber. */ 571bf215546Sopenharmony_ci bool pvtmem_per_wave; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci /* Size in bytes of required shared memory */ 574bf215546Sopenharmony_ci unsigned shared_size; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci /* About Linkage: 577bf215546Sopenharmony_ci * + Let the frag shader determine the position/compmask for the 578bf215546Sopenharmony_ci * varyings, since it is the place where we know if the varying 579bf215546Sopenharmony_ci * is actually used, and if so, which components are used. So 580bf215546Sopenharmony_ci * what the hw calls "outloc" is taken from the "inloc" of the 581bf215546Sopenharmony_ci * frag shader. 582bf215546Sopenharmony_ci * + From the vert shader, we only need the output regid 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci bool frag_face, color0_mrt; 586bf215546Sopenharmony_ci uint8_t fragcoord_compmask; 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* NOTE: for input/outputs, slot is: 589bf215546Sopenharmony_ci * gl_vert_attrib - for VS inputs 590bf215546Sopenharmony_ci * gl_varying_slot - for VS output / FS input 591bf215546Sopenharmony_ci * gl_frag_result - for FS output 592bf215546Sopenharmony_ci */ 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci /* varyings/outputs: */ 595bf215546Sopenharmony_ci unsigned outputs_count; 596bf215546Sopenharmony_ci struct { 597bf215546Sopenharmony_ci uint8_t slot; 598bf215546Sopenharmony_ci uint8_t regid; 599bf215546Sopenharmony_ci uint8_t view; 600bf215546Sopenharmony_ci bool half : 1; 601bf215546Sopenharmony_ci } outputs[32 + 2]; /* +POSITION +PSIZE */ 602bf215546Sopenharmony_ci bool writes_pos, writes_smask, writes_psize, writes_stencilref; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci /* Size in dwords of all outputs for VS, size of entire patch for HS. */ 605bf215546Sopenharmony_ci uint32_t output_size; 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci /* Expected size of incoming output_loc for HS, DS, and GS */ 608bf215546Sopenharmony_ci uint32_t input_size; 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci /* Map from location to offset in per-primitive storage. In dwords for 611bf215546Sopenharmony_ci * HS, where varyings are read in the next stage via ldg with a dword 612bf215546Sopenharmony_ci * offset, and in bytes for all other stages. 613bf215546Sopenharmony_ci * +POSITION, +PSIZE, ... - see shader_io_get_unique_index 614bf215546Sopenharmony_ci */ 615bf215546Sopenharmony_ci unsigned output_loc[12 + 32]; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci /* attributes (VS) / varyings (FS): 618bf215546Sopenharmony_ci * Note that sysval's should come *after* normal inputs. 619bf215546Sopenharmony_ci */ 620bf215546Sopenharmony_ci unsigned inputs_count; 621bf215546Sopenharmony_ci struct { 622bf215546Sopenharmony_ci uint8_t slot; 623bf215546Sopenharmony_ci uint8_t regid; 624bf215546Sopenharmony_ci uint8_t compmask; 625bf215546Sopenharmony_ci /* location of input (ie. offset passed to bary.f, etc). This 626bf215546Sopenharmony_ci * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx 627bf215546Sopenharmony_ci * have the OUTLOCn value offset by 8, presumably to account 628bf215546Sopenharmony_ci * for gl_Position/gl_PointSize) 629bf215546Sopenharmony_ci */ 630bf215546Sopenharmony_ci uint8_t inloc; 631bf215546Sopenharmony_ci /* vertex shader specific: */ 632bf215546Sopenharmony_ci bool sysval : 1; /* slot is a gl_system_value */ 633bf215546Sopenharmony_ci /* fragment shader specific: */ 634bf215546Sopenharmony_ci bool bary : 1; /* fetched varying (vs one loaded into reg) */ 635bf215546Sopenharmony_ci bool rasterflat : 1; /* special handling for emit->rasterflat */ 636bf215546Sopenharmony_ci bool half : 1; 637bf215546Sopenharmony_ci bool flat : 1; 638bf215546Sopenharmony_ci } inputs[32 + 2]; /* +POSITION +FACE */ 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci /* sum of input components (scalar). For frag shaders, it only counts 641bf215546Sopenharmony_ci * the varying inputs: 642bf215546Sopenharmony_ci */ 643bf215546Sopenharmony_ci unsigned total_in; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci /* sum of sysval input components (scalar). */ 646bf215546Sopenharmony_ci unsigned sysval_in; 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci /* For frag shaders, the total number of inputs (not scalar, 649bf215546Sopenharmony_ci * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) 650bf215546Sopenharmony_ci */ 651bf215546Sopenharmony_ci unsigned varying_in; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci /* Remapping table to map Image and SSBO to hw state: */ 654bf215546Sopenharmony_ci struct ir3_ibo_mapping image_mapping; 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci /* number of samplers/textures (which are currently 1:1): */ 657bf215546Sopenharmony_ci int num_samp; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci /* is there an implicit sampler to read framebuffer (FS only).. if 660bf215546Sopenharmony_ci * so the sampler-idx is 'num_samp - 1' (ie. it is appended after 661bf215546Sopenharmony_ci * the last "real" texture) 662bf215546Sopenharmony_ci */ 663bf215546Sopenharmony_ci bool fb_read; 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci /* do we have one or more SSBO instructions: */ 666bf215546Sopenharmony_ci bool has_ssbo; 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci /* Which bindless resources are used, for filling out sp_xs_config */ 669bf215546Sopenharmony_ci bool bindless_tex; 670bf215546Sopenharmony_ci bool bindless_samp; 671bf215546Sopenharmony_ci bool bindless_ibo; 672bf215546Sopenharmony_ci bool bindless_ubo; 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci /* do we need derivatives: */ 675bf215546Sopenharmony_ci bool need_pixlod; 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci bool need_fine_derivatives; 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci /* do we need VS driver params? */ 680bf215546Sopenharmony_ci bool need_driver_params; 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci /* do we have image write, etc (which prevents early-z): */ 683bf215546Sopenharmony_ci bool no_earlyz; 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci /* do we have kill, which also prevents early-z, but not necessarily 686bf215546Sopenharmony_ci * early-lrz (as long as lrz-write is disabled, which must be handled 687bf215546Sopenharmony_ci * outside of ir3. Unlike other no_earlyz cases, kill doesn't have 688bf215546Sopenharmony_ci * side effects that prevent early-lrz discard. 689bf215546Sopenharmony_ci */ 690bf215546Sopenharmony_ci bool has_kill; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci bool per_samp; 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci /* Are we using split or merged register file? */ 695bf215546Sopenharmony_ci bool mergedregs; 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci uint8_t clip_mask, cull_mask; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci /* for astc srgb workaround, the number/base of additional 700bf215546Sopenharmony_ci * alpha tex states we need, and index of original tex states 701bf215546Sopenharmony_ci */ 702bf215546Sopenharmony_ci struct { 703bf215546Sopenharmony_ci unsigned base, count; 704bf215546Sopenharmony_ci unsigned orig_idx[16]; 705bf215546Sopenharmony_ci } astc_srgb; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci /* for tg4 workaround, the number/base of additional 708bf215546Sopenharmony_ci * unswizzled tex states we need, and index of original tex states 709bf215546Sopenharmony_ci */ 710bf215546Sopenharmony_ci struct { 711bf215546Sopenharmony_ci unsigned base, count; 712bf215546Sopenharmony_ci unsigned orig_idx[16]; 713bf215546Sopenharmony_ci } tg4; 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci /* texture sampler pre-dispatches */ 716bf215546Sopenharmony_ci uint32_t num_sampler_prefetch; 717bf215546Sopenharmony_ci struct ir3_sampler_prefetch sampler_prefetch[IR3_MAX_SAMPLER_PREFETCH]; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci uint16_t local_size[3]; 720bf215546Sopenharmony_ci bool local_size_variable; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci /* Important for compute shader to determine max reg footprint */ 723bf215546Sopenharmony_ci bool has_barrier; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci /* The offset where images start in the IBO array. */ 726bf215546Sopenharmony_ci unsigned num_ssbos; 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci /* The total number of SSBOs and images, i.e. the number of hardware IBOs. */ 729bf215546Sopenharmony_ci unsigned num_ibos; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci unsigned num_reserved_user_consts; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci union { 734bf215546Sopenharmony_ci struct { 735bf215546Sopenharmony_ci enum tess_primitive_mode primitive_mode; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci /** The number of vertices in the TCS output patch. */ 738bf215546Sopenharmony_ci uint8_t tcs_vertices_out; 739bf215546Sopenharmony_ci unsigned spacing:2; /*gl_tess_spacing*/ 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci /** Is the vertex order counterclockwise? */ 742bf215546Sopenharmony_ci bool ccw:1; 743bf215546Sopenharmony_ci bool point_mode:1; 744bf215546Sopenharmony_ci } tess; 745bf215546Sopenharmony_ci struct { 746bf215546Sopenharmony_ci /** The output primitive type */ 747bf215546Sopenharmony_ci uint16_t output_primitive; 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci /** The maximum number of vertices the geometry shader might write. */ 750bf215546Sopenharmony_ci uint16_t vertices_out; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ 753bf215546Sopenharmony_ci uint8_t invocations; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci /** The number of vertices received per input primitive (max. 6) */ 756bf215546Sopenharmony_ci uint8_t vertices_in:3; 757bf215546Sopenharmony_ci } gs; 758bf215546Sopenharmony_ci struct { 759bf215546Sopenharmony_ci bool early_fragment_tests : 1; 760bf215546Sopenharmony_ci bool color_is_dual_source : 1; 761bf215546Sopenharmony_ci } fs; 762bf215546Sopenharmony_ci struct { 763bf215546Sopenharmony_ci unsigned req_input_mem; 764bf215546Sopenharmony_ci unsigned req_local_mem; 765bf215546Sopenharmony_ci } cs; 766bf215546Sopenharmony_ci }; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci enum ir3_wavesize_option api_wavesize, real_wavesize; 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci /* For when we don't have a shader, variant's copy of streamout state */ 771bf215546Sopenharmony_ci struct ir3_stream_output_info stream_output; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci struct ir3_disasm_info disasm_info; 774bf215546Sopenharmony_ci}; 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_cistatic inline const char * 777bf215546Sopenharmony_ciir3_shader_stage(struct ir3_shader_variant *v) 778bf215546Sopenharmony_ci{ 779bf215546Sopenharmony_ci switch (v->type) { 780bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 781bf215546Sopenharmony_ci return v->binning_pass ? "BVERT" : "VERT"; 782bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 783bf215546Sopenharmony_ci return "TCS"; 784bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 785bf215546Sopenharmony_ci return "TES"; 786bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 787bf215546Sopenharmony_ci return "GEOM"; 788bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 789bf215546Sopenharmony_ci return "FRAG"; 790bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 791bf215546Sopenharmony_ci case MESA_SHADER_KERNEL: 792bf215546Sopenharmony_ci return "CL"; 793bf215546Sopenharmony_ci default: 794bf215546Sopenharmony_ci unreachable("invalid type"); 795bf215546Sopenharmony_ci return NULL; 796bf215546Sopenharmony_ci } 797bf215546Sopenharmony_ci} 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci/* Currently we do not do binning for tess. And for GS there is no 800bf215546Sopenharmony_ci * cross-stage VS+GS optimization, so the full VS+GS is used in 801bf215546Sopenharmony_ci * the binning pass. 802bf215546Sopenharmony_ci */ 803bf215546Sopenharmony_cistatic inline bool 804bf215546Sopenharmony_ciir3_has_binning_vs(const struct ir3_shader_key *key) 805bf215546Sopenharmony_ci{ 806bf215546Sopenharmony_ci if (key->tessellation || key->has_gs) 807bf215546Sopenharmony_ci return false; 808bf215546Sopenharmony_ci return true; 809bf215546Sopenharmony_ci} 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci/** 812bf215546Sopenharmony_ci * Represents a shader at the API level, before state-specific variants are 813bf215546Sopenharmony_ci * generated. 814bf215546Sopenharmony_ci */ 815bf215546Sopenharmony_cistruct ir3_shader { 816bf215546Sopenharmony_ci gl_shader_stage type; 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci /* shader id (for debug): */ 819bf215546Sopenharmony_ci uint32_t id; 820bf215546Sopenharmony_ci uint32_t variant_count; 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci /* Set by freedreno after shader_state_create, so we can emit debug info 823bf215546Sopenharmony_ci * when recompiling a shader at draw time. 824bf215546Sopenharmony_ci */ 825bf215546Sopenharmony_ci bool initial_variants_done; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci struct ir3_compiler *compiler; 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci unsigned num_reserved_user_consts; 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci /* What API-visible wavesizes are allowed. Even if only double wavesize is 832bf215546Sopenharmony_ci * allowed, we may still use the smaller wavesize "under the hood" and the 833bf215546Sopenharmony_ci * application simply sees the upper half as always disabled. 834bf215546Sopenharmony_ci */ 835bf215546Sopenharmony_ci enum ir3_wavesize_option api_wavesize; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci /* What wavesizes we're allowed to actually use. If the API wavesize is 838bf215546Sopenharmony_ci * single-only, then this must be single-only too. 839bf215546Sopenharmony_ci */ 840bf215546Sopenharmony_ci enum ir3_wavesize_option real_wavesize; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci bool nir_finalized; 843bf215546Sopenharmony_ci struct nir_shader *nir; 844bf215546Sopenharmony_ci struct ir3_stream_output_info stream_output; 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci /* per shader stage specific info: */ 847bf215546Sopenharmony_ci union { 848bf215546Sopenharmony_ci /* for compute shaders: */ 849bf215546Sopenharmony_ci struct { 850bf215546Sopenharmony_ci unsigned req_input_mem; /* in dwords */ 851bf215546Sopenharmony_ci unsigned req_local_mem; 852bf215546Sopenharmony_ci } cs; 853bf215546Sopenharmony_ci }; 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci struct ir3_shader_variant *variants; 856bf215546Sopenharmony_ci mtx_t variants_lock; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci cache_key cache_key; /* shader disk-cache key */ 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci /* Bitmask of bits of the shader key used by this shader. Used to avoid 861bf215546Sopenharmony_ci * recompiles for GL NOS that doesn't actually apply to the shader. 862bf215546Sopenharmony_ci */ 863bf215546Sopenharmony_ci struct ir3_shader_key key_mask; 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci bool shared_consts_enable; 866bf215546Sopenharmony_ci}; 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci/** 869bf215546Sopenharmony_ci * In order to use the same cmdstream, in particular constlen setup and const 870bf215546Sopenharmony_ci * emit, for both binning and draw pass (a6xx+), the binning pass re-uses it's 871bf215546Sopenharmony_ci * corresponding draw pass shaders const_state. 872bf215546Sopenharmony_ci */ 873bf215546Sopenharmony_cistatic inline struct ir3_const_state * 874bf215546Sopenharmony_ciir3_const_state(const struct ir3_shader_variant *v) 875bf215546Sopenharmony_ci{ 876bf215546Sopenharmony_ci if (v->binning_pass) 877bf215546Sopenharmony_ci return v->nonbinning->const_state; 878bf215546Sopenharmony_ci return v->const_state; 879bf215546Sopenharmony_ci} 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci/* Given a variant, calculate the maximum constlen it can have. 882bf215546Sopenharmony_ci */ 883bf215546Sopenharmony_cistatic inline unsigned 884bf215546Sopenharmony_ciir3_max_const(const struct ir3_shader_variant *v) 885bf215546Sopenharmony_ci{ 886bf215546Sopenharmony_ci const struct ir3_compiler *compiler = v->compiler; 887bf215546Sopenharmony_ci bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci /* Shared consts size for CS and FS matches with what's acutally used, 890bf215546Sopenharmony_ci * but the size of shared consts for geomtry stages doesn't. 891bf215546Sopenharmony_ci * So we use a hw quirk for geometry shared consts. 892bf215546Sopenharmony_ci */ 893bf215546Sopenharmony_ci uint32_t shared_consts_size = shared_consts_enable ? 894bf215546Sopenharmony_ci compiler->shared_consts_size : 0; 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci uint32_t shared_consts_size_geom = shared_consts_enable ? 897bf215546Sopenharmony_ci compiler->geom_shared_consts_size_quirk : 0; 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci uint32_t safe_shared_consts_size = shared_consts_enable ? 900bf215546Sopenharmony_ci ALIGN_POT(MAX2(DIV_ROUND_UP(shared_consts_size_geom, 4), 901bf215546Sopenharmony_ci DIV_ROUND_UP(shared_consts_size, 5)), 4) : 0; 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci if ((v->type == MESA_SHADER_COMPUTE) || 904bf215546Sopenharmony_ci (v->type == MESA_SHADER_KERNEL)) { 905bf215546Sopenharmony_ci return compiler->max_const_compute - shared_consts_size; 906bf215546Sopenharmony_ci } else if (v->key.safe_constlen) { 907bf215546Sopenharmony_ci return compiler->max_const_safe - safe_shared_consts_size; 908bf215546Sopenharmony_ci } else if (v->type == MESA_SHADER_FRAGMENT) { 909bf215546Sopenharmony_ci return compiler->max_const_frag - shared_consts_size; 910bf215546Sopenharmony_ci } else { 911bf215546Sopenharmony_ci return compiler->max_const_geom - shared_consts_size_geom; 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci} 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_civoid *ir3_shader_assemble(struct ir3_shader_variant *v); 916bf215546Sopenharmony_cistruct ir3_shader_variant * 917bf215546Sopenharmony_ciir3_shader_create_variant(struct ir3_shader *shader, 918bf215546Sopenharmony_ci const struct ir3_shader_key *key, 919bf215546Sopenharmony_ci bool keep_ir); 920bf215546Sopenharmony_cistruct ir3_shader_variant * 921bf215546Sopenharmony_ciir3_shader_get_variant(struct ir3_shader *shader, 922bf215546Sopenharmony_ci const struct ir3_shader_key *key, bool binning_pass, 923bf215546Sopenharmony_ci bool keep_ir, bool *created); 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_cistruct ir3_shader_options { 927bf215546Sopenharmony_ci unsigned reserved_user_consts; 928bf215546Sopenharmony_ci enum ir3_wavesize_option api_wavesize, real_wavesize; 929bf215546Sopenharmony_ci bool shared_consts_enable; 930bf215546Sopenharmony_ci}; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_cistruct ir3_shader * 933bf215546Sopenharmony_ciir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, 934bf215546Sopenharmony_ci const struct ir3_shader_options *options, 935bf215546Sopenharmony_ci struct ir3_stream_output_info *stream_output); 936bf215546Sopenharmony_ciuint32_t ir3_trim_constlen(struct ir3_shader_variant **variants, 937bf215546Sopenharmony_ci const struct ir3_compiler *compiler); 938bf215546Sopenharmony_civoid ir3_shader_destroy(struct ir3_shader *shader); 939bf215546Sopenharmony_civoid ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); 940bf215546Sopenharmony_ciuint64_t ir3_shader_outputs(const struct ir3_shader *so); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ciint ir3_glsl_type_size(const struct glsl_type *type, bool bindless); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci/* 945bf215546Sopenharmony_ci * Helper/util: 946bf215546Sopenharmony_ci */ 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci/* clears shader-key flags which don't apply to the given shader. 949bf215546Sopenharmony_ci */ 950bf215546Sopenharmony_cistatic inline void 951bf215546Sopenharmony_ciir3_key_clear_unused(struct ir3_shader_key *key, struct ir3_shader *shader) 952bf215546Sopenharmony_ci{ 953bf215546Sopenharmony_ci uint32_t *key_bits = (uint32_t *)key; 954bf215546Sopenharmony_ci uint32_t *key_mask = (uint32_t *)&shader->key_mask; 955bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(*key) % 4 == 0); 956bf215546Sopenharmony_ci for (int i = 0; i < sizeof(*key) >> 2; i++) 957bf215546Sopenharmony_ci key_bits[i] &= key_mask[i]; 958bf215546Sopenharmony_ci} 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_cistatic inline int 961bf215546Sopenharmony_ciir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) 962bf215546Sopenharmony_ci{ 963bf215546Sopenharmony_ci int j; 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci for (j = 0; j < so->outputs_count; j++) 966bf215546Sopenharmony_ci if (so->outputs[j].slot == slot) 967bf215546Sopenharmony_ci return j; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] 970bf215546Sopenharmony_ci * in the vertex shader.. but the fragment shader doesn't know this 971bf215546Sopenharmony_ci * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So 972bf215546Sopenharmony_ci * at link time if there is no matching OUT.BCOLOR[n], we must map 973bf215546Sopenharmony_ci * OUT.COLOR[n] to IN.BCOLOR[n]. And visa versa if there is only 974bf215546Sopenharmony_ci * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] 975bf215546Sopenharmony_ci */ 976bf215546Sopenharmony_ci if (slot == VARYING_SLOT_BFC0) { 977bf215546Sopenharmony_ci slot = VARYING_SLOT_COL0; 978bf215546Sopenharmony_ci } else if (slot == VARYING_SLOT_BFC1) { 979bf215546Sopenharmony_ci slot = VARYING_SLOT_COL1; 980bf215546Sopenharmony_ci } else if (slot == VARYING_SLOT_COL0) { 981bf215546Sopenharmony_ci slot = VARYING_SLOT_BFC0; 982bf215546Sopenharmony_ci } else if (slot == VARYING_SLOT_COL1) { 983bf215546Sopenharmony_ci slot = VARYING_SLOT_BFC1; 984bf215546Sopenharmony_ci } else { 985bf215546Sopenharmony_ci return -1; 986bf215546Sopenharmony_ci } 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci for (j = 0; j < so->outputs_count; j++) 989bf215546Sopenharmony_ci if (so->outputs[j].slot == slot) 990bf215546Sopenharmony_ci return j; 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci return -1; 993bf215546Sopenharmony_ci} 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_cistatic inline int 996bf215546Sopenharmony_ciir3_next_varying(const struct ir3_shader_variant *so, int i) 997bf215546Sopenharmony_ci{ 998bf215546Sopenharmony_ci while (++i < so->inputs_count) 999bf215546Sopenharmony_ci if (so->inputs[i].compmask && so->inputs[i].bary) 1000bf215546Sopenharmony_ci break; 1001bf215546Sopenharmony_ci return i; 1002bf215546Sopenharmony_ci} 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_cistruct ir3_shader_linkage { 1005bf215546Sopenharmony_ci /* Maximum location either consumed by the fragment shader or produced by 1006bf215546Sopenharmony_ci * the last geometry stage, i.e. the size required for each vertex in the 1007bf215546Sopenharmony_ci * VPC in DWORD's. 1008bf215546Sopenharmony_ci */ 1009bf215546Sopenharmony_ci uint8_t max_loc; 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci /* Number of entries in var. */ 1012bf215546Sopenharmony_ci uint8_t cnt; 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci /* Bitset of locations used, including ones which are only used by the FS. 1015bf215546Sopenharmony_ci */ 1016bf215546Sopenharmony_ci uint32_t varmask[4]; 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci /* Map from VS output to location. */ 1019bf215546Sopenharmony_ci struct { 1020bf215546Sopenharmony_ci uint8_t slot; 1021bf215546Sopenharmony_ci uint8_t regid; 1022bf215546Sopenharmony_ci uint8_t compmask; 1023bf215546Sopenharmony_ci uint8_t loc; 1024bf215546Sopenharmony_ci } var[32]; 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci /* location for fixed-function gl_PrimitiveID passthrough */ 1027bf215546Sopenharmony_ci uint8_t primid_loc; 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci /* location for fixed-function gl_ViewIndex passthrough */ 1030bf215546Sopenharmony_ci uint8_t viewid_loc; 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci /* location for combined clip/cull distance arrays */ 1033bf215546Sopenharmony_ci uint8_t clip0_loc, clip1_loc; 1034bf215546Sopenharmony_ci}; 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_cistatic inline void 1037bf215546Sopenharmony_ciir3_link_add(struct ir3_shader_linkage *l, uint8_t slot, uint8_t regid_, 1038bf215546Sopenharmony_ci uint8_t compmask, uint8_t loc) 1039bf215546Sopenharmony_ci{ 1040bf215546Sopenharmony_ci for (int j = 0; j < util_last_bit(compmask); j++) { 1041bf215546Sopenharmony_ci uint8_t comploc = loc + j; 1042bf215546Sopenharmony_ci l->varmask[comploc / 32] |= 1 << (comploc % 32); 1043bf215546Sopenharmony_ci } 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci if (regid_ != regid(63, 0)) { 1048bf215546Sopenharmony_ci int i = l->cnt++; 1049bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(l->var)); 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci l->var[i].slot = slot; 1052bf215546Sopenharmony_ci l->var[i].regid = regid_; 1053bf215546Sopenharmony_ci l->var[i].compmask = compmask; 1054bf215546Sopenharmony_ci l->var[i].loc = loc; 1055bf215546Sopenharmony_ci } 1056bf215546Sopenharmony_ci} 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_cistatic inline void 1059bf215546Sopenharmony_ciir3_link_shaders(struct ir3_shader_linkage *l, 1060bf215546Sopenharmony_ci const struct ir3_shader_variant *vs, 1061bf215546Sopenharmony_ci const struct ir3_shader_variant *fs, bool pack_vs_out) 1062bf215546Sopenharmony_ci{ 1063bf215546Sopenharmony_ci /* On older platforms, varmask isn't programmed at all, and it appears 1064bf215546Sopenharmony_ci * that the hardware generates a mask of used VPC locations using the VS 1065bf215546Sopenharmony_ci * output map, and hangs if a FS bary instruction references a location 1066bf215546Sopenharmony_ci * not in the list. This means that we need to have a dummy entry in the 1067bf215546Sopenharmony_ci * VS out map for things like gl_PointCoord which aren't written by the 1068bf215546Sopenharmony_ci * VS. Furthermore we can't use r63.x, so just pick a random register to 1069bf215546Sopenharmony_ci * use if there is no VS output. 1070bf215546Sopenharmony_ci */ 1071bf215546Sopenharmony_ci const unsigned default_regid = pack_vs_out ? regid(63, 0) : regid(0, 0); 1072bf215546Sopenharmony_ci int j = -1, k; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci l->primid_loc = 0xff; 1075bf215546Sopenharmony_ci l->viewid_loc = 0xff; 1076bf215546Sopenharmony_ci l->clip0_loc = 0xff; 1077bf215546Sopenharmony_ci l->clip1_loc = 0xff; 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci while (l->cnt < ARRAY_SIZE(l->var)) { 1080bf215546Sopenharmony_ci j = ir3_next_varying(fs, j); 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci if (j >= fs->inputs_count) 1083bf215546Sopenharmony_ci break; 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci if (fs->inputs[j].inloc >= fs->total_in) 1086bf215546Sopenharmony_ci continue; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci k = ir3_find_output(vs, fs->inputs[j].slot); 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci if (k < 0 && fs->inputs[j].slot == VARYING_SLOT_PRIMITIVE_ID) { 1091bf215546Sopenharmony_ci l->primid_loc = fs->inputs[j].inloc; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci if (fs->inputs[j].slot == VARYING_SLOT_VIEW_INDEX) { 1095bf215546Sopenharmony_ci assert(k < 0); 1096bf215546Sopenharmony_ci l->viewid_loc = fs->inputs[j].inloc; 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST0) 1100bf215546Sopenharmony_ci l->clip0_loc = fs->inputs[j].inloc; 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1) 1103bf215546Sopenharmony_ci l->clip1_loc = fs->inputs[j].inloc; 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci ir3_link_add(l, fs->inputs[j].slot, 1106bf215546Sopenharmony_ci k >= 0 ? vs->outputs[k].regid : default_regid, 1107bf215546Sopenharmony_ci fs->inputs[j].compmask, fs->inputs[j].inloc); 1108bf215546Sopenharmony_ci } 1109bf215546Sopenharmony_ci} 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_cistatic inline uint32_t 1112bf215546Sopenharmony_ciir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) 1113bf215546Sopenharmony_ci{ 1114bf215546Sopenharmony_ci int j; 1115bf215546Sopenharmony_ci for (j = 0; j < so->outputs_count; j++) 1116bf215546Sopenharmony_ci if (so->outputs[j].slot == slot) { 1117bf215546Sopenharmony_ci uint32_t regid = so->outputs[j].regid; 1118bf215546Sopenharmony_ci if (so->outputs[j].half) 1119bf215546Sopenharmony_ci regid |= HALF_REG_ID; 1120bf215546Sopenharmony_ci return regid; 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci return regid(63, 0); 1123bf215546Sopenharmony_ci} 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_civoid ir3_link_stream_out(struct ir3_shader_linkage *l, 1126bf215546Sopenharmony_ci const struct ir3_shader_variant *v); 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci#define VARYING_SLOT_GS_HEADER_IR3 (VARYING_SLOT_MAX + 0) 1129bf215546Sopenharmony_ci#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1) 1130bf215546Sopenharmony_ci#define VARYING_SLOT_TCS_HEADER_IR3 (VARYING_SLOT_MAX + 2) 1131bf215546Sopenharmony_ci#define VARYING_SLOT_REL_PATCH_ID_IR3 (VARYING_SLOT_MAX + 3) 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_cistatic inline uint32_t 1134bf215546Sopenharmony_ciir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot) 1135bf215546Sopenharmony_ci{ 1136bf215546Sopenharmony_ci int j; 1137bf215546Sopenharmony_ci for (j = 0; j < so->inputs_count; j++) 1138bf215546Sopenharmony_ci if (so->inputs[j].sysval && (so->inputs[j].slot == slot)) 1139bf215546Sopenharmony_ci return so->inputs[j].regid; 1140bf215546Sopenharmony_ci return regid(63, 0); 1141bf215546Sopenharmony_ci} 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci/* calculate register footprint in terms of half-regs (ie. one full 1144bf215546Sopenharmony_ci * reg counts as two half-regs). 1145bf215546Sopenharmony_ci */ 1146bf215546Sopenharmony_cistatic inline uint32_t 1147bf215546Sopenharmony_ciir3_shader_halfregs(const struct ir3_shader_variant *v) 1148bf215546Sopenharmony_ci{ 1149bf215546Sopenharmony_ci return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); 1150bf215546Sopenharmony_ci} 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_cistatic inline uint32_t 1153bf215546Sopenharmony_ciir3_shader_nibo(const struct ir3_shader_variant *v) 1154bf215546Sopenharmony_ci{ 1155bf215546Sopenharmony_ci return v->num_ibos; 1156bf215546Sopenharmony_ci} 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_cistatic inline uint32_t 1159bf215546Sopenharmony_ciir3_shader_branchstack_hw(const struct ir3_shader_variant *v) 1160bf215546Sopenharmony_ci{ 1161bf215546Sopenharmony_ci /* Dummy shader */ 1162bf215546Sopenharmony_ci if (!v->compiler) 1163bf215546Sopenharmony_ci return 0; 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci if (v->compiler->gen < 5) 1166bf215546Sopenharmony_ci return v->branchstack; 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci if (v->branchstack > 0) { 1169bf215546Sopenharmony_ci uint32_t branchstack = v->branchstack / 2 + 1; 1170bf215546Sopenharmony_ci return MIN2(branchstack, v->compiler->branchstack_size / 2); 1171bf215546Sopenharmony_ci } else { 1172bf215546Sopenharmony_ci return 0; 1173bf215546Sopenharmony_ci } 1174bf215546Sopenharmony_ci} 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci#endif /* IR3_SHADER_H_ */ 1177