1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#ifndef IR3_COMPILER_H_ 28bf215546Sopenharmony_ci#define IR3_COMPILER_H_ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 31bf215546Sopenharmony_ci#include "util/disk_cache.h" 32bf215546Sopenharmony_ci#include "util/log.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "freedreno_dev_info.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "ir3.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistruct ir3_ra_reg_set; 39bf215546Sopenharmony_cistruct ir3_shader; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistruct ir3_compiler { 42bf215546Sopenharmony_ci struct fd_device *dev; 43bf215546Sopenharmony_ci const struct fd_dev_id *dev_id; 44bf215546Sopenharmony_ci uint8_t gen; 45bf215546Sopenharmony_ci uint32_t shader_count; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci struct disk_cache *disk_cache; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci struct nir_shader_compiler_options nir_options; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci bool robust_buffer_access2; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* 54bf215546Sopenharmony_ci * Configuration options for things that are handled differently on 55bf215546Sopenharmony_ci * different generations: 56bf215546Sopenharmony_ci */ 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate 59bf215546Sopenharmony_ci * so we need to use ldlv.u32 to load the varying directly: 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_ci bool flat_bypass; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci /* on a3xx, we need to add one to # of array levels: 64bf215546Sopenharmony_ci */ 65bf215546Sopenharmony_ci bool levels_add_one; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /* on a3xx, we need to scale up integer coords for isaml based 68bf215546Sopenharmony_ci * on LoD: 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci bool unminify_coords; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci /* on a3xx do txf_ms w/ isaml and scaled coords: */ 73bf215546Sopenharmony_ci bool txf_ms_with_isaml; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* on a4xx, for array textures we need to add 0.5 to the array 76bf215546Sopenharmony_ci * index coordinate: 77bf215546Sopenharmony_ci */ 78bf215546Sopenharmony_ci bool array_index_add_half; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci /* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders: 81bf215546Sopenharmony_ci */ 82bf215546Sopenharmony_ci bool samgq_workaround; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci /* on a650, vertex shader <-> tess control io uses LDL/STL */ 85bf215546Sopenharmony_ci bool tess_use_shared; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /* The maximum number of constants, in vec4's, across the entire graphics 88bf215546Sopenharmony_ci * pipeline. 89bf215546Sopenharmony_ci */ 90bf215546Sopenharmony_ci uint16_t max_const_pipeline; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci /* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */ 93bf215546Sopenharmony_ci uint16_t max_const_geom; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci /* The maximum number of constants, in vec4's, for FS. */ 96bf215546Sopenharmony_ci uint16_t max_const_frag; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci /* A "safe" max constlen that can be applied to each shader in the 99bf215546Sopenharmony_ci * pipeline which we guarantee will never exceed any combined limits. 100bf215546Sopenharmony_ci */ 101bf215546Sopenharmony_ci uint16_t max_const_safe; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci /* The maximum number of constants, in vec4's, for compute shaders. */ 104bf215546Sopenharmony_ci uint16_t max_const_compute; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* Number of instructions that the shader's base address and length 107bf215546Sopenharmony_ci * (instrlen divides instruction count by this) must be aligned to. 108bf215546Sopenharmony_ci */ 109bf215546Sopenharmony_ci uint32_t instr_align; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci /* on a3xx, the unit of indirect const load is higher than later gens (in 112bf215546Sopenharmony_ci * vec4 units): 113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_ci uint32_t const_upload_unit; 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* The base number of threads per wave. Some stages may be able to double 117bf215546Sopenharmony_ci * this. 118bf215546Sopenharmony_ci */ 119bf215546Sopenharmony_ci uint32_t threadsize_base; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* On at least a6xx, waves are always launched in pairs. In calculations 122bf215546Sopenharmony_ci * about occupancy, we pretend that each wave pair is actually one wave, 123bf215546Sopenharmony_ci * which simplifies many of the calculations, but means we have to 124bf215546Sopenharmony_ci * multiply threadsize_base by this number. 125bf215546Sopenharmony_ci */ 126bf215546Sopenharmony_ci uint32_t wave_granularity; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* The maximum number of simultaneous waves per core. */ 129bf215546Sopenharmony_ci uint32_t max_waves; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci /* This is theoretical maximum number of vec4 registers that one wave of 132bf215546Sopenharmony_ci * the base threadsize could use. To get the actual size of the register 133bf215546Sopenharmony_ci * file in bytes one would need to compute: 134bf215546Sopenharmony_ci * 135bf215546Sopenharmony_ci * reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4) 136bf215546Sopenharmony_ci * 137bf215546Sopenharmony_ci * However this number is more often what we actually need. For example, a 138bf215546Sopenharmony_ci * max_reg more than half of this will result in a doubled threadsize 139bf215546Sopenharmony_ci * being impossible (because double-sized waves take up twice as many 140bf215546Sopenharmony_ci * registers). Also, the formula for the occupancy given a particular 141bf215546Sopenharmony_ci * register footprint is simpler. 142bf215546Sopenharmony_ci * 143bf215546Sopenharmony_ci * It is in vec4 units because the register file is allocated 144bf215546Sopenharmony_ci * with vec4 granularity, so it's in the same units as max_reg. 145bf215546Sopenharmony_ci */ 146bf215546Sopenharmony_ci uint32_t reg_size_vec4; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci /* The size of local memory in bytes */ 149bf215546Sopenharmony_ci uint32_t local_mem_size; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci /* The number of total branch stack entries, divided by wave_granularity. */ 152bf215546Sopenharmony_ci uint32_t branchstack_size; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci /* Whether clip+cull distances are supported */ 155bf215546Sopenharmony_ci bool has_clip_cull; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* Whether private memory is supported */ 158bf215546Sopenharmony_ci bool has_pvtmem; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */ 161bf215546Sopenharmony_ci bool storage_16bit; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci /* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle 164bf215546Sopenharmony_ci * instructions are supported which are necessary to support 165bf215546Sopenharmony_ci * subgroup quad and arithmetic operations. 166bf215546Sopenharmony_ci */ 167bf215546Sopenharmony_ci bool has_getfiberid; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */ 170bf215546Sopenharmony_ci uint32_t max_variable_workgroup_size; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci bool has_dp2acc; 173bf215546Sopenharmony_ci bool has_dp4acc; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci /* Type to use for 1b nir bools: */ 176bf215546Sopenharmony_ci type_t bool_type; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci /* Whether compute invocation params are passed in via shared regfile or 179bf215546Sopenharmony_ci * constbuf. a5xx+ has the shared regfile. 180bf215546Sopenharmony_ci */ 181bf215546Sopenharmony_ci bool has_shared_regfile; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci /* True if preamble instructions (shps, shpe, etc.) are supported */ 184bf215546Sopenharmony_ci bool has_preamble; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci bool push_ubo_with_preamble; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* Where the shared consts start in constants file, in vec4's. */ 189bf215546Sopenharmony_ci uint16_t shared_consts_base_offset; 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci /* The size of shared consts for CS and FS(in vec4's). 192bf215546Sopenharmony_ci * Also the size that is actually used on geometry stages (on a6xx). 193bf215546Sopenharmony_ci */ 194bf215546Sopenharmony_ci uint64_t shared_consts_size; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Found on a6xx for geometry stages, that is different from 197bf215546Sopenharmony_ci * actually used shared consts. 198bf215546Sopenharmony_ci * 199bf215546Sopenharmony_ci * TODO: Keep an eye on this for next gens. 200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_ci uint64_t geom_shared_consts_size_quirk; 202bf215546Sopenharmony_ci}; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_cistruct ir3_compiler_options { 205bf215546Sopenharmony_ci /* If true, UBO/SSBO accesses are assumed to be bounds-checked as defined by 206bf215546Sopenharmony_ci * VK_EXT_robustness2 and optimizations may have to be more conservative. 207bf215546Sopenharmony_ci */ 208bf215546Sopenharmony_ci bool robust_buffer_access2; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* If true, promote UBOs (except for constant data) to constants using ldc.k 211bf215546Sopenharmony_ci * in the preamble. The driver should ignore everything in ubo_state except 212bf215546Sopenharmony_ci * for the constant data UBO, which is excluded because the command pushing 213bf215546Sopenharmony_ci * constants for it can be pre-baked when compiling the shader. 214bf215546Sopenharmony_ci */ 215bf215546Sopenharmony_ci bool push_ubo_with_preamble; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* If true, disable the shader cache. The driver is then responsible for 218bf215546Sopenharmony_ci * caching. 219bf215546Sopenharmony_ci */ 220bf215546Sopenharmony_ci bool disable_cache; 221bf215546Sopenharmony_ci}; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_civoid ir3_compiler_destroy(struct ir3_compiler *compiler); 224bf215546Sopenharmony_cistruct ir3_compiler *ir3_compiler_create(struct fd_device *dev, 225bf215546Sopenharmony_ci const struct fd_dev_id *dev_id, 226bf215546Sopenharmony_ci const struct ir3_compiler_options *options); 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_civoid ir3_disk_cache_init(struct ir3_compiler *compiler); 229bf215546Sopenharmony_civoid ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, 230bf215546Sopenharmony_ci struct ir3_shader *shader); 231bf215546Sopenharmony_cistruct ir3_shader_variant *ir3_retrieve_variant(struct blob_reader *blob, 232bf215546Sopenharmony_ci struct ir3_compiler *compiler, 233bf215546Sopenharmony_ci void *mem_ctx); 234bf215546Sopenharmony_civoid ir3_store_variant(struct blob *blob, struct ir3_shader_variant *v); 235bf215546Sopenharmony_cibool ir3_disk_cache_retrieve(struct ir3_shader *shader, 236bf215546Sopenharmony_ci struct ir3_shader_variant *v); 237bf215546Sopenharmony_civoid ir3_disk_cache_store(struct ir3_shader *shader, 238bf215546Sopenharmony_ci struct ir3_shader_variant *v); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ciconst nir_shader_compiler_options * 241bf215546Sopenharmony_ciir3_get_compiler_options(struct ir3_compiler *compiler); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ciint ir3_compile_shader_nir(struct ir3_compiler *compiler, 244bf215546Sopenharmony_ci struct ir3_shader *shader, 245bf215546Sopenharmony_ci struct ir3_shader_variant *so); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci/* gpu pointer size in units of 32bit registers/slots */ 248bf215546Sopenharmony_cistatic inline unsigned 249bf215546Sopenharmony_ciir3_pointer_size(struct ir3_compiler *compiler) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci return fd_dev_64b(compiler->dev_id) ? 2 : 1; 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_cienum ir3_shader_debug { 255bf215546Sopenharmony_ci IR3_DBG_SHADER_VS = BITFIELD_BIT(0), 256bf215546Sopenharmony_ci IR3_DBG_SHADER_TCS = BITFIELD_BIT(1), 257bf215546Sopenharmony_ci IR3_DBG_SHADER_TES = BITFIELD_BIT(2), 258bf215546Sopenharmony_ci IR3_DBG_SHADER_GS = BITFIELD_BIT(3), 259bf215546Sopenharmony_ci IR3_DBG_SHADER_FS = BITFIELD_BIT(4), 260bf215546Sopenharmony_ci IR3_DBG_SHADER_CS = BITFIELD_BIT(5), 261bf215546Sopenharmony_ci IR3_DBG_DISASM = BITFIELD_BIT(6), 262bf215546Sopenharmony_ci IR3_DBG_OPTMSGS = BITFIELD_BIT(7), 263bf215546Sopenharmony_ci IR3_DBG_FORCES2EN = BITFIELD_BIT(8), 264bf215546Sopenharmony_ci IR3_DBG_NOUBOOPT = BITFIELD_BIT(9), 265bf215546Sopenharmony_ci IR3_DBG_NOFP16 = BITFIELD_BIT(10), 266bf215546Sopenharmony_ci IR3_DBG_NOCACHE = BITFIELD_BIT(11), 267bf215546Sopenharmony_ci IR3_DBG_SPILLALL = BITFIELD_BIT(12), 268bf215546Sopenharmony_ci IR3_DBG_NOPREAMBLE = BITFIELD_BIT(13), 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci /* DEBUG-only options: */ 271bf215546Sopenharmony_ci IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), 272bf215546Sopenharmony_ci IR3_DBG_RAMSGS = BITFIELD_BIT(21), 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci /* Only used for the disk-caching logic: */ 275bf215546Sopenharmony_ci IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30), 276bf215546Sopenharmony_ci}; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ciextern enum ir3_shader_debug ir3_shader_debug; 279bf215546Sopenharmony_ciextern const char *ir3_shader_override_path; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_cistatic inline bool 282bf215546Sopenharmony_cishader_debug_enabled(gl_shader_stage type) 283bf215546Sopenharmony_ci{ 284bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_DISASM) 285bf215546Sopenharmony_ci return true; 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci switch (type) { 288bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 289bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_VS); 290bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 291bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS); 292bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 293bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_TES); 294bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 295bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_GS); 296bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 297bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_FS); 298bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 299bf215546Sopenharmony_ci case MESA_SHADER_KERNEL: 300bf215546Sopenharmony_ci return !!(ir3_shader_debug & IR3_DBG_SHADER_CS); 301bf215546Sopenharmony_ci default: 302bf215546Sopenharmony_ci assert(0); 303bf215546Sopenharmony_ci return false; 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci} 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_cistatic inline void 308bf215546Sopenharmony_ciir3_debug_print(struct ir3 *ir, const char *when) 309bf215546Sopenharmony_ci{ 310bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_OPTMSGS) { 311bf215546Sopenharmony_ci mesa_logi("%s:", when); 312bf215546Sopenharmony_ci ir3_print(ir); 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci} 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci#endif /* IR3_COMPILER_H_ */ 317