1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "pipe/p_screen.h" 28bf215546Sopenharmony_ci#include "pipe/p_state.h" 29bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h" 30bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h" 31bf215546Sopenharmony_ci#include "util/format/u_format.h" 32bf215546Sopenharmony_ci#include "util/u_inlines.h" 33bf215546Sopenharmony_ci#include "util/u_memory.h" 34bf215546Sopenharmony_ci#include "util/u_string.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h" 37bf215546Sopenharmony_ci#include "nir_serialize.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "freedreno_context.h" 40bf215546Sopenharmony_ci#include "freedreno_util.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "ir3/ir3_cache.h" 43bf215546Sopenharmony_ci#include "ir3/ir3_compiler.h" 44bf215546Sopenharmony_ci#include "ir3/ir3_gallium.h" 45bf215546Sopenharmony_ci#include "ir3/ir3_nir.h" 46bf215546Sopenharmony_ci#include "ir3/ir3_shader.h" 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci/** 49bf215546Sopenharmony_ci * The hardware cso for shader state 50bf215546Sopenharmony_ci * 51bf215546Sopenharmony_ci * Initially just a container for the ir3_shader, but this is where we'll 52bf215546Sopenharmony_ci * plumb in async compile. 53bf215546Sopenharmony_ci */ 54bf215546Sopenharmony_cistruct ir3_shader_state { 55bf215546Sopenharmony_ci struct ir3_shader *shader; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* Fence signalled when async compile is completed: */ 58bf215546Sopenharmony_ci struct util_queue_fence ready; 59bf215546Sopenharmony_ci}; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci/** 62bf215546Sopenharmony_ci * Should initial variants be compiled synchronously? 63bf215546Sopenharmony_ci * 64bf215546Sopenharmony_ci * The only case where util_debug_message() is used in the initial-variants 65bf215546Sopenharmony_ci * path is with FD_MESA_DEBUG=shaderdb. So if either debug is disabled (ie. 66bf215546Sopenharmony_ci * debug.debug_message==NULL), or shaderdb stats are not enabled, we can 67bf215546Sopenharmony_ci * compile the initial shader variant asynchronously. 68bf215546Sopenharmony_ci */ 69bf215546Sopenharmony_cistatic bool 70bf215546Sopenharmony_ciinitial_variants_synchronous(struct fd_context *ctx) 71bf215546Sopenharmony_ci{ 72bf215546Sopenharmony_ci return unlikely(ctx->debug.debug_message) || FD_DBG(SHADERDB) || 73bf215546Sopenharmony_ci FD_DBG(SERIALC); 74bf215546Sopenharmony_ci} 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_cistatic void 77bf215546Sopenharmony_cidump_shader_info(struct ir3_shader_variant *v, 78bf215546Sopenharmony_ci struct util_debug_callback *debug) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci if (!FD_DBG(SHADERDB)) 81bf215546Sopenharmony_ci return; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci util_debug_message( 84bf215546Sopenharmony_ci debug, SHADER_INFO, 85bf215546Sopenharmony_ci "%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, " 86bf215546Sopenharmony_ci "%u dwords, %u last-baryf, %u half, %u full, %u constlen, " 87bf215546Sopenharmony_ci "%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, " 88bf215546Sopenharmony_ci "%u stp, %u ldp, %u sstall, %u (ss), %u systall, %u (sy), %d waves, " 89bf215546Sopenharmony_ci "%d loops\n", 90bf215546Sopenharmony_ci ir3_shader_stage(v), v->info.instrs_count, v->info.nops_count, 91bf215546Sopenharmony_ci v->info.instrs_count - v->info.nops_count, v->info.mov_count, 92bf215546Sopenharmony_ci v->info.cov_count, v->info.sizedwords, v->info.last_baryf, 93bf215546Sopenharmony_ci v->info.max_half_reg + 1, v->info.max_reg + 1, v->constlen, 94bf215546Sopenharmony_ci v->info.instrs_per_cat[0], v->info.instrs_per_cat[1], 95bf215546Sopenharmony_ci v->info.instrs_per_cat[2], v->info.instrs_per_cat[3], 96bf215546Sopenharmony_ci v->info.instrs_per_cat[4], v->info.instrs_per_cat[5], 97bf215546Sopenharmony_ci v->info.instrs_per_cat[6], v->info.instrs_per_cat[7], 98bf215546Sopenharmony_ci v->info.stp_count, v->info.ldp_count, v->info.sstall, 99bf215546Sopenharmony_ci v->info.ss, v->info.systall, v->info.sy, v->info.max_waves, v->loops); 100bf215546Sopenharmony_ci} 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_cistatic void 103bf215546Sopenharmony_ciupload_shader_variant(struct ir3_shader_variant *v) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci struct ir3_compiler *compiler = v->compiler; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci assert(!v->bo); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci v->bo = 110bf215546Sopenharmony_ci fd_bo_new(compiler->dev, v->info.size, FD_BO_NOMAP, 111bf215546Sopenharmony_ci "%s:%s", ir3_shader_stage(v), v->name); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* Always include shaders in kernel crash dumps. */ 114bf215546Sopenharmony_ci fd_bo_mark_for_dump(v->bo); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci fd_bo_upload(v->bo, v->bin, v->info.size); 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_cistruct ir3_shader_variant * 120bf215546Sopenharmony_ciir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, 121bf215546Sopenharmony_ci bool binning_pass, struct util_debug_callback *debug) 122bf215546Sopenharmony_ci{ 123bf215546Sopenharmony_ci struct ir3_shader_variant *v; 124bf215546Sopenharmony_ci bool created = false; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci /* Some shader key values may not be used by a given ir3_shader (for 127bf215546Sopenharmony_ci * example, fragment shader saturates in the vertex shader), so clean out 128bf215546Sopenharmony_ci * those flags to avoid recompiling. 129bf215546Sopenharmony_ci */ 130bf215546Sopenharmony_ci ir3_key_clear_unused(&key, shader); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci v = ir3_shader_get_variant(shader, &key, binning_pass, false, &created); 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (created) { 135bf215546Sopenharmony_ci if (shader->initial_variants_done) { 136bf215546Sopenharmony_ci perf_debug_message(debug, SHADER_INFO, 137bf215546Sopenharmony_ci "%s shader: recompiling at draw time: global " 138bf215546Sopenharmony_ci "0x%08x, vfsamples %x/%x, astc %x/%x\n", 139bf215546Sopenharmony_ci ir3_shader_stage(v), key.global, key.vsamples, 140bf215546Sopenharmony_ci key.fsamples, key.vastc_srgb, key.fastc_srgb); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci dump_shader_info(v, debug); 144bf215546Sopenharmony_ci upload_shader_variant(v); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci if (v->binning) { 147bf215546Sopenharmony_ci upload_shader_variant(v->binning); 148bf215546Sopenharmony_ci dump_shader_info(v->binning, debug); 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci } 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci return v; 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_cistatic void 156bf215546Sopenharmony_cicopy_stream_out(struct ir3_stream_output_info *i, 157bf215546Sopenharmony_ci const struct pipe_stream_output_info *p) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride)); 160bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output)); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci i->num_outputs = p->num_outputs; 163bf215546Sopenharmony_ci for (int n = 0; n < ARRAY_SIZE(i->stride); n++) 164bf215546Sopenharmony_ci i->stride[n] = p->stride[n]; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci for (int n = 0; n < ARRAY_SIZE(i->output); n++) { 167bf215546Sopenharmony_ci i->output[n].register_index = p->output[n].register_index; 168bf215546Sopenharmony_ci i->output[n].start_component = p->output[n].start_component; 169bf215546Sopenharmony_ci i->output[n].num_components = p->output[n].num_components; 170bf215546Sopenharmony_ci i->output[n].output_buffer = p->output[n].output_buffer; 171bf215546Sopenharmony_ci i->output[n].dst_offset = p->output[n].dst_offset; 172bf215546Sopenharmony_ci i->output[n].stream = p->output[n].stream; 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic void 177bf215546Sopenharmony_cicreate_initial_variants(struct ir3_shader_state *hwcso, 178bf215546Sopenharmony_ci struct util_debug_callback *debug) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci struct ir3_shader *shader = hwcso->shader; 181bf215546Sopenharmony_ci struct ir3_compiler *compiler = shader->compiler; 182bf215546Sopenharmony_ci nir_shader *nir = shader->nir; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci /* Compile standard variants immediately to try to avoid draw-time stalls 185bf215546Sopenharmony_ci * to run the compiler. 186bf215546Sopenharmony_ci */ 187bf215546Sopenharmony_ci struct ir3_shader_key key = { 188bf215546Sopenharmony_ci .tessellation = IR3_TESS_NONE, 189bf215546Sopenharmony_ci .ucp_enables = MASK(nir->info.clip_distance_array_size), 190bf215546Sopenharmony_ci .msaa = true, 191bf215546Sopenharmony_ci }; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci switch (nir->info.stage) { 194bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 195bf215546Sopenharmony_ci key.tessellation = ir3_tess_mode(nir->info.tess._primitive_mode); 196bf215546Sopenharmony_ci break; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 199bf215546Sopenharmony_ci /* The primitive_mode field, while it exists for TCS, is not 200bf215546Sopenharmony_ci * populated (since separable shaders between TCS/TES are legal, 201bf215546Sopenharmony_ci * so TCS wouldn't have access to TES's declaration). Make a 202bf215546Sopenharmony_ci * guess so that we shader-db something plausible for TCS. 203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_ci if (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER) 205bf215546Sopenharmony_ci key.tessellation = IR3_TESS_TRIANGLES; 206bf215546Sopenharmony_ci else 207bf215546Sopenharmony_ci key.tessellation = IR3_TESS_ISOLINES; 208bf215546Sopenharmony_ci break; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 211bf215546Sopenharmony_ci key.has_gs = true; 212bf215546Sopenharmony_ci break; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci default: 215bf215546Sopenharmony_ci break; 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci key.safe_constlen = false; 219bf215546Sopenharmony_ci struct ir3_shader_variant *v = ir3_shader_variant(shader, key, false, debug); 220bf215546Sopenharmony_ci if (!v) 221bf215546Sopenharmony_ci return; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci if (v->constlen > compiler->max_const_safe) { 224bf215546Sopenharmony_ci key.safe_constlen = true; 225bf215546Sopenharmony_ci ir3_shader_variant(shader, key, false, debug); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci /* For vertex shaders, also compile initial binning pass shader: */ 229bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX) { 230bf215546Sopenharmony_ci key.safe_constlen = false; 231bf215546Sopenharmony_ci v = ir3_shader_variant(shader, key, true, debug); 232bf215546Sopenharmony_ci if (!v) 233bf215546Sopenharmony_ci return; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci if (v->constlen > compiler->max_const_safe) { 236bf215546Sopenharmony_ci key.safe_constlen = true; 237bf215546Sopenharmony_ci ir3_shader_variant(shader, key, true, debug); 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci shader->initial_variants_done = true; 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic void 245bf215546Sopenharmony_cicreate_initial_variants_async(void *job, void *gdata, int thread_index) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = job; 248bf215546Sopenharmony_ci struct util_debug_callback debug = {}; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci create_initial_variants(hwcso, &debug); 251bf215546Sopenharmony_ci} 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_cistatic void 254bf215546Sopenharmony_cicreate_initial_compute_variants_async(void *job, void *gdata, int thread_index) 255bf215546Sopenharmony_ci{ 256bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = job; 257bf215546Sopenharmony_ci struct ir3_shader *shader = hwcso->shader; 258bf215546Sopenharmony_ci struct util_debug_callback debug = {}; 259bf215546Sopenharmony_ci static struct ir3_shader_key key; /* static is implicitly zeroed */ 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci ir3_shader_variant(shader, key, false, &debug); 262bf215546Sopenharmony_ci shader->initial_variants_done = true; 263bf215546Sopenharmony_ci} 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci/* a bit annoying that compute-shader and normal shader state objects 266bf215546Sopenharmony_ci * aren't a bit more aligned. 267bf215546Sopenharmony_ci */ 268bf215546Sopenharmony_civoid * 269bf215546Sopenharmony_ciir3_shader_compute_state_create(struct pipe_context *pctx, 270bf215546Sopenharmony_ci const struct pipe_compute_state *cso) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(pctx); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci /* req_input_mem will only be non-zero for cl kernels (ie. clover). 275bf215546Sopenharmony_ci * This isn't a perfect test because I guess it is possible (but 276bf215546Sopenharmony_ci * uncommon) for none for the kernel parameters to be a global, 277bf215546Sopenharmony_ci * but ctx->set_global_bindings() can't fail, so this is the next 278bf215546Sopenharmony_ci * best place to fail if we need a newer version of kernel driver: 279bf215546Sopenharmony_ci */ 280bf215546Sopenharmony_ci if ((cso->req_input_mem > 0) && 281bf215546Sopenharmony_ci fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) { 282bf215546Sopenharmony_ci return NULL; 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci struct ir3_compiler *compiler = ctx->screen->compiler; 286bf215546Sopenharmony_ci nir_shader *nir; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci if (cso->ir_type == PIPE_SHADER_IR_NIR) { 289bf215546Sopenharmony_ci /* we take ownership of the reference: */ 290bf215546Sopenharmony_ci nir = (nir_shader *)cso->prog; 291bf215546Sopenharmony_ci } else if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) { 292bf215546Sopenharmony_ci const nir_shader_compiler_options *options = 293bf215546Sopenharmony_ci ir3_get_compiler_options(compiler); 294bf215546Sopenharmony_ci const struct pipe_binary_program_header *hdr = cso->prog; 295bf215546Sopenharmony_ci struct blob_reader reader; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci blob_reader_init(&reader, hdr->blob, hdr->num_bytes); 298bf215546Sopenharmony_ci nir = nir_deserialize(NULL, options, &reader); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci ir3_finalize_nir(compiler, nir); 301bf215546Sopenharmony_ci } else { 302bf215546Sopenharmony_ci assert(cso->ir_type == PIPE_SHADER_IR_TGSI); 303bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_DISASM) { 304bf215546Sopenharmony_ci tgsi_dump(cso->prog, 0); 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci nir = tgsi_to_nir(cso->prog, pctx->screen, false); 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci struct ir3_shader *shader = 310bf215546Sopenharmony_ci ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){ 311bf215546Sopenharmony_ci /* TODO: force to single on a6xx with legacy 312bf215546Sopenharmony_ci * ballot extension that uses 64-bit masks 313bf215546Sopenharmony_ci */ 314bf215546Sopenharmony_ci .api_wavesize = IR3_SINGLE_OR_DOUBLE, 315bf215546Sopenharmony_ci .real_wavesize = IR3_SINGLE_OR_DOUBLE, 316bf215546Sopenharmony_ci }, NULL); 317bf215546Sopenharmony_ci shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */ 318bf215546Sopenharmony_ci shader->cs.req_local_mem = cso->req_local_mem; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso)); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci util_queue_fence_init(&hwcso->ready); 323bf215546Sopenharmony_ci hwcso->shader = shader; 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci /* Immediately compile a standard variant. We have so few variants in our 326bf215546Sopenharmony_ci * shaders, that doing so almost eliminates draw-time recompiles. (This 327bf215546Sopenharmony_ci * is also how we get data from shader-db's ./run) 328bf215546Sopenharmony_ci */ 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci if (initial_variants_synchronous(ctx)) { 331bf215546Sopenharmony_ci static struct ir3_shader_key key; /* static is implicitly zeroed */ 332bf215546Sopenharmony_ci ir3_shader_variant(shader, key, false, &ctx->debug); 333bf215546Sopenharmony_ci shader->initial_variants_done = true; 334bf215546Sopenharmony_ci } else { 335bf215546Sopenharmony_ci struct fd_screen *screen = ctx->screen; 336bf215546Sopenharmony_ci util_queue_add_job(&screen->compile_queue, hwcso, &hwcso->ready, 337bf215546Sopenharmony_ci create_initial_compute_variants_async, NULL, 0); 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci return hwcso; 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_civoid * 344bf215546Sopenharmony_ciir3_shader_state_create(struct pipe_context *pctx, 345bf215546Sopenharmony_ci const struct pipe_shader_state *cso) 346bf215546Sopenharmony_ci{ 347bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(pctx); 348bf215546Sopenharmony_ci struct ir3_compiler *compiler = ctx->screen->compiler; 349bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso)); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci /* 352bf215546Sopenharmony_ci * Convert to nir (if necessary): 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci nir_shader *nir; 356bf215546Sopenharmony_ci if (cso->type == PIPE_SHADER_IR_NIR) { 357bf215546Sopenharmony_ci /* we take ownership of the reference: */ 358bf215546Sopenharmony_ci nir = cso->ir.nir; 359bf215546Sopenharmony_ci } else { 360bf215546Sopenharmony_ci assert(cso->type == PIPE_SHADER_IR_TGSI); 361bf215546Sopenharmony_ci if (ir3_shader_debug & IR3_DBG_DISASM) { 362bf215546Sopenharmony_ci tgsi_dump(cso->tokens, 0); 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci nir = tgsi_to_nir(cso->tokens, pctx->screen, false); 365bf215546Sopenharmony_ci } 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci /* 368bf215546Sopenharmony_ci * Create ir3_shader: 369bf215546Sopenharmony_ci * 370bf215546Sopenharmony_ci * This part is cheap, it doesn't compile initial variants 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci struct ir3_stream_output_info stream_output = {}; 374bf215546Sopenharmony_ci copy_stream_out(&stream_output, &cso->stream_output); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci hwcso->shader = 377bf215546Sopenharmony_ci ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){ 378bf215546Sopenharmony_ci /* TODO: force to single on a6xx with legacy 379bf215546Sopenharmony_ci * ballot extension that uses 64-bit masks 380bf215546Sopenharmony_ci */ 381bf215546Sopenharmony_ci .api_wavesize = IR3_SINGLE_OR_DOUBLE, 382bf215546Sopenharmony_ci .real_wavesize = IR3_SINGLE_OR_DOUBLE, 383bf215546Sopenharmony_ci }, 384bf215546Sopenharmony_ci &stream_output); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci /* 387bf215546Sopenharmony_ci * Create initial variants to avoid draw-time stalls. This is 388bf215546Sopenharmony_ci * normally done asynchronously, unless debug is enabled (which 389bf215546Sopenharmony_ci * will be the case for shader-db) 390bf215546Sopenharmony_ci */ 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci util_queue_fence_init(&hwcso->ready); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci if (initial_variants_synchronous(ctx)) { 395bf215546Sopenharmony_ci create_initial_variants(hwcso, &ctx->debug); 396bf215546Sopenharmony_ci } else { 397bf215546Sopenharmony_ci util_queue_add_job(&ctx->screen->compile_queue, hwcso, &hwcso->ready, 398bf215546Sopenharmony_ci create_initial_variants_async, NULL, 0); 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci return hwcso; 402bf215546Sopenharmony_ci} 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_civoid 405bf215546Sopenharmony_ciir3_shader_state_delete(struct pipe_context *pctx, void *_hwcso) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(pctx); 408bf215546Sopenharmony_ci struct fd_screen *screen = ctx->screen; 409bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = _hwcso; 410bf215546Sopenharmony_ci struct ir3_shader *so = hwcso->shader; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci ir3_cache_invalidate(ctx->shader_cache, hwcso); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci /* util_queue_drop_job() guarantees that either: 415bf215546Sopenharmony_ci * 1) job did not execute 416bf215546Sopenharmony_ci * 2) job completed 417bf215546Sopenharmony_ci * 418bf215546Sopenharmony_ci * In either case the fence is signaled 419bf215546Sopenharmony_ci */ 420bf215546Sopenharmony_ci util_queue_drop_job(&screen->compile_queue, &hwcso->ready); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci /* free the uploaded shaders, since this is handled outside of the 423bf215546Sopenharmony_ci * shared ir3 code (ie. not used by turnip): 424bf215546Sopenharmony_ci */ 425bf215546Sopenharmony_ci for (struct ir3_shader_variant *v = so->variants; v; v = v->next) { 426bf215546Sopenharmony_ci fd_bo_del(v->bo); 427bf215546Sopenharmony_ci v->bo = NULL; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci if (v->binning && v->binning->bo) { 430bf215546Sopenharmony_ci fd_bo_del(v->binning->bo); 431bf215546Sopenharmony_ci v->binning->bo = NULL; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci ir3_shader_destroy(so); 436bf215546Sopenharmony_ci util_queue_fence_destroy(&hwcso->ready); 437bf215546Sopenharmony_ci free(hwcso); 438bf215546Sopenharmony_ci} 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_cistruct ir3_shader * 441bf215546Sopenharmony_ciir3_get_shader(struct ir3_shader_state *hwcso) 442bf215546Sopenharmony_ci{ 443bf215546Sopenharmony_ci if (!hwcso) 444bf215546Sopenharmony_ci return NULL; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci struct ir3_shader *shader = hwcso->shader; 447bf215546Sopenharmony_ci perf_time (1000, "waited for %s:%s:%s variants", 448bf215546Sopenharmony_ci _mesa_shader_stage_to_abbrev(shader->type), 449bf215546Sopenharmony_ci shader->nir->info.name, 450bf215546Sopenharmony_ci shader->nir->info.label) { 451bf215546Sopenharmony_ci /* wait for initial variants to compile: */ 452bf215546Sopenharmony_ci util_queue_fence_wait(&hwcso->ready); 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci return shader; 456bf215546Sopenharmony_ci} 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_cistruct shader_info * 459bf215546Sopenharmony_ciir3_get_shader_info(struct ir3_shader_state *hwcso) 460bf215546Sopenharmony_ci{ 461bf215546Sopenharmony_ci if (!hwcso) 462bf215546Sopenharmony_ci return NULL; 463bf215546Sopenharmony_ci return &hwcso->shader->nir->info; 464bf215546Sopenharmony_ci} 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci/* fixup dirty shader state in case some "unrelated" (from the state- 467bf215546Sopenharmony_ci * tracker's perspective) state change causes us to switch to a 468bf215546Sopenharmony_ci * different variant. 469bf215546Sopenharmony_ci */ 470bf215546Sopenharmony_civoid 471bf215546Sopenharmony_ciir3_fixup_shader_state(struct pipe_context *pctx, struct ir3_shader_key *key) 472bf215546Sopenharmony_ci{ 473bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(pctx); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci if (!ir3_shader_key_equal(ctx->last.key, key)) { 476bf215546Sopenharmony_ci if (ir3_shader_key_changes_fs(ctx->last.key, key)) { 477bf215546Sopenharmony_ci fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, 478bf215546Sopenharmony_ci FD_DIRTY_SHADER_PROG); 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci if (ir3_shader_key_changes_vs(ctx->last.key, key)) { 482bf215546Sopenharmony_ci fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG); 483bf215546Sopenharmony_ci } 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* NOTE: currently only a6xx has gs/tess, but needs no 486bf215546Sopenharmony_ci * gs/tess specific lowering. 487bf215546Sopenharmony_ci */ 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci *ctx->last.key = *key; 490bf215546Sopenharmony_ci } 491bf215546Sopenharmony_ci} 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_cistatic char * 494bf215546Sopenharmony_ciir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci struct fd_screen *screen = fd_screen(pscreen); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci ir3_nir_lower_io_to_temporaries(nir); 499bf215546Sopenharmony_ci ir3_finalize_nir(screen->compiler, nir); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci return NULL; 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_cistatic void 505bf215546Sopenharmony_ciir3_set_max_shader_compiler_threads(struct pipe_screen *pscreen, 506bf215546Sopenharmony_ci unsigned max_threads) 507bf215546Sopenharmony_ci{ 508bf215546Sopenharmony_ci struct fd_screen *screen = fd_screen(pscreen); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci /* This function doesn't allow a greater number of threads than 511bf215546Sopenharmony_ci * the queue had at its creation. 512bf215546Sopenharmony_ci */ 513bf215546Sopenharmony_ci util_queue_adjust_num_threads(&screen->compile_queue, max_threads); 514bf215546Sopenharmony_ci} 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_cistatic bool 517bf215546Sopenharmony_ciir3_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen, 518bf215546Sopenharmony_ci void *shader, 519bf215546Sopenharmony_ci enum pipe_shader_type shader_type) 520bf215546Sopenharmony_ci{ 521bf215546Sopenharmony_ci struct ir3_shader_state *hwcso = (struct ir3_shader_state *)shader; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci return util_queue_fence_is_signalled(&hwcso->ready); 524bf215546Sopenharmony_ci} 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_civoid 527bf215546Sopenharmony_ciir3_prog_init(struct pipe_context *pctx) 528bf215546Sopenharmony_ci{ 529bf215546Sopenharmony_ci pctx->create_vs_state = ir3_shader_state_create; 530bf215546Sopenharmony_ci pctx->delete_vs_state = ir3_shader_state_delete; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci pctx->create_tcs_state = ir3_shader_state_create; 533bf215546Sopenharmony_ci pctx->delete_tcs_state = ir3_shader_state_delete; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci pctx->create_tes_state = ir3_shader_state_create; 536bf215546Sopenharmony_ci pctx->delete_tes_state = ir3_shader_state_delete; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci pctx->create_gs_state = ir3_shader_state_create; 539bf215546Sopenharmony_ci pctx->delete_gs_state = ir3_shader_state_delete; 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci pctx->create_fs_state = ir3_shader_state_create; 542bf215546Sopenharmony_ci pctx->delete_fs_state = ir3_shader_state_delete; 543bf215546Sopenharmony_ci} 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_civoid 546bf215546Sopenharmony_ciir3_screen_init(struct pipe_screen *pscreen) 547bf215546Sopenharmony_ci{ 548bf215546Sopenharmony_ci struct fd_screen *screen = fd_screen(pscreen); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, 551bf215546Sopenharmony_ci &(struct ir3_compiler_options) {}); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci /* TODO do we want to limit things to # of fast cores, or just limit 554bf215546Sopenharmony_ci * based on total # of both big and little cores. The little cores 555bf215546Sopenharmony_ci * tend to be in-order and probably much slower for compiling than 556bf215546Sopenharmony_ci * big cores. OTOH if they are sitting idle, maybe it is useful to 557bf215546Sopenharmony_ci * use them? 558bf215546Sopenharmony_ci */ 559bf215546Sopenharmony_ci unsigned num_threads = sysconf(_SC_NPROCESSORS_ONLN) - 1; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci /* Create at least one thread - even on single core CPU systems. */ 562bf215546Sopenharmony_ci num_threads = MAX2(1, num_threads); 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads, 565bf215546Sopenharmony_ci UTIL_QUEUE_INIT_RESIZE_IF_FULL | 566bf215546Sopenharmony_ci UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL); 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci pscreen->finalize_nir = ir3_screen_finalize_nir; 569bf215546Sopenharmony_ci pscreen->set_max_shader_compiler_threads = 570bf215546Sopenharmony_ci ir3_set_max_shader_compiler_threads; 571bf215546Sopenharmony_ci pscreen->is_parallel_shader_compilation_finished = 572bf215546Sopenharmony_ci ir3_is_parallel_shader_compilation_finished; 573bf215546Sopenharmony_ci} 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_civoid 576bf215546Sopenharmony_ciir3_screen_fini(struct pipe_screen *pscreen) 577bf215546Sopenharmony_ci{ 578bf215546Sopenharmony_ci struct fd_screen *screen = fd_screen(pscreen); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci util_queue_destroy(&screen->compile_queue); 581bf215546Sopenharmony_ci ir3_compiler_destroy(screen->compiler); 582bf215546Sopenharmony_ci screen->compiler = NULL; 583bf215546Sopenharmony_ci} 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_civoid 586bf215546Sopenharmony_ciir3_update_max_tf_vtx(struct fd_context *ctx, 587bf215546Sopenharmony_ci const struct ir3_shader_variant *v) 588bf215546Sopenharmony_ci{ 589bf215546Sopenharmony_ci struct fd_streamout_stateobj *so = &ctx->streamout; 590bf215546Sopenharmony_ci const struct ir3_stream_output_info *info = &v->stream_output; 591bf215546Sopenharmony_ci uint32_t maxvtxcnt = 0x7fffffff; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci if (v->stream_output.num_outputs == 0) 594bf215546Sopenharmony_ci maxvtxcnt = 0; 595bf215546Sopenharmony_ci if (so->num_targets == 0) 596bf215546Sopenharmony_ci maxvtxcnt = 0; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci /* offset to write to is: 599bf215546Sopenharmony_ci * 600bf215546Sopenharmony_ci * total_vtxcnt = vtxcnt + offsets[i] 601bf215546Sopenharmony_ci * offset = total_vtxcnt * stride[i] 602bf215546Sopenharmony_ci * 603bf215546Sopenharmony_ci * offset = vtxcnt * stride[i] ; calculated in shader 604bf215546Sopenharmony_ci * + offsets[i] * stride[i] ; calculated at emit_tfbos() 605bf215546Sopenharmony_ci * 606bf215546Sopenharmony_ci * assuming for each vtx, each target buffer will have data written 607bf215546Sopenharmony_ci * up to 'offset + stride[i]', that leaves maxvtxcnt as: 608bf215546Sopenharmony_ci * 609bf215546Sopenharmony_ci * buffer_size = (maxvtxcnt * stride[i]) + stride[i] 610bf215546Sopenharmony_ci * maxvtxcnt = (buffer_size - stride[i]) / stride[i] 611bf215546Sopenharmony_ci * 612bf215546Sopenharmony_ci * but shader is actually doing a less-than (rather than less-than- 613bf215546Sopenharmony_ci * equal) check, so we can drop the -stride[i]. 614bf215546Sopenharmony_ci * 615bf215546Sopenharmony_ci * TODO is assumption about `offset + stride[i]` legit? 616bf215546Sopenharmony_ci */ 617bf215546Sopenharmony_ci for (unsigned i = 0; i < so->num_targets; i++) { 618bf215546Sopenharmony_ci struct pipe_stream_output_target *target = so->targets[i]; 619bf215546Sopenharmony_ci unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */ 620bf215546Sopenharmony_ci if (target) { 621bf215546Sopenharmony_ci uint32_t max = target->buffer_size / stride; 622bf215546Sopenharmony_ci maxvtxcnt = MIN2(maxvtxcnt, max); 623bf215546Sopenharmony_ci } 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci ctx->streamout.max_tf_vtx = maxvtxcnt; 627bf215546Sopenharmony_ci} 628