1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2012-2015 Etnaviv Project 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 13bf215546Sopenharmony_ci * of the Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Wladimir J. van der Laan <laanwj@gmail.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "etnaviv_shader.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "etnaviv_compiler.h" 30bf215546Sopenharmony_ci#include "etnaviv_context.h" 31bf215546Sopenharmony_ci#include "etnaviv_debug.h" 32bf215546Sopenharmony_ci#include "etnaviv_disasm.h" 33bf215546Sopenharmony_ci#include "etnaviv_disk_cache.h" 34bf215546Sopenharmony_ci#include "etnaviv_screen.h" 35bf215546Sopenharmony_ci#include "etnaviv_util.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h" 38bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h" 39bf215546Sopenharmony_ci#include "util/u_atomic.h" 40bf215546Sopenharmony_ci#include "util/u_cpu_detect.h" 41bf215546Sopenharmony_ci#include "util/u_math.h" 42bf215546Sopenharmony_ci#include "util/u_memory.h" 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci/* Upload shader code to bo, if not already done */ 45bf215546Sopenharmony_cistatic bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci if (v->bo) 48bf215546Sopenharmony_ci return true; 49bf215546Sopenharmony_ci v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_WC); 50bf215546Sopenharmony_ci if (!v->bo) 51bf215546Sopenharmony_ci return false; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci void *buf = etna_bo_map(v->bo); 54bf215546Sopenharmony_ci etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE); 55bf215546Sopenharmony_ci memcpy(buf, v->code, v->code_size*4); 56bf215546Sopenharmony_ci etna_bo_cpu_fini(v->bo); 57bf215546Sopenharmony_ci DBG("Uploaded %s of %u words to bo %p", v->stage == MESA_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo); 58bf215546Sopenharmony_ci return true; 59bf215546Sopenharmony_ci} 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ciextern const char *tgsi_swizzle_names[]; 62bf215546Sopenharmony_civoid 63bf215546Sopenharmony_cietna_dump_shader(const struct etna_shader_variant *shader) 64bf215546Sopenharmony_ci{ 65bf215546Sopenharmony_ci if (shader->stage == MESA_SHADER_VERTEX) 66bf215546Sopenharmony_ci printf("VERT\n"); 67bf215546Sopenharmony_ci else 68bf215546Sopenharmony_ci printf("FRAG\n"); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci etna_disasm(shader->code, shader->code_size, PRINT_RAW); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci printf("num loops: %i\n", shader->num_loops); 73bf215546Sopenharmony_ci printf("num temps: %i\n", shader->num_temps); 74bf215546Sopenharmony_ci printf("immediates:\n"); 75bf215546Sopenharmony_ci for (int idx = 0; idx < shader->uniforms.count; ++idx) { 76bf215546Sopenharmony_ci printf(" [%i].%s = %f (0x%08x) (%d)\n", 77bf215546Sopenharmony_ci idx / 4, 78bf215546Sopenharmony_ci tgsi_swizzle_names[idx % 4], 79bf215546Sopenharmony_ci *((float *)&shader->uniforms.data[idx]), 80bf215546Sopenharmony_ci shader->uniforms.data[idx], 81bf215546Sopenharmony_ci shader->uniforms.contents[idx]); 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci printf("inputs:\n"); 84bf215546Sopenharmony_ci for (int idx = 0; idx < shader->infile.num_reg; ++idx) { 85bf215546Sopenharmony_ci printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg, 86bf215546Sopenharmony_ci (shader->stage == MESA_SHADER_VERTEX) ? 87bf215546Sopenharmony_ci gl_vert_attrib_name(shader->infile.reg[idx].slot) : 88bf215546Sopenharmony_ci gl_varying_slot_name_for_stage(shader->infile.reg[idx].slot, shader->stage), 89bf215546Sopenharmony_ci shader->infile.reg[idx].num_components); 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci printf("outputs:\n"); 92bf215546Sopenharmony_ci for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { 93bf215546Sopenharmony_ci printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg, 94bf215546Sopenharmony_ci (shader->stage == MESA_SHADER_VERTEX) ? 95bf215546Sopenharmony_ci gl_varying_slot_name_for_stage(shader->outfile.reg[idx].slot, shader->stage) : 96bf215546Sopenharmony_ci gl_frag_result_name(shader->outfile.reg[idx].slot), 97bf215546Sopenharmony_ci shader->outfile.reg[idx].num_components); 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci printf("special:\n"); 100bf215546Sopenharmony_ci if (shader->stage == MESA_SHADER_VERTEX) { 101bf215546Sopenharmony_ci printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); 102bf215546Sopenharmony_ci printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); 103bf215546Sopenharmony_ci printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); 104bf215546Sopenharmony_ci } else { 105bf215546Sopenharmony_ci printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); 106bf215546Sopenharmony_ci printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); 107bf215546Sopenharmony_ci } 108bf215546Sopenharmony_ci printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); 109bf215546Sopenharmony_ci} 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci/* Link vs and fs together: fill in shader_state from vs and fs 112bf215546Sopenharmony_ci * as this function is called every time a new fs or vs is bound, the goal is to 113bf215546Sopenharmony_ci * do little processing as possible here, and to precompute as much as possible in 114bf215546Sopenharmony_ci * the vs/fs shader_object. 115bf215546Sopenharmony_ci * 116bf215546Sopenharmony_ci * XXX we could cache the link result for a certain set of VS/PS; usually a pair 117bf215546Sopenharmony_ci * of VS and PS will be used together anyway. 118bf215546Sopenharmony_ci */ 119bf215546Sopenharmony_cistatic bool 120bf215546Sopenharmony_cietna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs, 121bf215546Sopenharmony_ci struct etna_shader_variant *vs, struct etna_shader_variant *fs) 122bf215546Sopenharmony_ci{ 123bf215546Sopenharmony_ci struct etna_shader_link_info link = { }; 124bf215546Sopenharmony_ci bool failed; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci assert(vs->stage == MESA_SHADER_VERTEX); 127bf215546Sopenharmony_ci assert(fs->stage == MESA_SHADER_FRAGMENT); 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci#ifdef DEBUG 130bf215546Sopenharmony_ci if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) { 131bf215546Sopenharmony_ci etna_dump_shader(vs); 132bf215546Sopenharmony_ci etna_dump_shader(fs); 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci#endif 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci failed = etna_link_shader(&link, vs, fs); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci if (failed) { 139bf215546Sopenharmony_ci /* linking failed: some fs inputs do not have corresponding 140bf215546Sopenharmony_ci * vs outputs */ 141bf215546Sopenharmony_ci assert(0); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci return false; 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci if (DBG_ENABLED(ETNA_DBG_LINKER_MSGS)) { 147bf215546Sopenharmony_ci debug_printf("link result:\n"); 148bf215546Sopenharmony_ci debug_printf(" vs -> fs comps use pa_attr\n"); 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci for (int idx = 0; idx < link.num_varyings; ++idx) 151bf215546Sopenharmony_ci debug_printf(" t%-2u -> t%-2u %-5.*s %u,%u,%u,%u 0x%08x\n", 152bf215546Sopenharmony_ci link.varyings[idx].reg, idx + 1, 153bf215546Sopenharmony_ci link.varyings[idx].num_components, "xyzw", 154bf215546Sopenharmony_ci link.varyings[idx].use[0], link.varyings[idx].use[1], 155bf215546Sopenharmony_ci link.varyings[idx].use[2], link.varyings[idx].use[3], 156bf215546Sopenharmony_ci link.varyings[idx].pa_attributes); 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci /* set last_varying_2x flag if the last varying has 1 or 2 components */ 160bf215546Sopenharmony_ci bool last_varying_2x = false; 161bf215546Sopenharmony_ci if (link.num_varyings > 0 && link.varyings[link.num_varyings - 1].num_components <= 2) 162bf215546Sopenharmony_ci last_varying_2x = true; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci cs->RA_CONTROL = VIVS_RA_CONTROL_UNK0 | 165bf215546Sopenharmony_ci COND(last_varying_2x, VIVS_RA_CONTROL_LAST_VARYING_2X); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(link.num_varyings); 168bf215546Sopenharmony_ci for (int idx = 0; idx < link.num_varyings; ++idx) 169bf215546Sopenharmony_ci cs->PA_SHADER_ATTRIBUTES[idx] = link.varyings[idx].pa_attributes; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci cs->VS_END_PC = vs->code_size / 4; 172bf215546Sopenharmony_ci cs->VS_OUTPUT_COUNT = 1 + link.num_varyings; /* position + varyings */ 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* vs outputs (varyings) */ 175bf215546Sopenharmony_ci DEFINE_ETNA_BITARRAY(vs_output, 16, 8) = {0}; 176bf215546Sopenharmony_ci int varid = 0; 177bf215546Sopenharmony_ci etna_bitarray_set(vs_output, 8, varid++, vs->vs_pos_out_reg); 178bf215546Sopenharmony_ci for (int idx = 0; idx < link.num_varyings; ++idx) 179bf215546Sopenharmony_ci etna_bitarray_set(vs_output, 8, varid++, link.varyings[idx].reg); 180bf215546Sopenharmony_ci if (vs->vs_pointsize_out_reg >= 0) 181bf215546Sopenharmony_ci etna_bitarray_set(vs_output, 8, varid++, vs->vs_pointsize_out_reg); /* pointsize is last */ 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci for (int idx = 0; idx < ARRAY_SIZE(cs->VS_OUTPUT); ++idx) 184bf215546Sopenharmony_ci cs->VS_OUTPUT[idx] = vs_output[idx]; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (vs->vs_pointsize_out_reg != -1) { 187bf215546Sopenharmony_ci /* vertex shader outputs point coordinate, provide extra output and make 188bf215546Sopenharmony_ci * sure PA config is 189bf215546Sopenharmony_ci * not masked */ 190bf215546Sopenharmony_ci cs->PA_CONFIG = ~0; 191bf215546Sopenharmony_ci cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT + 1; 192bf215546Sopenharmony_ci } else { 193bf215546Sopenharmony_ci /* vertex shader does not output point coordinate, make sure thate 194bf215546Sopenharmony_ci * POINT_SIZE_ENABLE is masked 195bf215546Sopenharmony_ci * and no extra output is given */ 196bf215546Sopenharmony_ci cs->PA_CONFIG = ~VIVS_PA_CONFIG_POINT_SIZE_ENABLE; 197bf215546Sopenharmony_ci cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* if fragment shader doesn't read pointcoord, disable it */ 201bf215546Sopenharmony_ci if (link.pcoord_varying_comp_ofs == -1) 202bf215546Sopenharmony_ci cs->PA_CONFIG &= ~VIVS_PA_CONFIG_POINT_SPRITE_ENABLE; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci cs->VS_LOAD_BALANCING = vs->vs_load_balancing; 205bf215546Sopenharmony_ci cs->VS_START_PC = 0; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci cs->PS_END_PC = fs->code_size / 4; 208bf215546Sopenharmony_ci cs->PS_OUTPUT_REG = fs->ps_color_out_reg; 209bf215546Sopenharmony_ci cs->PS_INPUT_COUNT = 210bf215546Sopenharmony_ci VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 1) | /* Number of inputs plus position */ 211bf215546Sopenharmony_ci VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8); 212bf215546Sopenharmony_ci cs->PS_TEMP_REGISTER_CONTROL = 213bf215546Sopenharmony_ci VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 1)); 214bf215546Sopenharmony_ci cs->PS_START_PC = 0; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci /* Precompute PS_INPUT_COUNT and TEMP_REGISTER_CONTROL in the case of MSAA 217bf215546Sopenharmony_ci * mode, avoids some fumbling in sync_context. */ 218bf215546Sopenharmony_ci cs->PS_INPUT_COUNT_MSAA = 219bf215546Sopenharmony_ci VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 2) | /* MSAA adds another input */ 220bf215546Sopenharmony_ci VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8); 221bf215546Sopenharmony_ci cs->PS_TEMP_REGISTER_CONTROL_MSAA = 222bf215546Sopenharmony_ci VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 2)); 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci uint32_t total_components = 0; 225bf215546Sopenharmony_ci DEFINE_ETNA_BITARRAY(num_components, ETNA_NUM_VARYINGS, 4) = {0}; 226bf215546Sopenharmony_ci DEFINE_ETNA_BITARRAY(component_use, 4 * ETNA_NUM_VARYINGS, 2) = {0}; 227bf215546Sopenharmony_ci for (int idx = 0; idx < link.num_varyings; ++idx) { 228bf215546Sopenharmony_ci const struct etna_varying *varying = &link.varyings[idx]; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci etna_bitarray_set(num_components, 4, idx, varying->num_components); 231bf215546Sopenharmony_ci for (int comp = 0; comp < varying->num_components; ++comp) { 232bf215546Sopenharmony_ci etna_bitarray_set(component_use, 2, total_components, varying->use[comp]); 233bf215546Sopenharmony_ci total_components += 1; 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci cs->GL_VARYING_TOTAL_COMPONENTS = 238bf215546Sopenharmony_ci VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(align(total_components, 2)); 239bf215546Sopenharmony_ci cs->GL_VARYING_NUM_COMPONENTS[0] = num_components[0]; 240bf215546Sopenharmony_ci cs->GL_VARYING_NUM_COMPONENTS[1] = num_components[1]; 241bf215546Sopenharmony_ci cs->GL_VARYING_COMPONENT_USE[0] = component_use[0]; 242bf215546Sopenharmony_ci cs->GL_VARYING_COMPONENT_USE[1] = component_use[1]; 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci cs->GL_HALTI5_SH_SPECIALS = 245bf215546Sopenharmony_ci 0x7f7f0000 | /* unknown bits, probably other PS inputs */ 246bf215546Sopenharmony_ci /* pointsize is last (see above) */ 247bf215546Sopenharmony_ci VIVS_GL_HALTI5_SH_SPECIALS_VS_PSIZE_OUT((vs->vs_pointsize_out_reg != -1) ? 248bf215546Sopenharmony_ci cs->VS_OUTPUT_COUNT * 4 : 0x00) | 249bf215546Sopenharmony_ci VIVS_GL_HALTI5_SH_SPECIALS_PS_PCOORD_IN((link.pcoord_varying_comp_ofs != -1) ? 250bf215546Sopenharmony_ci link.pcoord_varying_comp_ofs : 0x7f); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci cs->writes_z = fs->ps_depth_out_reg >= 0; 253bf215546Sopenharmony_ci cs->uses_discard = fs->uses_discard; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci /* reference instruction memory */ 256bf215546Sopenharmony_ci cs->vs_inst_mem_size = vs->code_size; 257bf215546Sopenharmony_ci cs->VS_INST_MEM = vs->code; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci cs->ps_inst_mem_size = fs->code_size; 260bf215546Sopenharmony_ci cs->PS_INST_MEM = fs->code; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci if (vs->needs_icache || fs->needs_icache) { 263bf215546Sopenharmony_ci /* If either of the shaders needs ICACHE, we use it for both. It is 264bf215546Sopenharmony_ci * either switched on or off for the entire shader processor. 265bf215546Sopenharmony_ci */ 266bf215546Sopenharmony_ci if (!etna_icache_upload_shader(ctx, vs) || 267bf215546Sopenharmony_ci !etna_icache_upload_shader(ctx, fs)) { 268bf215546Sopenharmony_ci assert(0); 269bf215546Sopenharmony_ci return false; 270bf215546Sopenharmony_ci } 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci cs->VS_INST_ADDR.bo = vs->bo; 273bf215546Sopenharmony_ci cs->VS_INST_ADDR.offset = 0; 274bf215546Sopenharmony_ci cs->VS_INST_ADDR.flags = ETNA_RELOC_READ; 275bf215546Sopenharmony_ci cs->PS_INST_ADDR.bo = fs->bo; 276bf215546Sopenharmony_ci cs->PS_INST_ADDR.offset = 0; 277bf215546Sopenharmony_ci cs->PS_INST_ADDR.flags = ETNA_RELOC_READ; 278bf215546Sopenharmony_ci } else { 279bf215546Sopenharmony_ci /* clear relocs */ 280bf215546Sopenharmony_ci memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR)); 281bf215546Sopenharmony_ci memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR)); 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci return true; 285bf215546Sopenharmony_ci} 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_cibool 288bf215546Sopenharmony_cietna_shader_link(struct etna_context *ctx) 289bf215546Sopenharmony_ci{ 290bf215546Sopenharmony_ci if (!ctx->shader.vs || !ctx->shader.fs) 291bf215546Sopenharmony_ci return false; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci /* re-link vs and fs if needed */ 294bf215546Sopenharmony_ci return etna_link_shaders(ctx, &ctx->shader_state, ctx->shader.vs, ctx->shader.fs); 295bf215546Sopenharmony_ci} 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_civoid 298bf215546Sopenharmony_cietna_destroy_shader(struct etna_shader_variant *shader) 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ci assert(shader); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci FREE(shader->code); 303bf215546Sopenharmony_ci FREE(shader->uniforms.data); 304bf215546Sopenharmony_ci FREE(shader->uniforms.contents); 305bf215546Sopenharmony_ci FREE(shader); 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cistatic bool 309bf215546Sopenharmony_cietna_shader_update_vs_inputs(struct compiled_shader_state *cs, 310bf215546Sopenharmony_ci const struct etna_shader_variant *vs, 311bf215546Sopenharmony_ci const struct compiled_vertex_elements_state *ves) 312bf215546Sopenharmony_ci{ 313bf215546Sopenharmony_ci unsigned num_temps, cur_temp, num_vs_inputs; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (!vs) 316bf215546Sopenharmony_ci return false; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* Number of vertex elements determines number of VS inputs. Otherwise, 319bf215546Sopenharmony_ci * the GPU crashes. Allocate any unused vertex elements to VS temporary 320bf215546Sopenharmony_ci * registers. */ 321bf215546Sopenharmony_ci num_vs_inputs = MAX2(ves->num_elements, vs->infile.num_reg); 322bf215546Sopenharmony_ci if (num_vs_inputs != ves->num_elements) { 323bf215546Sopenharmony_ci BUG("Number of elements %u does not match the number of VS inputs %zu", 324bf215546Sopenharmony_ci ves->num_elements, vs->infile.num_reg); 325bf215546Sopenharmony_ci return false; 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci cur_temp = vs->num_temps; 329bf215546Sopenharmony_ci num_temps = num_vs_inputs - vs->infile.num_reg + cur_temp; 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs) | 332bf215546Sopenharmony_ci VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8); 333bf215546Sopenharmony_ci cs->VS_TEMP_REGISTER_CONTROL = 334bf215546Sopenharmony_ci VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(num_temps); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci /* vs inputs (attributes) */ 337bf215546Sopenharmony_ci DEFINE_ETNA_BITARRAY(vs_input, 16, 8) = {0}; 338bf215546Sopenharmony_ci for (int idx = 0; idx < num_vs_inputs; ++idx) { 339bf215546Sopenharmony_ci if (idx < vs->infile.num_reg) 340bf215546Sopenharmony_ci etna_bitarray_set(vs_input, 8, idx, vs->infile.reg[idx].reg); 341bf215546Sopenharmony_ci else 342bf215546Sopenharmony_ci etna_bitarray_set(vs_input, 8, idx, cur_temp++); 343bf215546Sopenharmony_ci } 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci if (vs->vs_id_in_reg >= 0) { 346bf215546Sopenharmony_ci cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs + 1) | 347bf215546Sopenharmony_ci VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8) | 348bf215546Sopenharmony_ci VIVS_VS_INPUT_COUNT_ID_ENABLE; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci etna_bitarray_set(vs_input, 8, num_vs_inputs, vs->vs_id_in_reg); 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci cs->FE_HALTI5_ID_CONFIG = 353bf215546Sopenharmony_ci VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_ENABLE | 354bf215546Sopenharmony_ci VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_ENABLE | 355bf215546Sopenharmony_ci VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_REG(vs->vs_id_in_reg * 4) | 356bf215546Sopenharmony_ci VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_REG(vs->vs_id_in_reg * 4 + 1); 357bf215546Sopenharmony_ci } 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci for (int idx = 0; idx < ARRAY_SIZE(cs->VS_INPUT); ++idx) 360bf215546Sopenharmony_ci cs->VS_INPUT[idx] = vs_input[idx]; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci return true; 363bf215546Sopenharmony_ci} 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_cistatic inline const char * 366bf215546Sopenharmony_cietna_shader_stage(struct etna_shader_variant *shader) 367bf215546Sopenharmony_ci{ 368bf215546Sopenharmony_ci switch (shader->stage) { 369bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: return "VERT"; 370bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: return "FRAG"; 371bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: return "CL"; 372bf215546Sopenharmony_ci default: 373bf215546Sopenharmony_ci unreachable("invalid type"); 374bf215546Sopenharmony_ci return NULL; 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci} 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_cistatic void 379bf215546Sopenharmony_cidump_shader_info(struct etna_shader_variant *v, struct util_debug_callback *debug) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci if (!unlikely(etna_mesa_debug & ETNA_DBG_SHADERDB)) 382bf215546Sopenharmony_ci return; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci util_debug_message(debug, SHADER_INFO, 385bf215546Sopenharmony_ci "%s shader: %u instructions, %u temps, " 386bf215546Sopenharmony_ci "%u immediates, %u loops", 387bf215546Sopenharmony_ci etna_shader_stage(v), 388bf215546Sopenharmony_ci v->code_size, 389bf215546Sopenharmony_ci v->num_temps, 390bf215546Sopenharmony_ci v->uniforms.count, 391bf215546Sopenharmony_ci v->num_loops); 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_cibool 395bf215546Sopenharmony_cietna_shader_update_vertex(struct etna_context *ctx) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci return etna_shader_update_vs_inputs(&ctx->shader_state, ctx->shader.vs, 398bf215546Sopenharmony_ci ctx->vertex_elements); 399bf215546Sopenharmony_ci} 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_cistatic struct etna_shader_variant * 402bf215546Sopenharmony_cicreate_variant(struct etna_shader *shader, struct etna_shader_key key) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci struct etna_shader_variant *v = CALLOC_STRUCT(etna_shader_variant); 405bf215546Sopenharmony_ci int ret; 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (!v) 408bf215546Sopenharmony_ci return NULL; 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci v->shader = shader; 411bf215546Sopenharmony_ci v->key = key; 412bf215546Sopenharmony_ci v->id = ++shader->variant_count; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci if (etna_disk_cache_retrieve(shader->compiler, v)) 415bf215546Sopenharmony_ci return v; 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci ret = etna_compile_shader(v); 418bf215546Sopenharmony_ci if (!ret) { 419bf215546Sopenharmony_ci debug_error("compile failed!"); 420bf215546Sopenharmony_ci goto fail; 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci etna_disk_cache_store(shader->compiler, v); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci return v; 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cifail: 428bf215546Sopenharmony_ci FREE(v); 429bf215546Sopenharmony_ci return NULL; 430bf215546Sopenharmony_ci} 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_cistruct etna_shader_variant * 433bf215546Sopenharmony_cietna_shader_variant(struct etna_shader *shader, struct etna_shader_key key, 434bf215546Sopenharmony_ci struct util_debug_callback *debug) 435bf215546Sopenharmony_ci{ 436bf215546Sopenharmony_ci struct etna_shader_variant *v; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci for (v = shader->variants; v; v = v->next) 439bf215546Sopenharmony_ci if (etna_shader_key_equal(&key, &v->key)) 440bf215546Sopenharmony_ci return v; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci /* compile new variant if it doesn't exist already */ 443bf215546Sopenharmony_ci v = create_variant(shader, key); 444bf215546Sopenharmony_ci if (v) { 445bf215546Sopenharmony_ci v->next = shader->variants; 446bf215546Sopenharmony_ci shader->variants = v; 447bf215546Sopenharmony_ci dump_shader_info(v, debug); 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci return v; 451bf215546Sopenharmony_ci} 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci/** 454bf215546Sopenharmony_ci * Should initial variants be compiled synchronously? 455bf215546Sopenharmony_ci * 456bf215546Sopenharmony_ci * The only case where pipe_debug_message() is used in the initial-variants 457bf215546Sopenharmony_ci * path is with ETNA_MESA_DEBUG=shaderdb. So if either debug is disabled (ie. 458bf215546Sopenharmony_ci * debug.debug_message==NULL), or shaderdb stats are not enabled, we can 459bf215546Sopenharmony_ci * compile the initial shader variant asynchronously. 460bf215546Sopenharmony_ci */ 461bf215546Sopenharmony_cistatic inline bool 462bf215546Sopenharmony_ciinitial_variants_synchronous(struct etna_context *ctx) 463bf215546Sopenharmony_ci{ 464bf215546Sopenharmony_ci return unlikely(ctx->debug.debug_message) || (etna_mesa_debug & ETNA_DBG_SHADERDB); 465bf215546Sopenharmony_ci} 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_cistatic void 468bf215546Sopenharmony_cicreate_initial_variants_async(void *job, void *gdata, int thread_index) 469bf215546Sopenharmony_ci{ 470bf215546Sopenharmony_ci struct etna_shader *shader = job; 471bf215546Sopenharmony_ci struct util_debug_callback debug = {}; 472bf215546Sopenharmony_ci static struct etna_shader_key key; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci etna_shader_variant(shader, key, &debug); 475bf215546Sopenharmony_ci} 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_cistatic void * 478bf215546Sopenharmony_cietna_create_shader_state(struct pipe_context *pctx, 479bf215546Sopenharmony_ci const struct pipe_shader_state *pss) 480bf215546Sopenharmony_ci{ 481bf215546Sopenharmony_ci struct etna_context *ctx = etna_context(pctx); 482bf215546Sopenharmony_ci struct etna_screen *screen = ctx->screen; 483bf215546Sopenharmony_ci struct etna_compiler *compiler = screen->compiler; 484bf215546Sopenharmony_ci struct etna_shader *shader = CALLOC_STRUCT(etna_shader); 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci if (!shader) 487bf215546Sopenharmony_ci return NULL; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci shader->id = p_atomic_inc_return(&compiler->shader_count); 490bf215546Sopenharmony_ci shader->specs = &screen->specs; 491bf215546Sopenharmony_ci shader->compiler = screen->compiler; 492bf215546Sopenharmony_ci util_queue_fence_init(&shader->ready); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci shader->nir = (pss->type == PIPE_SHADER_IR_NIR) ? pss->ir.nir : 495bf215546Sopenharmony_ci tgsi_to_nir(pss->tokens, pctx->screen, false); 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci etna_disk_cache_init_shader_key(compiler, shader); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci if (initial_variants_synchronous(ctx)) { 500bf215546Sopenharmony_ci struct etna_shader_key key = {}; 501bf215546Sopenharmony_ci etna_shader_variant(shader, key, &ctx->debug); 502bf215546Sopenharmony_ci } else { 503bf215546Sopenharmony_ci struct etna_screen *screen = ctx->screen; 504bf215546Sopenharmony_ci util_queue_add_job(&screen->shader_compiler_queue, shader, &shader->ready, 505bf215546Sopenharmony_ci create_initial_variants_async, NULL, 0); 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci return shader; 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_cistatic void 512bf215546Sopenharmony_cietna_delete_shader_state(struct pipe_context *pctx, void *ss) 513bf215546Sopenharmony_ci{ 514bf215546Sopenharmony_ci struct etna_context *ctx = etna_context(pctx); 515bf215546Sopenharmony_ci struct etna_screen *screen = ctx->screen; 516bf215546Sopenharmony_ci struct etna_shader *shader = ss; 517bf215546Sopenharmony_ci struct etna_shader_variant *v, *t; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci util_queue_drop_job(&screen->shader_compiler_queue, &shader->ready); 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci v = shader->variants; 522bf215546Sopenharmony_ci while (v) { 523bf215546Sopenharmony_ci t = v; 524bf215546Sopenharmony_ci v = v->next; 525bf215546Sopenharmony_ci if (t->bo) 526bf215546Sopenharmony_ci etna_bo_del(t->bo); 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci etna_destroy_shader(t); 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci tgsi_free_tokens(shader->tokens); 532bf215546Sopenharmony_ci ralloc_free(shader->nir); 533bf215546Sopenharmony_ci util_queue_fence_destroy(&shader->ready); 534bf215546Sopenharmony_ci FREE(shader); 535bf215546Sopenharmony_ci} 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_cistatic void 538bf215546Sopenharmony_cietna_bind_fs_state(struct pipe_context *pctx, void *hwcso) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci struct etna_context *ctx = etna_context(pctx); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci ctx->shader.bind_fs = hwcso; 543bf215546Sopenharmony_ci ctx->dirty |= ETNA_DIRTY_SHADER; 544bf215546Sopenharmony_ci} 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_cistatic void 547bf215546Sopenharmony_cietna_bind_vs_state(struct pipe_context *pctx, void *hwcso) 548bf215546Sopenharmony_ci{ 549bf215546Sopenharmony_ci struct etna_context *ctx = etna_context(pctx); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci ctx->shader.bind_vs = hwcso; 552bf215546Sopenharmony_ci ctx->dirty |= ETNA_DIRTY_SHADER; 553bf215546Sopenharmony_ci} 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_cistatic void 556bf215546Sopenharmony_cietna_set_max_shader_compiler_threads(struct pipe_screen *pscreen, 557bf215546Sopenharmony_ci unsigned max_threads) 558bf215546Sopenharmony_ci{ 559bf215546Sopenharmony_ci struct etna_screen *screen = etna_screen(pscreen); 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads); 562bf215546Sopenharmony_ci} 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_cistatic bool 565bf215546Sopenharmony_cietna_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen, 566bf215546Sopenharmony_ci void *hwcso, 567bf215546Sopenharmony_ci enum pipe_shader_type shader_type) 568bf215546Sopenharmony_ci{ 569bf215546Sopenharmony_ci struct etna_shader *shader = (struct etna_shader *)hwcso; 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci return util_queue_fence_is_signalled(&shader->ready); 572bf215546Sopenharmony_ci} 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_civoid 575bf215546Sopenharmony_cietna_shader_init(struct pipe_context *pctx) 576bf215546Sopenharmony_ci{ 577bf215546Sopenharmony_ci pctx->create_fs_state = etna_create_shader_state; 578bf215546Sopenharmony_ci pctx->bind_fs_state = etna_bind_fs_state; 579bf215546Sopenharmony_ci pctx->delete_fs_state = etna_delete_shader_state; 580bf215546Sopenharmony_ci pctx->create_vs_state = etna_create_shader_state; 581bf215546Sopenharmony_ci pctx->bind_vs_state = etna_bind_vs_state; 582bf215546Sopenharmony_ci pctx->delete_vs_state = etna_delete_shader_state; 583bf215546Sopenharmony_ci} 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_cibool 586bf215546Sopenharmony_cietna_shader_screen_init(struct pipe_screen *pscreen) 587bf215546Sopenharmony_ci{ 588bf215546Sopenharmony_ci struct etna_screen *screen = etna_screen(pscreen); 589bf215546Sopenharmony_ci unsigned num_threads = util_get_cpu_caps()->nr_cpus - 1; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci /* Create at least one thread - even on single core CPU systems. */ 592bf215546Sopenharmony_ci num_threads = MAX2(1, num_threads); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci screen->compiler = etna_compiler_create(pscreen->get_name(pscreen), &screen->specs); 595bf215546Sopenharmony_ci if (!screen->compiler) 596bf215546Sopenharmony_ci return false; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci pscreen->set_max_shader_compiler_threads = etna_set_max_shader_compiler_threads; 599bf215546Sopenharmony_ci pscreen->is_parallel_shader_compilation_finished = etna_is_parallel_shader_compilation_finished; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci return util_queue_init(&screen->shader_compiler_queue, "sh", 64, num_threads, 602bf215546Sopenharmony_ci UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, 603bf215546Sopenharmony_ci NULL); 604bf215546Sopenharmony_ci} 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_civoid 607bf215546Sopenharmony_cietna_shader_screen_fini(struct pipe_screen *pscreen) 608bf215546Sopenharmony_ci{ 609bf215546Sopenharmony_ci struct etna_screen *screen = etna_screen(pscreen); 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci util_queue_destroy(&screen->shader_compiler_queue); 612bf215546Sopenharmony_ci etna_compiler_destroy(screen->compiler); 613bf215546Sopenharmony_ci} 614