1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (c) 2012-2015 Etnaviv Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
13bf215546Sopenharmony_ci * of the Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Wladimir J. van der Laan <laanwj@gmail.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "etnaviv_shader.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "etnaviv_compiler.h"
30bf215546Sopenharmony_ci#include "etnaviv_context.h"
31bf215546Sopenharmony_ci#include "etnaviv_debug.h"
32bf215546Sopenharmony_ci#include "etnaviv_disasm.h"
33bf215546Sopenharmony_ci#include "etnaviv_disk_cache.h"
34bf215546Sopenharmony_ci#include "etnaviv_screen.h"
35bf215546Sopenharmony_ci#include "etnaviv_util.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h"
38bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h"
39bf215546Sopenharmony_ci#include "util/u_atomic.h"
40bf215546Sopenharmony_ci#include "util/u_cpu_detect.h"
41bf215546Sopenharmony_ci#include "util/u_math.h"
42bf215546Sopenharmony_ci#include "util/u_memory.h"
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci/* Upload shader code to bo, if not already done */
45bf215546Sopenharmony_cistatic bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   if (v->bo)
48bf215546Sopenharmony_ci      return true;
49bf215546Sopenharmony_ci   v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_WC);
50bf215546Sopenharmony_ci   if (!v->bo)
51bf215546Sopenharmony_ci      return false;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   void *buf = etna_bo_map(v->bo);
54bf215546Sopenharmony_ci   etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
55bf215546Sopenharmony_ci   memcpy(buf, v->code, v->code_size*4);
56bf215546Sopenharmony_ci   etna_bo_cpu_fini(v->bo);
57bf215546Sopenharmony_ci   DBG("Uploaded %s of %u words to bo %p", v->stage == MESA_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
58bf215546Sopenharmony_ci   return true;
59bf215546Sopenharmony_ci}
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ciextern const char *tgsi_swizzle_names[];
62bf215546Sopenharmony_civoid
63bf215546Sopenharmony_cietna_dump_shader(const struct etna_shader_variant *shader)
64bf215546Sopenharmony_ci{
65bf215546Sopenharmony_ci   if (shader->stage == MESA_SHADER_VERTEX)
66bf215546Sopenharmony_ci      printf("VERT\n");
67bf215546Sopenharmony_ci   else
68bf215546Sopenharmony_ci      printf("FRAG\n");
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci   etna_disasm(shader->code, shader->code_size, PRINT_RAW);
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   printf("num loops: %i\n", shader->num_loops);
73bf215546Sopenharmony_ci   printf("num temps: %i\n", shader->num_temps);
74bf215546Sopenharmony_ci   printf("immediates:\n");
75bf215546Sopenharmony_ci   for (int idx = 0; idx < shader->uniforms.count; ++idx) {
76bf215546Sopenharmony_ci      printf(" [%i].%s = %f (0x%08x) (%d)\n",
77bf215546Sopenharmony_ci             idx / 4,
78bf215546Sopenharmony_ci             tgsi_swizzle_names[idx % 4],
79bf215546Sopenharmony_ci             *((float *)&shader->uniforms.data[idx]),
80bf215546Sopenharmony_ci             shader->uniforms.data[idx],
81bf215546Sopenharmony_ci             shader->uniforms.contents[idx]);
82bf215546Sopenharmony_ci   }
83bf215546Sopenharmony_ci   printf("inputs:\n");
84bf215546Sopenharmony_ci   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
85bf215546Sopenharmony_ci      printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg,
86bf215546Sopenharmony_ci               (shader->stage == MESA_SHADER_VERTEX) ?
87bf215546Sopenharmony_ci               gl_vert_attrib_name(shader->infile.reg[idx].slot) :
88bf215546Sopenharmony_ci               gl_varying_slot_name_for_stage(shader->infile.reg[idx].slot, shader->stage),
89bf215546Sopenharmony_ci               shader->infile.reg[idx].num_components);
90bf215546Sopenharmony_ci   }
91bf215546Sopenharmony_ci   printf("outputs:\n");
92bf215546Sopenharmony_ci   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
93bf215546Sopenharmony_ci      printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg,
94bf215546Sopenharmony_ci               (shader->stage == MESA_SHADER_VERTEX) ?
95bf215546Sopenharmony_ci               gl_varying_slot_name_for_stage(shader->outfile.reg[idx].slot, shader->stage) :
96bf215546Sopenharmony_ci               gl_frag_result_name(shader->outfile.reg[idx].slot),
97bf215546Sopenharmony_ci               shader->outfile.reg[idx].num_components);
98bf215546Sopenharmony_ci   }
99bf215546Sopenharmony_ci   printf("special:\n");
100bf215546Sopenharmony_ci   if (shader->stage == MESA_SHADER_VERTEX) {
101bf215546Sopenharmony_ci      printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
102bf215546Sopenharmony_ci      printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
103bf215546Sopenharmony_ci      printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
104bf215546Sopenharmony_ci   } else {
105bf215546Sopenharmony_ci      printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
106bf215546Sopenharmony_ci      printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
107bf215546Sopenharmony_ci   }
108bf215546Sopenharmony_ci   printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
109bf215546Sopenharmony_ci}
110bf215546Sopenharmony_ci
/* Link vs and fs together: fill in shader_state from vs and fs.
 * As this function is called every time a new fs or vs is bound, the goal is
 * to do as little processing as possible here, and to precompute as much as
 * possible in the vs/fs shader_object.
 *
 * ctx - context, only used to allocate the instruction bos when ICACHE is
 *       needed
 * cs  - compiled shader state that receives the register values
 * vs  - vertex shader variant (stage must be MESA_SHADER_VERTEX)
 * fs  - fragment shader variant (stage must be MESA_SHADER_FRAGMENT)
 *
 * Returns true on success. Returns false (after asserting) when
 * etna_link_shader() reports a failure or when uploading either shader to a
 * bo for ICACHE use fails; in the latter case most of cs has already been
 * written.
 *
 * XXX we could cache the link result for a certain set of VS/PS; usually a pair
 * of VS and PS will be used together anyway.
 */
static bool
etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
                  struct etna_shader_variant *vs, struct etna_shader_variant *fs)
{
   struct etna_shader_link_info link = { };
   bool failed;

   assert(vs->stage == MESA_SHADER_VERTEX);
   assert(fs->stage == MESA_SHADER_FRAGMENT);

#ifdef DEBUG
   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) {
      etna_dump_shader(vs);
      etna_dump_shader(fs);
   }
#endif

   /* note: etna_link_shader() returns true on FAILURE */
   failed = etna_link_shader(&link, vs, fs);

   if (failed) {
      /* linking failed: some fs inputs do not have corresponding
       * vs outputs */
      assert(0);

      return false;
   }

   if (DBG_ENABLED(ETNA_DBG_LINKER_MSGS)) {
      debug_printf("link result:\n");
      debug_printf("  vs  -> fs  comps use     pa_attr\n");

      /* one line per varying: vs register, fs register (idx + 1), the
       * first num_components letters of "xyzw", per-component use and the
       * PA attribute word */
      for (int idx = 0; idx < link.num_varyings; ++idx)
         debug_printf("  t%-2u -> t%-2u %-5.*s %u,%u,%u,%u 0x%08x\n",
                      link.varyings[idx].reg, idx + 1,
                      link.varyings[idx].num_components, "xyzw",
                      link.varyings[idx].use[0], link.varyings[idx].use[1],
                      link.varyings[idx].use[2], link.varyings[idx].use[3],
                      link.varyings[idx].pa_attributes);
   }

   /* set last_varying_2x flag if the last varying has 1 or 2 components */
   bool last_varying_2x = false;
   if (link.num_varyings > 0 && link.varyings[link.num_varyings - 1].num_components <= 2)
      last_varying_2x = true;

   cs->RA_CONTROL = VIVS_RA_CONTROL_UNK0 |
                    COND(last_varying_2x, VIVS_RA_CONTROL_LAST_VARYING_2X);

   cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(link.num_varyings);
   for (int idx = 0; idx < link.num_varyings; ++idx)
      cs->PA_SHADER_ATTRIBUTES[idx] = link.varyings[idx].pa_attributes;

   /* NOTE(review): code_size / 4 presumably converts 32-bit words to
    * instructions (4 words each) - confirm against the compiler. */
   cs->VS_END_PC = vs->code_size / 4;
   cs->VS_OUTPUT_COUNT = 1 + link.num_varyings; /* position + varyings */

   /* vs outputs (varyings): 8 bits per entry, ordered as position, then the
    * linked varyings, then (optionally) point size */
   DEFINE_ETNA_BITARRAY(vs_output, 16, 8) = {0};
   int varid = 0;
   etna_bitarray_set(vs_output, 8, varid++, vs->vs_pos_out_reg);
   for (int idx = 0; idx < link.num_varyings; ++idx)
      etna_bitarray_set(vs_output, 8, varid++, link.varyings[idx].reg);
   if (vs->vs_pointsize_out_reg >= 0)
      etna_bitarray_set(vs_output, 8, varid++, vs->vs_pointsize_out_reg); /* pointsize is last */

   for (int idx = 0; idx < ARRAY_SIZE(cs->VS_OUTPUT); ++idx)
      cs->VS_OUTPUT[idx] = vs_output[idx];

   if (vs->vs_pointsize_out_reg != -1) {
      /* vertex shader outputs point size, provide extra output and make
       * sure PA config is not masked */
      cs->PA_CONFIG = ~0;
      cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT + 1;
   } else {
      /* vertex shader does not output point size, make sure that
       * POINT_SIZE_ENABLE is masked and no extra output is given */
      cs->PA_CONFIG = ~VIVS_PA_CONFIG_POINT_SIZE_ENABLE;
      cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT;
   }

   /* if fragment shader doesn't read pointcoord, disable it; this must
    * follow the PA_CONFIG assignment above because it only clears a bit */
   if (link.pcoord_varying_comp_ofs == -1)
      cs->PA_CONFIG &= ~VIVS_PA_CONFIG_POINT_SPRITE_ENABLE;

   cs->VS_LOAD_BALANCING = vs->vs_load_balancing;
   cs->VS_START_PC = 0;

   cs->PS_END_PC = fs->code_size / 4;
   cs->PS_OUTPUT_REG = fs->ps_color_out_reg;
   cs->PS_INPUT_COUNT =
      VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 1) | /* Number of inputs plus position */
      VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8);
   /* the temp count must cover the input registers even if the shader
    * itself uses fewer temporaries */
   cs->PS_TEMP_REGISTER_CONTROL =
      VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 1));
   cs->PS_START_PC = 0;

   /* Precompute PS_INPUT_COUNT and TEMP_REGISTER_CONTROL in the case of MSAA
    * mode, avoids some fumbling in sync_context. */
   cs->PS_INPUT_COUNT_MSAA =
      VIVS_PS_INPUT_COUNT_COUNT(link.num_varyings + 2) | /* MSAA adds another input */
      VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8);
   cs->PS_TEMP_REGISTER_CONTROL_MSAA =
      VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, link.num_varyings + 2));

   /* Pack per-varying component counts (4 bits each) and per-component use
    * (2 bits each, densely packed across all varyings). */
   uint32_t total_components = 0;
   DEFINE_ETNA_BITARRAY(num_components, ETNA_NUM_VARYINGS, 4) = {0};
   DEFINE_ETNA_BITARRAY(component_use, 4 * ETNA_NUM_VARYINGS, 2) = {0};
   for (int idx = 0; idx < link.num_varyings; ++idx) {
      const struct etna_varying *varying = &link.varyings[idx];

      etna_bitarray_set(num_components, 4, idx, varying->num_components);
      for (int comp = 0; comp < varying->num_components; ++comp) {
         etna_bitarray_set(component_use, 2, total_components, varying->use[comp]);
         total_components += 1;
      }
   }

   /* the total component count is rounded up to a multiple of 2 */
   cs->GL_VARYING_TOTAL_COMPONENTS =
      VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(align(total_components, 2));
   cs->GL_VARYING_NUM_COMPONENTS[0] = num_components[0];
   cs->GL_VARYING_NUM_COMPONENTS[1] = num_components[1];
   cs->GL_VARYING_COMPONENT_USE[0] = component_use[0];
   cs->GL_VARYING_COMPONENT_USE[1] = component_use[1];

   cs->GL_HALTI5_SH_SPECIALS =
      0x7f7f0000 | /* unknown bits, probably other PS inputs */
      /* pointsize is last (see above) */
      VIVS_GL_HALTI5_SH_SPECIALS_VS_PSIZE_OUT((vs->vs_pointsize_out_reg != -1) ?
                                              cs->VS_OUTPUT_COUNT * 4 : 0x00) |
      VIVS_GL_HALTI5_SH_SPECIALS_PS_PCOORD_IN((link.pcoord_varying_comp_ofs != -1) ?
                                              link.pcoord_varying_comp_ofs : 0x7f);

   cs->writes_z = fs->ps_depth_out_reg >= 0;
   cs->uses_discard = fs->uses_discard;

   /* reference instruction memory; cs only borrows the code pointers, the
    * variants keep owning the memory */
   cs->vs_inst_mem_size = vs->code_size;
   cs->VS_INST_MEM = vs->code;

   cs->ps_inst_mem_size = fs->code_size;
   cs->PS_INST_MEM = fs->code;

   if (vs->needs_icache || fs->needs_icache) {
      /* If either of the shaders needs ICACHE, we use it for both. It is
       * either switched on or off for the entire shader processor.
       */
      if (!etna_icache_upload_shader(ctx, vs) ||
          !etna_icache_upload_shader(ctx, fs)) {
         assert(0);
         return false;
      }

      /* point the instruction address relocs at the uploaded bos */
      cs->VS_INST_ADDR.bo = vs->bo;
      cs->VS_INST_ADDR.offset = 0;
      cs->VS_INST_ADDR.flags = ETNA_RELOC_READ;
      cs->PS_INST_ADDR.bo = fs->bo;
      cs->PS_INST_ADDR.offset = 0;
      cs->PS_INST_ADDR.flags = ETNA_RELOC_READ;
   } else {
      /* clear relocs */
      memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR));
      memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR));
   }

   return true;
}
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_cibool
288bf215546Sopenharmony_cietna_shader_link(struct etna_context *ctx)
289bf215546Sopenharmony_ci{
290bf215546Sopenharmony_ci   if (!ctx->shader.vs || !ctx->shader.fs)
291bf215546Sopenharmony_ci      return false;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   /* re-link vs and fs if needed */
294bf215546Sopenharmony_ci   return etna_link_shaders(ctx, &ctx->shader_state, ctx->shader.vs, ctx->shader.fs);
295bf215546Sopenharmony_ci}
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_civoid
298bf215546Sopenharmony_cietna_destroy_shader(struct etna_shader_variant *shader)
299bf215546Sopenharmony_ci{
300bf215546Sopenharmony_ci   assert(shader);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   FREE(shader->code);
303bf215546Sopenharmony_ci   FREE(shader->uniforms.data);
304bf215546Sopenharmony_ci   FREE(shader->uniforms.contents);
305bf215546Sopenharmony_ci   FREE(shader);
306bf215546Sopenharmony_ci}
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_cistatic bool
309bf215546Sopenharmony_cietna_shader_update_vs_inputs(struct compiled_shader_state *cs,
310bf215546Sopenharmony_ci                             const struct etna_shader_variant *vs,
311bf215546Sopenharmony_ci                             const struct compiled_vertex_elements_state *ves)
312bf215546Sopenharmony_ci{
313bf215546Sopenharmony_ci   unsigned num_temps, cur_temp, num_vs_inputs;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   if (!vs)
316bf215546Sopenharmony_ci      return false;
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   /* Number of vertex elements determines number of VS inputs. Otherwise,
319bf215546Sopenharmony_ci    * the GPU crashes. Allocate any unused vertex elements to VS temporary
320bf215546Sopenharmony_ci    * registers. */
321bf215546Sopenharmony_ci   num_vs_inputs = MAX2(ves->num_elements, vs->infile.num_reg);
322bf215546Sopenharmony_ci   if (num_vs_inputs != ves->num_elements) {
323bf215546Sopenharmony_ci      BUG("Number of elements %u does not match the number of VS inputs %zu",
324bf215546Sopenharmony_ci          ves->num_elements, vs->infile.num_reg);
325bf215546Sopenharmony_ci      return false;
326bf215546Sopenharmony_ci   }
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   cur_temp = vs->num_temps;
329bf215546Sopenharmony_ci   num_temps = num_vs_inputs - vs->infile.num_reg + cur_temp;
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs) |
332bf215546Sopenharmony_ci                        VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8);
333bf215546Sopenharmony_ci   cs->VS_TEMP_REGISTER_CONTROL =
334bf215546Sopenharmony_ci      VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(num_temps);
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   /* vs inputs (attributes) */
337bf215546Sopenharmony_ci   DEFINE_ETNA_BITARRAY(vs_input, 16, 8) = {0};
338bf215546Sopenharmony_ci   for (int idx = 0; idx < num_vs_inputs; ++idx) {
339bf215546Sopenharmony_ci      if (idx < vs->infile.num_reg)
340bf215546Sopenharmony_ci         etna_bitarray_set(vs_input, 8, idx, vs->infile.reg[idx].reg);
341bf215546Sopenharmony_ci      else
342bf215546Sopenharmony_ci         etna_bitarray_set(vs_input, 8, idx, cur_temp++);
343bf215546Sopenharmony_ci   }
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   if (vs->vs_id_in_reg >= 0) {
346bf215546Sopenharmony_ci      cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_COUNT(num_vs_inputs + 1) |
347bf215546Sopenharmony_ci                           VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8) |
348bf215546Sopenharmony_ci                           VIVS_VS_INPUT_COUNT_ID_ENABLE;
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci      etna_bitarray_set(vs_input, 8, num_vs_inputs, vs->vs_id_in_reg);
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci      cs->FE_HALTI5_ID_CONFIG =
353bf215546Sopenharmony_ci         VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_ENABLE |
354bf215546Sopenharmony_ci         VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_ENABLE |
355bf215546Sopenharmony_ci         VIVS_FE_HALTI5_ID_CONFIG_VERTEX_ID_REG(vs->vs_id_in_reg * 4) |
356bf215546Sopenharmony_ci         VIVS_FE_HALTI5_ID_CONFIG_INSTANCE_ID_REG(vs->vs_id_in_reg * 4 + 1);
357bf215546Sopenharmony_ci   }
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   for (int idx = 0; idx < ARRAY_SIZE(cs->VS_INPUT); ++idx)
360bf215546Sopenharmony_ci      cs->VS_INPUT[idx] = vs_input[idx];
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   return true;
363bf215546Sopenharmony_ci}
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_cistatic inline const char *
366bf215546Sopenharmony_cietna_shader_stage(struct etna_shader_variant *shader)
367bf215546Sopenharmony_ci{
368bf215546Sopenharmony_ci   switch (shader->stage) {
369bf215546Sopenharmony_ci   case MESA_SHADER_VERTEX:     return "VERT";
370bf215546Sopenharmony_ci   case MESA_SHADER_FRAGMENT:   return "FRAG";
371bf215546Sopenharmony_ci   case MESA_SHADER_COMPUTE:    return "CL";
372bf215546Sopenharmony_ci   default:
373bf215546Sopenharmony_ci      unreachable("invalid type");
374bf215546Sopenharmony_ci      return NULL;
375bf215546Sopenharmony_ci   }
376bf215546Sopenharmony_ci}
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_cistatic void
379bf215546Sopenharmony_cidump_shader_info(struct etna_shader_variant *v, struct util_debug_callback *debug)
380bf215546Sopenharmony_ci{
381bf215546Sopenharmony_ci   if (!unlikely(etna_mesa_debug & ETNA_DBG_SHADERDB))
382bf215546Sopenharmony_ci      return;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci   util_debug_message(debug, SHADER_INFO,
385bf215546Sopenharmony_ci         "%s shader: %u instructions, %u temps, "
386bf215546Sopenharmony_ci         "%u immediates, %u loops",
387bf215546Sopenharmony_ci         etna_shader_stage(v),
388bf215546Sopenharmony_ci         v->code_size,
389bf215546Sopenharmony_ci         v->num_temps,
390bf215546Sopenharmony_ci         v->uniforms.count,
391bf215546Sopenharmony_ci         v->num_loops);
392bf215546Sopenharmony_ci}
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_cibool
395bf215546Sopenharmony_cietna_shader_update_vertex(struct etna_context *ctx)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   return etna_shader_update_vs_inputs(&ctx->shader_state, ctx->shader.vs,
398bf215546Sopenharmony_ci                                       ctx->vertex_elements);
399bf215546Sopenharmony_ci}
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_cistatic struct etna_shader_variant *
402bf215546Sopenharmony_cicreate_variant(struct etna_shader *shader, struct etna_shader_key key)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   struct etna_shader_variant *v = CALLOC_STRUCT(etna_shader_variant);
405bf215546Sopenharmony_ci   int ret;
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   if (!v)
408bf215546Sopenharmony_ci      return NULL;
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci   v->shader = shader;
411bf215546Sopenharmony_ci   v->key = key;
412bf215546Sopenharmony_ci   v->id = ++shader->variant_count;
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   if (etna_disk_cache_retrieve(shader->compiler, v))
415bf215546Sopenharmony_ci      return v;
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   ret = etna_compile_shader(v);
418bf215546Sopenharmony_ci   if (!ret) {
419bf215546Sopenharmony_ci      debug_error("compile failed!");
420bf215546Sopenharmony_ci      goto fail;
421bf215546Sopenharmony_ci   }
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   etna_disk_cache_store(shader->compiler, v);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   return v;
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_cifail:
428bf215546Sopenharmony_ci   FREE(v);
429bf215546Sopenharmony_ci   return NULL;
430bf215546Sopenharmony_ci}
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_cistruct etna_shader_variant *
433bf215546Sopenharmony_cietna_shader_variant(struct etna_shader *shader, struct etna_shader_key key,
434bf215546Sopenharmony_ci                   struct util_debug_callback *debug)
435bf215546Sopenharmony_ci{
436bf215546Sopenharmony_ci   struct etna_shader_variant *v;
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   for (v = shader->variants; v; v = v->next)
439bf215546Sopenharmony_ci      if (etna_shader_key_equal(&key, &v->key))
440bf215546Sopenharmony_ci         return v;
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci   /* compile new variant if it doesn't exist already */
443bf215546Sopenharmony_ci   v = create_variant(shader, key);
444bf215546Sopenharmony_ci   if (v) {
445bf215546Sopenharmony_ci      v->next = shader->variants;
446bf215546Sopenharmony_ci      shader->variants = v;
447bf215546Sopenharmony_ci      dump_shader_info(v, debug);
448bf215546Sopenharmony_ci   }
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   return v;
451bf215546Sopenharmony_ci}
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci/**
454bf215546Sopenharmony_ci * Should initial variants be compiled synchronously?
455bf215546Sopenharmony_ci *
456bf215546Sopenharmony_ci * The only case where pipe_debug_message() is used in the initial-variants
457bf215546Sopenharmony_ci * path is with ETNA_MESA_DEBUG=shaderdb. So if either debug is disabled (ie.
458bf215546Sopenharmony_ci * debug.debug_message==NULL), or shaderdb stats are not enabled, we can
459bf215546Sopenharmony_ci * compile the initial shader variant asynchronously.
460bf215546Sopenharmony_ci */
461bf215546Sopenharmony_cistatic inline bool
462bf215546Sopenharmony_ciinitial_variants_synchronous(struct etna_context *ctx)
463bf215546Sopenharmony_ci{
464bf215546Sopenharmony_ci   return unlikely(ctx->debug.debug_message) || (etna_mesa_debug & ETNA_DBG_SHADERDB);
465bf215546Sopenharmony_ci}
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_cistatic void
468bf215546Sopenharmony_cicreate_initial_variants_async(void *job, void *gdata, int thread_index)
469bf215546Sopenharmony_ci{
470bf215546Sopenharmony_ci   struct etna_shader *shader = job;
471bf215546Sopenharmony_ci   struct util_debug_callback debug = {};
472bf215546Sopenharmony_ci   static struct etna_shader_key key;
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   etna_shader_variant(shader, key, &debug);
475bf215546Sopenharmony_ci}
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_cistatic void *
478bf215546Sopenharmony_cietna_create_shader_state(struct pipe_context *pctx,
479bf215546Sopenharmony_ci                         const struct pipe_shader_state *pss)
480bf215546Sopenharmony_ci{
481bf215546Sopenharmony_ci   struct etna_context *ctx = etna_context(pctx);
482bf215546Sopenharmony_ci   struct etna_screen *screen = ctx->screen;
483bf215546Sopenharmony_ci   struct etna_compiler *compiler = screen->compiler;
484bf215546Sopenharmony_ci   struct etna_shader *shader = CALLOC_STRUCT(etna_shader);
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   if (!shader)
487bf215546Sopenharmony_ci      return NULL;
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   shader->id = p_atomic_inc_return(&compiler->shader_count);
490bf215546Sopenharmony_ci   shader->specs = &screen->specs;
491bf215546Sopenharmony_ci   shader->compiler = screen->compiler;
492bf215546Sopenharmony_ci   util_queue_fence_init(&shader->ready);
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   shader->nir = (pss->type == PIPE_SHADER_IR_NIR) ? pss->ir.nir :
495bf215546Sopenharmony_ci                  tgsi_to_nir(pss->tokens, pctx->screen, false);
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   etna_disk_cache_init_shader_key(compiler, shader);
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   if (initial_variants_synchronous(ctx)) {
500bf215546Sopenharmony_ci      struct etna_shader_key key = {};
501bf215546Sopenharmony_ci      etna_shader_variant(shader, key, &ctx->debug);
502bf215546Sopenharmony_ci   } else {
503bf215546Sopenharmony_ci      struct etna_screen *screen = ctx->screen;
504bf215546Sopenharmony_ci      util_queue_add_job(&screen->shader_compiler_queue, shader, &shader->ready,
505bf215546Sopenharmony_ci                         create_initial_variants_async, NULL, 0);
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   return shader;
509bf215546Sopenharmony_ci}
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_cistatic void
512bf215546Sopenharmony_cietna_delete_shader_state(struct pipe_context *pctx, void *ss)
513bf215546Sopenharmony_ci{
514bf215546Sopenharmony_ci   struct etna_context *ctx = etna_context(pctx);
515bf215546Sopenharmony_ci   struct etna_screen *screen = ctx->screen;
516bf215546Sopenharmony_ci   struct etna_shader *shader = ss;
517bf215546Sopenharmony_ci   struct etna_shader_variant *v, *t;
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci   util_queue_drop_job(&screen->shader_compiler_queue, &shader->ready);
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci   v = shader->variants;
522bf215546Sopenharmony_ci   while (v) {
523bf215546Sopenharmony_ci      t = v;
524bf215546Sopenharmony_ci      v = v->next;
525bf215546Sopenharmony_ci      if (t->bo)
526bf215546Sopenharmony_ci         etna_bo_del(t->bo);
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci      etna_destroy_shader(t);
529bf215546Sopenharmony_ci   }
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   tgsi_free_tokens(shader->tokens);
532bf215546Sopenharmony_ci   ralloc_free(shader->nir);
533bf215546Sopenharmony_ci   util_queue_fence_destroy(&shader->ready);
534bf215546Sopenharmony_ci   FREE(shader);
535bf215546Sopenharmony_ci}
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_cistatic void
538bf215546Sopenharmony_cietna_bind_fs_state(struct pipe_context *pctx, void *hwcso)
539bf215546Sopenharmony_ci{
540bf215546Sopenharmony_ci   struct etna_context *ctx = etna_context(pctx);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   ctx->shader.bind_fs = hwcso;
543bf215546Sopenharmony_ci   ctx->dirty |= ETNA_DIRTY_SHADER;
544bf215546Sopenharmony_ci}
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_cistatic void
547bf215546Sopenharmony_cietna_bind_vs_state(struct pipe_context *pctx, void *hwcso)
548bf215546Sopenharmony_ci{
549bf215546Sopenharmony_ci   struct etna_context *ctx = etna_context(pctx);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   ctx->shader.bind_vs = hwcso;
552bf215546Sopenharmony_ci   ctx->dirty |= ETNA_DIRTY_SHADER;
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_cistatic void
556bf215546Sopenharmony_cietna_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
557bf215546Sopenharmony_ci                                     unsigned max_threads)
558bf215546Sopenharmony_ci{
559bf215546Sopenharmony_ci   struct etna_screen *screen = etna_screen(pscreen);
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
562bf215546Sopenharmony_ci}
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_cistatic bool
565bf215546Sopenharmony_cietna_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
566bf215546Sopenharmony_ci                                             void *hwcso,
567bf215546Sopenharmony_ci                                             enum pipe_shader_type shader_type)
568bf215546Sopenharmony_ci{
569bf215546Sopenharmony_ci   struct etna_shader *shader = (struct etna_shader *)hwcso;
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   return util_queue_fence_is_signalled(&shader->ready);
572bf215546Sopenharmony_ci}
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_civoid
575bf215546Sopenharmony_cietna_shader_init(struct pipe_context *pctx)
576bf215546Sopenharmony_ci{
577bf215546Sopenharmony_ci   pctx->create_fs_state = etna_create_shader_state;
578bf215546Sopenharmony_ci   pctx->bind_fs_state = etna_bind_fs_state;
579bf215546Sopenharmony_ci   pctx->delete_fs_state = etna_delete_shader_state;
580bf215546Sopenharmony_ci   pctx->create_vs_state = etna_create_shader_state;
581bf215546Sopenharmony_ci   pctx->bind_vs_state = etna_bind_vs_state;
582bf215546Sopenharmony_ci   pctx->delete_vs_state = etna_delete_shader_state;
583bf215546Sopenharmony_ci}
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_cibool
586bf215546Sopenharmony_cietna_shader_screen_init(struct pipe_screen *pscreen)
587bf215546Sopenharmony_ci{
588bf215546Sopenharmony_ci   struct etna_screen *screen = etna_screen(pscreen);
589bf215546Sopenharmony_ci   unsigned num_threads = util_get_cpu_caps()->nr_cpus - 1;
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   /* Create at least one thread - even on single core CPU systems. */
592bf215546Sopenharmony_ci   num_threads = MAX2(1, num_threads);
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   screen->compiler = etna_compiler_create(pscreen->get_name(pscreen), &screen->specs);
595bf215546Sopenharmony_ci   if (!screen->compiler)
596bf215546Sopenharmony_ci      return false;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   pscreen->set_max_shader_compiler_threads = etna_set_max_shader_compiler_threads;
599bf215546Sopenharmony_ci   pscreen->is_parallel_shader_compilation_finished = etna_is_parallel_shader_compilation_finished;
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   return util_queue_init(&screen->shader_compiler_queue, "sh", 64, num_threads,
602bf215546Sopenharmony_ci                          UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY,
603bf215546Sopenharmony_ci                          NULL);
604bf215546Sopenharmony_ci}
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_civoid
607bf215546Sopenharmony_cietna_shader_screen_fini(struct pipe_screen *pscreen)
608bf215546Sopenharmony_ci{
609bf215546Sopenharmony_ci   struct etna_screen *screen = etna_screen(pscreen);
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci   util_queue_destroy(&screen->shader_compiler_queue);
612bf215546Sopenharmony_ci   etna_compiler_destroy(screen->compiler);
613bf215546Sopenharmony_ci}
614