1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "pipe/p_screen.h"
28bf215546Sopenharmony_ci#include "pipe/p_state.h"
29bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h"
30bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h"
31bf215546Sopenharmony_ci#include "util/format/u_format.h"
32bf215546Sopenharmony_ci#include "util/u_inlines.h"
33bf215546Sopenharmony_ci#include "util/u_memory.h"
34bf215546Sopenharmony_ci#include "util/u_string.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h"
37bf215546Sopenharmony_ci#include "nir_serialize.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "freedreno_context.h"
40bf215546Sopenharmony_ci#include "freedreno_util.h"
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci#include "ir3/ir3_cache.h"
43bf215546Sopenharmony_ci#include "ir3/ir3_compiler.h"
44bf215546Sopenharmony_ci#include "ir3/ir3_gallium.h"
45bf215546Sopenharmony_ci#include "ir3/ir3_nir.h"
46bf215546Sopenharmony_ci#include "ir3/ir3_shader.h"
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci/**
49bf215546Sopenharmony_ci * The hardware cso for shader state
50bf215546Sopenharmony_ci *
51bf215546Sopenharmony_ci * Initially just a container for the ir3_shader, but this is where we'll
52bf215546Sopenharmony_ci * plumb in async compile.
53bf215546Sopenharmony_ci */
54bf215546Sopenharmony_cistruct ir3_shader_state {
55bf215546Sopenharmony_ci   struct ir3_shader *shader;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   /* Fence signalled when async compile is completed: */
58bf215546Sopenharmony_ci   struct util_queue_fence ready;
59bf215546Sopenharmony_ci};
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci/**
62bf215546Sopenharmony_ci * Should initial variants be compiled synchronously?
63bf215546Sopenharmony_ci *
64bf215546Sopenharmony_ci * The only case where util_debug_message() is used in the initial-variants
65bf215546Sopenharmony_ci * path is with FD_MESA_DEBUG=shaderdb.  So if either debug is disabled (ie.
66bf215546Sopenharmony_ci * debug.debug_message==NULL), or shaderdb stats are not enabled, we can
67bf215546Sopenharmony_ci * compile the initial shader variant asynchronously.
68bf215546Sopenharmony_ci */
69bf215546Sopenharmony_cistatic bool
70bf215546Sopenharmony_ciinitial_variants_synchronous(struct fd_context *ctx)
71bf215546Sopenharmony_ci{
72bf215546Sopenharmony_ci   return unlikely(ctx->debug.debug_message) || FD_DBG(SHADERDB) ||
73bf215546Sopenharmony_ci          FD_DBG(SERIALC);
74bf215546Sopenharmony_ci}
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_cistatic void
77bf215546Sopenharmony_cidump_shader_info(struct ir3_shader_variant *v,
78bf215546Sopenharmony_ci                 struct util_debug_callback *debug)
79bf215546Sopenharmony_ci{
80bf215546Sopenharmony_ci   if (!FD_DBG(SHADERDB))
81bf215546Sopenharmony_ci      return;
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   util_debug_message(
84bf215546Sopenharmony_ci      debug, SHADER_INFO,
85bf215546Sopenharmony_ci      "%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, "
86bf215546Sopenharmony_ci      "%u dwords, %u last-baryf, %u half, %u full, %u constlen, "
87bf215546Sopenharmony_ci      "%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, "
88bf215546Sopenharmony_ci      "%u stp, %u ldp, %u sstall, %u (ss), %u systall, %u (sy), %d waves, "
89bf215546Sopenharmony_ci      "%d loops\n",
90bf215546Sopenharmony_ci      ir3_shader_stage(v), v->info.instrs_count, v->info.nops_count,
91bf215546Sopenharmony_ci      v->info.instrs_count - v->info.nops_count, v->info.mov_count,
92bf215546Sopenharmony_ci      v->info.cov_count, v->info.sizedwords, v->info.last_baryf,
93bf215546Sopenharmony_ci      v->info.max_half_reg + 1, v->info.max_reg + 1, v->constlen,
94bf215546Sopenharmony_ci      v->info.instrs_per_cat[0], v->info.instrs_per_cat[1],
95bf215546Sopenharmony_ci      v->info.instrs_per_cat[2], v->info.instrs_per_cat[3],
96bf215546Sopenharmony_ci      v->info.instrs_per_cat[4], v->info.instrs_per_cat[5],
97bf215546Sopenharmony_ci      v->info.instrs_per_cat[6], v->info.instrs_per_cat[7],
98bf215546Sopenharmony_ci      v->info.stp_count, v->info.ldp_count, v->info.sstall,
99bf215546Sopenharmony_ci      v->info.ss, v->info.systall, v->info.sy, v->info.max_waves, v->loops);
100bf215546Sopenharmony_ci}
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_cistatic void
103bf215546Sopenharmony_ciupload_shader_variant(struct ir3_shader_variant *v)
104bf215546Sopenharmony_ci{
105bf215546Sopenharmony_ci   struct ir3_compiler *compiler = v->compiler;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   assert(!v->bo);
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci   v->bo =
110bf215546Sopenharmony_ci      fd_bo_new(compiler->dev, v->info.size, FD_BO_NOMAP,
111bf215546Sopenharmony_ci                "%s:%s", ir3_shader_stage(v), v->name);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   /* Always include shaders in kernel crash dumps. */
114bf215546Sopenharmony_ci   fd_bo_mark_for_dump(v->bo);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   fd_bo_upload(v->bo, v->bin, v->info.size);
117bf215546Sopenharmony_ci}
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_cistruct ir3_shader_variant *
120bf215546Sopenharmony_ciir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
121bf215546Sopenharmony_ci                   bool binning_pass, struct util_debug_callback *debug)
122bf215546Sopenharmony_ci{
123bf215546Sopenharmony_ci   struct ir3_shader_variant *v;
124bf215546Sopenharmony_ci   bool created = false;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   /* Some shader key values may not be used by a given ir3_shader (for
127bf215546Sopenharmony_ci    * example, fragment shader saturates in the vertex shader), so clean out
128bf215546Sopenharmony_ci    * those flags to avoid recompiling.
129bf215546Sopenharmony_ci    */
130bf215546Sopenharmony_ci   ir3_key_clear_unused(&key, shader);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   v = ir3_shader_get_variant(shader, &key, binning_pass, false, &created);
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   if (created) {
135bf215546Sopenharmony_ci      if (shader->initial_variants_done) {
136bf215546Sopenharmony_ci         perf_debug_message(debug, SHADER_INFO,
137bf215546Sopenharmony_ci                            "%s shader: recompiling at draw time: global "
138bf215546Sopenharmony_ci                            "0x%08x, vfsamples %x/%x, astc %x/%x\n",
139bf215546Sopenharmony_ci                            ir3_shader_stage(v), key.global, key.vsamples,
140bf215546Sopenharmony_ci                            key.fsamples, key.vastc_srgb, key.fastc_srgb);
141bf215546Sopenharmony_ci      }
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci      dump_shader_info(v, debug);
144bf215546Sopenharmony_ci      upload_shader_variant(v);
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci      if (v->binning) {
147bf215546Sopenharmony_ci         upload_shader_variant(v->binning);
148bf215546Sopenharmony_ci         dump_shader_info(v->binning, debug);
149bf215546Sopenharmony_ci      }
150bf215546Sopenharmony_ci   }
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   return v;
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_cistatic void
156bf215546Sopenharmony_cicopy_stream_out(struct ir3_stream_output_info *i,
157bf215546Sopenharmony_ci                const struct pipe_stream_output_info *p)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride));
160bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output));
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   i->num_outputs = p->num_outputs;
163bf215546Sopenharmony_ci   for (int n = 0; n < ARRAY_SIZE(i->stride); n++)
164bf215546Sopenharmony_ci      i->stride[n] = p->stride[n];
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   for (int n = 0; n < ARRAY_SIZE(i->output); n++) {
167bf215546Sopenharmony_ci      i->output[n].register_index = p->output[n].register_index;
168bf215546Sopenharmony_ci      i->output[n].start_component = p->output[n].start_component;
169bf215546Sopenharmony_ci      i->output[n].num_components = p->output[n].num_components;
170bf215546Sopenharmony_ci      i->output[n].output_buffer = p->output[n].output_buffer;
171bf215546Sopenharmony_ci      i->output[n].dst_offset = p->output[n].dst_offset;
172bf215546Sopenharmony_ci      i->output[n].stream = p->output[n].stream;
173bf215546Sopenharmony_ci   }
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic void
177bf215546Sopenharmony_cicreate_initial_variants(struct ir3_shader_state *hwcso,
178bf215546Sopenharmony_ci                        struct util_debug_callback *debug)
179bf215546Sopenharmony_ci{
180bf215546Sopenharmony_ci   struct ir3_shader *shader = hwcso->shader;
181bf215546Sopenharmony_ci   struct ir3_compiler *compiler = shader->compiler;
182bf215546Sopenharmony_ci   nir_shader *nir = shader->nir;
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   /* Compile standard variants immediately to try to avoid draw-time stalls
185bf215546Sopenharmony_ci    * to run the compiler.
186bf215546Sopenharmony_ci    */
187bf215546Sopenharmony_ci   struct ir3_shader_key key = {
188bf215546Sopenharmony_ci      .tessellation = IR3_TESS_NONE,
189bf215546Sopenharmony_ci      .ucp_enables = MASK(nir->info.clip_distance_array_size),
190bf215546Sopenharmony_ci      .msaa = true,
191bf215546Sopenharmony_ci   };
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   switch (nir->info.stage) {
194bf215546Sopenharmony_ci   case MESA_SHADER_TESS_EVAL:
195bf215546Sopenharmony_ci      key.tessellation = ir3_tess_mode(nir->info.tess._primitive_mode);
196bf215546Sopenharmony_ci      break;
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   case MESA_SHADER_TESS_CTRL:
199bf215546Sopenharmony_ci      /* The primitive_mode field, while it exists for TCS, is not
200bf215546Sopenharmony_ci       * populated (since separable shaders between TCS/TES are legal,
201bf215546Sopenharmony_ci       * so TCS wouldn't have access to TES's declaration).  Make a
202bf215546Sopenharmony_ci       * guess so that we shader-db something plausible for TCS.
203bf215546Sopenharmony_ci       */
204bf215546Sopenharmony_ci      if (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER)
205bf215546Sopenharmony_ci         key.tessellation = IR3_TESS_TRIANGLES;
206bf215546Sopenharmony_ci      else
207bf215546Sopenharmony_ci         key.tessellation = IR3_TESS_ISOLINES;
208bf215546Sopenharmony_ci      break;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   case MESA_SHADER_GEOMETRY:
211bf215546Sopenharmony_ci      key.has_gs = true;
212bf215546Sopenharmony_ci      break;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   default:
215bf215546Sopenharmony_ci      break;
216bf215546Sopenharmony_ci   }
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   key.safe_constlen = false;
219bf215546Sopenharmony_ci   struct ir3_shader_variant *v = ir3_shader_variant(shader, key, false, debug);
220bf215546Sopenharmony_ci   if (!v)
221bf215546Sopenharmony_ci      return;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   if (v->constlen > compiler->max_const_safe) {
224bf215546Sopenharmony_ci      key.safe_constlen = true;
225bf215546Sopenharmony_ci      ir3_shader_variant(shader, key, false, debug);
226bf215546Sopenharmony_ci   }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   /* For vertex shaders, also compile initial binning pass shader: */
229bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX) {
230bf215546Sopenharmony_ci      key.safe_constlen = false;
231bf215546Sopenharmony_ci      v = ir3_shader_variant(shader, key, true, debug);
232bf215546Sopenharmony_ci      if (!v)
233bf215546Sopenharmony_ci         return;
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci      if (v->constlen > compiler->max_const_safe) {
236bf215546Sopenharmony_ci         key.safe_constlen = true;
237bf215546Sopenharmony_ci         ir3_shader_variant(shader, key, true, debug);
238bf215546Sopenharmony_ci      }
239bf215546Sopenharmony_ci   }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   shader->initial_variants_done = true;
242bf215546Sopenharmony_ci}
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_cistatic void
245bf215546Sopenharmony_cicreate_initial_variants_async(void *job, void *gdata, int thread_index)
246bf215546Sopenharmony_ci{
247bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = job;
248bf215546Sopenharmony_ci   struct util_debug_callback debug = {};
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   create_initial_variants(hwcso, &debug);
251bf215546Sopenharmony_ci}
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_cistatic void
254bf215546Sopenharmony_cicreate_initial_compute_variants_async(void *job, void *gdata, int thread_index)
255bf215546Sopenharmony_ci{
256bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = job;
257bf215546Sopenharmony_ci   struct ir3_shader *shader = hwcso->shader;
258bf215546Sopenharmony_ci   struct util_debug_callback debug = {};
259bf215546Sopenharmony_ci   static struct ir3_shader_key key; /* static is implicitly zeroed */
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   ir3_shader_variant(shader, key, false, &debug);
262bf215546Sopenharmony_ci   shader->initial_variants_done = true;
263bf215546Sopenharmony_ci}
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci/* a bit annoying that compute-shader and normal shader state objects
266bf215546Sopenharmony_ci * aren't a bit more aligned.
267bf215546Sopenharmony_ci */
268bf215546Sopenharmony_civoid *
269bf215546Sopenharmony_ciir3_shader_compute_state_create(struct pipe_context *pctx,
270bf215546Sopenharmony_ci                                const struct pipe_compute_state *cso)
271bf215546Sopenharmony_ci{
272bf215546Sopenharmony_ci   struct fd_context *ctx = fd_context(pctx);
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   /* req_input_mem will only be non-zero for cl kernels (ie. clover).
275bf215546Sopenharmony_ci    * This isn't a perfect test because I guess it is possible (but
276bf215546Sopenharmony_ci    * uncommon) for none for the kernel parameters to be a global,
277bf215546Sopenharmony_ci    * but ctx->set_global_bindings() can't fail, so this is the next
278bf215546Sopenharmony_ci    * best place to fail if we need a newer version of kernel driver:
279bf215546Sopenharmony_ci    */
280bf215546Sopenharmony_ci   if ((cso->req_input_mem > 0) &&
281bf215546Sopenharmony_ci       fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
282bf215546Sopenharmony_ci      return NULL;
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   struct ir3_compiler *compiler = ctx->screen->compiler;
286bf215546Sopenharmony_ci   nir_shader *nir;
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   if (cso->ir_type == PIPE_SHADER_IR_NIR) {
289bf215546Sopenharmony_ci      /* we take ownership of the reference: */
290bf215546Sopenharmony_ci      nir = (nir_shader *)cso->prog;
291bf215546Sopenharmony_ci   } else if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
292bf215546Sopenharmony_ci      const nir_shader_compiler_options *options =
293bf215546Sopenharmony_ci            ir3_get_compiler_options(compiler);
294bf215546Sopenharmony_ci      const struct pipe_binary_program_header *hdr = cso->prog;
295bf215546Sopenharmony_ci      struct blob_reader reader;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
298bf215546Sopenharmony_ci      nir = nir_deserialize(NULL, options, &reader);
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci      ir3_finalize_nir(compiler, nir);
301bf215546Sopenharmony_ci   } else {
302bf215546Sopenharmony_ci      assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
303bf215546Sopenharmony_ci      if (ir3_shader_debug & IR3_DBG_DISASM) {
304bf215546Sopenharmony_ci         tgsi_dump(cso->prog, 0);
305bf215546Sopenharmony_ci      }
306bf215546Sopenharmony_ci      nir = tgsi_to_nir(cso->prog, pctx->screen, false);
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   struct ir3_shader *shader =
310bf215546Sopenharmony_ci      ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
311bf215546Sopenharmony_ci                              /* TODO: force to single on a6xx with legacy
312bf215546Sopenharmony_ci                               * ballot extension that uses 64-bit masks
313bf215546Sopenharmony_ci                               */
314bf215546Sopenharmony_ci                              .api_wavesize = IR3_SINGLE_OR_DOUBLE,
315bf215546Sopenharmony_ci                              .real_wavesize = IR3_SINGLE_OR_DOUBLE,
316bf215546Sopenharmony_ci                          }, NULL);
317bf215546Sopenharmony_ci   shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4;     /* byte->dword */
318bf215546Sopenharmony_ci   shader->cs.req_local_mem = cso->req_local_mem;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   util_queue_fence_init(&hwcso->ready);
323bf215546Sopenharmony_ci   hwcso->shader = shader;
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   /* Immediately compile a standard variant.  We have so few variants in our
326bf215546Sopenharmony_ci    * shaders, that doing so almost eliminates draw-time recompiles.  (This
327bf215546Sopenharmony_ci    * is also how we get data from shader-db's ./run)
328bf215546Sopenharmony_ci    */
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci   if (initial_variants_synchronous(ctx)) {
331bf215546Sopenharmony_ci      static struct ir3_shader_key key; /* static is implicitly zeroed */
332bf215546Sopenharmony_ci      ir3_shader_variant(shader, key, false, &ctx->debug);
333bf215546Sopenharmony_ci      shader->initial_variants_done = true;
334bf215546Sopenharmony_ci   } else {
335bf215546Sopenharmony_ci      struct fd_screen *screen = ctx->screen;
336bf215546Sopenharmony_ci      util_queue_add_job(&screen->compile_queue, hwcso, &hwcso->ready,
337bf215546Sopenharmony_ci                         create_initial_compute_variants_async, NULL, 0);
338bf215546Sopenharmony_ci   }
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   return hwcso;
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_civoid *
344bf215546Sopenharmony_ciir3_shader_state_create(struct pipe_context *pctx,
345bf215546Sopenharmony_ci                        const struct pipe_shader_state *cso)
346bf215546Sopenharmony_ci{
347bf215546Sopenharmony_ci   struct fd_context *ctx = fd_context(pctx);
348bf215546Sopenharmony_ci   struct ir3_compiler *compiler = ctx->screen->compiler;
349bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   /*
352bf215546Sopenharmony_ci    * Convert to nir (if necessary):
353bf215546Sopenharmony_ci    */
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   nir_shader *nir;
356bf215546Sopenharmony_ci   if (cso->type == PIPE_SHADER_IR_NIR) {
357bf215546Sopenharmony_ci      /* we take ownership of the reference: */
358bf215546Sopenharmony_ci      nir = cso->ir.nir;
359bf215546Sopenharmony_ci   } else {
360bf215546Sopenharmony_ci      assert(cso->type == PIPE_SHADER_IR_TGSI);
361bf215546Sopenharmony_ci      if (ir3_shader_debug & IR3_DBG_DISASM) {
362bf215546Sopenharmony_ci         tgsi_dump(cso->tokens, 0);
363bf215546Sopenharmony_ci      }
364bf215546Sopenharmony_ci      nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
365bf215546Sopenharmony_ci   }
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci   /*
368bf215546Sopenharmony_ci    * Create ir3_shader:
369bf215546Sopenharmony_ci    *
370bf215546Sopenharmony_ci    * This part is cheap, it doesn't compile initial variants
371bf215546Sopenharmony_ci    */
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   struct ir3_stream_output_info stream_output = {};
374bf215546Sopenharmony_ci   copy_stream_out(&stream_output, &cso->stream_output);
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   hwcso->shader =
377bf215546Sopenharmony_ci      ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
378bf215546Sopenharmony_ci                              /* TODO: force to single on a6xx with legacy
379bf215546Sopenharmony_ci                               * ballot extension that uses 64-bit masks
380bf215546Sopenharmony_ci                               */
381bf215546Sopenharmony_ci                              .api_wavesize = IR3_SINGLE_OR_DOUBLE,
382bf215546Sopenharmony_ci                              .real_wavesize = IR3_SINGLE_OR_DOUBLE,
383bf215546Sopenharmony_ci                          },
384bf215546Sopenharmony_ci                          &stream_output);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci   /*
387bf215546Sopenharmony_ci    * Create initial variants to avoid draw-time stalls.  This is
388bf215546Sopenharmony_ci    * normally done asynchronously, unless debug is enabled (which
389bf215546Sopenharmony_ci    * will be the case for shader-db)
390bf215546Sopenharmony_ci    */
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   util_queue_fence_init(&hwcso->ready);
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci   if (initial_variants_synchronous(ctx)) {
395bf215546Sopenharmony_ci      create_initial_variants(hwcso, &ctx->debug);
396bf215546Sopenharmony_ci   } else {
397bf215546Sopenharmony_ci      util_queue_add_job(&ctx->screen->compile_queue, hwcso, &hwcso->ready,
398bf215546Sopenharmony_ci                         create_initial_variants_async, NULL, 0);
399bf215546Sopenharmony_ci   }
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   return hwcso;
402bf215546Sopenharmony_ci}
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_civoid
405bf215546Sopenharmony_ciir3_shader_state_delete(struct pipe_context *pctx, void *_hwcso)
406bf215546Sopenharmony_ci{
407bf215546Sopenharmony_ci   struct fd_context *ctx = fd_context(pctx);
408bf215546Sopenharmony_ci   struct fd_screen *screen = ctx->screen;
409bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = _hwcso;
410bf215546Sopenharmony_ci   struct ir3_shader *so = hwcso->shader;
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   ir3_cache_invalidate(ctx->shader_cache, hwcso);
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   /* util_queue_drop_job() guarantees that either:
415bf215546Sopenharmony_ci    *  1) job did not execute
416bf215546Sopenharmony_ci    *  2) job completed
417bf215546Sopenharmony_ci    *
418bf215546Sopenharmony_ci    * In either case the fence is signaled
419bf215546Sopenharmony_ci    */
420bf215546Sopenharmony_ci   util_queue_drop_job(&screen->compile_queue, &hwcso->ready);
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   /* free the uploaded shaders, since this is handled outside of the
423bf215546Sopenharmony_ci    * shared ir3 code (ie. not used by turnip):
424bf215546Sopenharmony_ci    */
425bf215546Sopenharmony_ci   for (struct ir3_shader_variant *v = so->variants; v; v = v->next) {
426bf215546Sopenharmony_ci      fd_bo_del(v->bo);
427bf215546Sopenharmony_ci      v->bo = NULL;
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci      if (v->binning && v->binning->bo) {
430bf215546Sopenharmony_ci         fd_bo_del(v->binning->bo);
431bf215546Sopenharmony_ci         v->binning->bo = NULL;
432bf215546Sopenharmony_ci      }
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci   ir3_shader_destroy(so);
436bf215546Sopenharmony_ci   util_queue_fence_destroy(&hwcso->ready);
437bf215546Sopenharmony_ci   free(hwcso);
438bf215546Sopenharmony_ci}
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_cistruct ir3_shader *
441bf215546Sopenharmony_ciir3_get_shader(struct ir3_shader_state *hwcso)
442bf215546Sopenharmony_ci{
443bf215546Sopenharmony_ci   if (!hwcso)
444bf215546Sopenharmony_ci      return NULL;
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   struct ir3_shader *shader = hwcso->shader;
447bf215546Sopenharmony_ci   perf_time (1000, "waited for %s:%s:%s variants",
448bf215546Sopenharmony_ci              _mesa_shader_stage_to_abbrev(shader->type),
449bf215546Sopenharmony_ci              shader->nir->info.name,
450bf215546Sopenharmony_ci              shader->nir->info.label) {
451bf215546Sopenharmony_ci      /* wait for initial variants to compile: */
452bf215546Sopenharmony_ci      util_queue_fence_wait(&hwcso->ready);
453bf215546Sopenharmony_ci   }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   return shader;
456bf215546Sopenharmony_ci}
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_cistruct shader_info *
459bf215546Sopenharmony_ciir3_get_shader_info(struct ir3_shader_state *hwcso)
460bf215546Sopenharmony_ci{
461bf215546Sopenharmony_ci   if (!hwcso)
462bf215546Sopenharmony_ci      return NULL;
463bf215546Sopenharmony_ci   return &hwcso->shader->nir->info;
464bf215546Sopenharmony_ci}
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci/* fixup dirty shader state in case some "unrelated" (from the state-
467bf215546Sopenharmony_ci * tracker's perspective) state change causes us to switch to a
468bf215546Sopenharmony_ci * different variant.
469bf215546Sopenharmony_ci */
470bf215546Sopenharmony_civoid
471bf215546Sopenharmony_ciir3_fixup_shader_state(struct pipe_context *pctx, struct ir3_shader_key *key)
472bf215546Sopenharmony_ci{
473bf215546Sopenharmony_ci   struct fd_context *ctx = fd_context(pctx);
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   if (!ir3_shader_key_equal(ctx->last.key, key)) {
476bf215546Sopenharmony_ci      if (ir3_shader_key_changes_fs(ctx->last.key, key)) {
477bf215546Sopenharmony_ci         fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT,
478bf215546Sopenharmony_ci                                 FD_DIRTY_SHADER_PROG);
479bf215546Sopenharmony_ci      }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci      if (ir3_shader_key_changes_vs(ctx->last.key, key)) {
482bf215546Sopenharmony_ci         fd_context_dirty_shader(ctx, PIPE_SHADER_VERTEX, FD_DIRTY_SHADER_PROG);
483bf215546Sopenharmony_ci      }
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci      /* NOTE: currently only a6xx has gs/tess, but needs no
486bf215546Sopenharmony_ci       * gs/tess specific lowering.
487bf215546Sopenharmony_ci       */
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci      *ctx->last.key = *key;
490bf215546Sopenharmony_ci   }
491bf215546Sopenharmony_ci}
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_cistatic char *
494bf215546Sopenharmony_ciir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir)
495bf215546Sopenharmony_ci{
496bf215546Sopenharmony_ci   struct fd_screen *screen = fd_screen(pscreen);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   ir3_nir_lower_io_to_temporaries(nir);
499bf215546Sopenharmony_ci   ir3_finalize_nir(screen->compiler, nir);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   return NULL;
502bf215546Sopenharmony_ci}
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_cistatic void
505bf215546Sopenharmony_ciir3_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
506bf215546Sopenharmony_ci                                    unsigned max_threads)
507bf215546Sopenharmony_ci{
508bf215546Sopenharmony_ci   struct fd_screen *screen = fd_screen(pscreen);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci   /* This function doesn't allow a greater number of threads than
511bf215546Sopenharmony_ci    * the queue had at its creation.
512bf215546Sopenharmony_ci    */
513bf215546Sopenharmony_ci   util_queue_adjust_num_threads(&screen->compile_queue, max_threads);
514bf215546Sopenharmony_ci}
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_cistatic bool
517bf215546Sopenharmony_ciir3_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
518bf215546Sopenharmony_ci                                            void *shader,
519bf215546Sopenharmony_ci                                            enum pipe_shader_type shader_type)
520bf215546Sopenharmony_ci{
521bf215546Sopenharmony_ci   struct ir3_shader_state *hwcso = (struct ir3_shader_state *)shader;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   return util_queue_fence_is_signalled(&hwcso->ready);
524bf215546Sopenharmony_ci}
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_civoid
527bf215546Sopenharmony_ciir3_prog_init(struct pipe_context *pctx)
528bf215546Sopenharmony_ci{
529bf215546Sopenharmony_ci   pctx->create_vs_state = ir3_shader_state_create;
530bf215546Sopenharmony_ci   pctx->delete_vs_state = ir3_shader_state_delete;
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   pctx->create_tcs_state = ir3_shader_state_create;
533bf215546Sopenharmony_ci   pctx->delete_tcs_state = ir3_shader_state_delete;
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   pctx->create_tes_state = ir3_shader_state_create;
536bf215546Sopenharmony_ci   pctx->delete_tes_state = ir3_shader_state_delete;
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci   pctx->create_gs_state = ir3_shader_state_create;
539bf215546Sopenharmony_ci   pctx->delete_gs_state = ir3_shader_state_delete;
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_ci   pctx->create_fs_state = ir3_shader_state_create;
542bf215546Sopenharmony_ci   pctx->delete_fs_state = ir3_shader_state_delete;
543bf215546Sopenharmony_ci}
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_civoid
546bf215546Sopenharmony_ciir3_screen_init(struct pipe_screen *pscreen)
547bf215546Sopenharmony_ci{
548bf215546Sopenharmony_ci   struct fd_screen *screen = fd_screen(pscreen);
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci   screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id,
551bf215546Sopenharmony_ci                                          &(struct ir3_compiler_options) {});
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   /* TODO do we want to limit things to # of fast cores, or just limit
554bf215546Sopenharmony_ci    * based on total # of both big and little cores.  The little cores
555bf215546Sopenharmony_ci    * tend to be in-order and probably much slower for compiling than
556bf215546Sopenharmony_ci    * big cores.  OTOH if they are sitting idle, maybe it is useful to
557bf215546Sopenharmony_ci    * use them?
558bf215546Sopenharmony_ci    */
559bf215546Sopenharmony_ci   unsigned num_threads = sysconf(_SC_NPROCESSORS_ONLN) - 1;
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   /* Create at least one thread - even on single core CPU systems. */
562bf215546Sopenharmony_ci   num_threads = MAX2(1, num_threads);
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci   util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads,
565bf215546Sopenharmony_ci                   UTIL_QUEUE_INIT_RESIZE_IF_FULL |
566bf215546Sopenharmony_ci                      UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL);
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   pscreen->finalize_nir = ir3_screen_finalize_nir;
569bf215546Sopenharmony_ci   pscreen->set_max_shader_compiler_threads =
570bf215546Sopenharmony_ci      ir3_set_max_shader_compiler_threads;
571bf215546Sopenharmony_ci   pscreen->is_parallel_shader_compilation_finished =
572bf215546Sopenharmony_ci      ir3_is_parallel_shader_compilation_finished;
573bf215546Sopenharmony_ci}
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_civoid
576bf215546Sopenharmony_ciir3_screen_fini(struct pipe_screen *pscreen)
577bf215546Sopenharmony_ci{
578bf215546Sopenharmony_ci   struct fd_screen *screen = fd_screen(pscreen);
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci   util_queue_destroy(&screen->compile_queue);
581bf215546Sopenharmony_ci   ir3_compiler_destroy(screen->compiler);
582bf215546Sopenharmony_ci   screen->compiler = NULL;
583bf215546Sopenharmony_ci}
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_civoid
586bf215546Sopenharmony_ciir3_update_max_tf_vtx(struct fd_context *ctx,
587bf215546Sopenharmony_ci                      const struct ir3_shader_variant *v)
588bf215546Sopenharmony_ci{
589bf215546Sopenharmony_ci   struct fd_streamout_stateobj *so = &ctx->streamout;
590bf215546Sopenharmony_ci   const struct ir3_stream_output_info *info = &v->stream_output;
591bf215546Sopenharmony_ci   uint32_t maxvtxcnt = 0x7fffffff;
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci   if (v->stream_output.num_outputs == 0)
594bf215546Sopenharmony_ci      maxvtxcnt = 0;
595bf215546Sopenharmony_ci   if (so->num_targets == 0)
596bf215546Sopenharmony_ci      maxvtxcnt = 0;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   /* offset to write to is:
599bf215546Sopenharmony_ci    *
600bf215546Sopenharmony_ci    *   total_vtxcnt = vtxcnt + offsets[i]
601bf215546Sopenharmony_ci    *   offset = total_vtxcnt * stride[i]
602bf215546Sopenharmony_ci    *
603bf215546Sopenharmony_ci    *   offset =   vtxcnt * stride[i]       ; calculated in shader
604bf215546Sopenharmony_ci    *            + offsets[i] * stride[i]   ; calculated at emit_tfbos()
605bf215546Sopenharmony_ci    *
606bf215546Sopenharmony_ci    * assuming for each vtx, each target buffer will have data written
607bf215546Sopenharmony_ci    * up to 'offset + stride[i]', that leaves maxvtxcnt as:
608bf215546Sopenharmony_ci    *
609bf215546Sopenharmony_ci    *   buffer_size = (maxvtxcnt * stride[i]) + stride[i]
610bf215546Sopenharmony_ci    *   maxvtxcnt   = (buffer_size - stride[i]) / stride[i]
611bf215546Sopenharmony_ci    *
612bf215546Sopenharmony_ci    * but shader is actually doing a less-than (rather than less-than-
613bf215546Sopenharmony_ci    * equal) check, so we can drop the -stride[i].
614bf215546Sopenharmony_ci    *
615bf215546Sopenharmony_ci    * TODO is assumption about `offset + stride[i]` legit?
616bf215546Sopenharmony_ci    */
617bf215546Sopenharmony_ci   for (unsigned i = 0; i < so->num_targets; i++) {
618bf215546Sopenharmony_ci      struct pipe_stream_output_target *target = so->targets[i];
619bf215546Sopenharmony_ci      unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */
620bf215546Sopenharmony_ci      if (target) {
621bf215546Sopenharmony_ci         uint32_t max = target->buffer_size / stride;
622bf215546Sopenharmony_ci         maxvtxcnt = MIN2(maxvtxcnt, max);
623bf215546Sopenharmony_ci      }
624bf215546Sopenharmony_ci   }
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   ctx->streamout.max_tf_vtx = maxvtxcnt;
627bf215546Sopenharmony_ci}
628