1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32#include <stdio.h>
33#include <errno.h>
34#include "pipe/p_defines.h"
35#include "pipe/p_state.h"
36#include "pipe/p_context.h"
37#include "pipe/p_screen.h"
38#include "util/u_atomic.h"
39#include "util/u_upload_mgr.h"
40#include "util/debug.h"
41#include "util/u_async_debug.h"
42#include "compiler/nir/nir.h"
43#include "compiler/nir/nir_builder.h"
44#include "compiler/nir/nir_serialize.h"
45#include "intel/compiler/brw_compiler.h"
46#include "intel/compiler/brw_nir.h"
47#include "intel/compiler/brw_prim.h"
48#include "iris_context.h"
49#include "nir/tgsi_to_nir.h"
50
51#define KEY_INIT(prefix)                                                   \
52   .prefix.program_string_id = ish->program_id,                            \
53   .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
54#define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
55   .base.program_string_id = prog_id,                      \
56   .base.limit_trig_input_range = limit_trig_input,        \
57   .base.tex.swizzles[0 ... BRW_MAX_SAMPLERS - 1] = 0x688, \
58   .base.tex.compressed_multisample_layout_mask = ~0,      \
59   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
60
61struct iris_threaded_compile_job {
62   struct iris_screen *screen;
63   struct u_upload_mgr *uploader;
64   struct util_debug_callback *dbg;
65   struct iris_uncompiled_shader *ish;
66   struct iris_compiled_shader *shader;
67};
68
69static unsigned
70get_new_program_id(struct iris_screen *screen)
71{
72   return p_atomic_inc_return(&screen->program_id);
73}
74
75void
76iris_finalize_program(struct iris_compiled_shader *shader,
77                      struct brw_stage_prog_data *prog_data,
78                      uint32_t *streamout,
79                      enum brw_param_builtin *system_values,
80                      unsigned num_system_values,
81                      unsigned kernel_input_size,
82                      unsigned num_cbufs,
83                      const struct iris_binding_table *bt)
84{
85   shader->prog_data = prog_data;
86   shader->streamout = streamout;
87   shader->system_values = system_values;
88   shader->num_system_values = num_system_values;
89   shader->kernel_input_size = kernel_input_size;
90   shader->num_cbufs = num_cbufs;
91   shader->bt = *bt;
92
93   ralloc_steal(shader, shader->prog_data);
94   ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
95   ralloc_steal(shader->prog_data, prog_data->param);
96   ralloc_steal(shader, shader->streamout);
97   ralloc_steal(shader, shader->system_values);
98}
99
100static struct brw_vs_prog_key
101iris_to_brw_vs_key(const struct iris_screen *screen,
102                   const struct iris_vs_prog_key *key)
103{
104   return (struct brw_vs_prog_key) {
105      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
106                   key->vue.base.limit_trig_input_range),
107
108      /* Don't tell the backend about our clip plane constants, we've
109       * already lowered them in NIR and don't want it doing it again.
110       */
111      .nr_userclip_plane_consts = 0,
112   };
113}
114
115static struct brw_tcs_prog_key
116iris_to_brw_tcs_key(const struct iris_screen *screen,
117                    const struct iris_tcs_prog_key *key)
118{
119   return (struct brw_tcs_prog_key) {
120      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
121                   key->vue.base.limit_trig_input_range),
122      ._tes_primitive_mode = key->_tes_primitive_mode,
123      .input_vertices = key->input_vertices,
124      .patch_outputs_written = key->patch_outputs_written,
125      .outputs_written = key->outputs_written,
126      .quads_workaround = key->quads_workaround,
127   };
128}
129
130static struct brw_tes_prog_key
131iris_to_brw_tes_key(const struct iris_screen *screen,
132                    const struct iris_tes_prog_key *key)
133{
134   return (struct brw_tes_prog_key) {
135      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
136                   key->vue.base.limit_trig_input_range),
137      .patch_inputs_read = key->patch_inputs_read,
138      .inputs_read = key->inputs_read,
139   };
140}
141
142static struct brw_gs_prog_key
143iris_to_brw_gs_key(const struct iris_screen *screen,
144                   const struct iris_gs_prog_key *key)
145{
146   return (struct brw_gs_prog_key) {
147      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
148                   key->vue.base.limit_trig_input_range),
149   };
150}
151
152static struct brw_wm_prog_key
153iris_to_brw_fs_key(const struct iris_screen *screen,
154                   const struct iris_fs_prog_key *key)
155{
156   return (struct brw_wm_prog_key) {
157      BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
158                   key->base.limit_trig_input_range),
159      .nr_color_regions = key->nr_color_regions,
160      .flat_shade = key->flat_shade,
161      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
162      .alpha_to_coverage = key->alpha_to_coverage,
163      .clamp_fragment_color = key->clamp_fragment_color,
164      .persample_interp = key->persample_interp,
165      .multisample_fbo = key->multisample_fbo,
166      .force_dual_color_blend = key->force_dual_color_blend,
167      .coherent_fb_fetch = key->coherent_fb_fetch,
168      .color_outputs_valid = key->color_outputs_valid,
169      .input_slots_valid = key->input_slots_valid,
170      .ignore_sample_mask_out = !key->multisample_fbo,
171   };
172}
173
174static struct brw_cs_prog_key
175iris_to_brw_cs_key(const struct iris_screen *screen,
176                   const struct iris_cs_prog_key *key)
177{
178   return (struct brw_cs_prog_key) {
179      BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
180                   key->base.limit_trig_input_range),
181   };
182}
183
184static void *
185upload_state(struct u_upload_mgr *uploader,
186             struct iris_state_ref *ref,
187             unsigned size,
188             unsigned alignment)
189{
190   void *p = NULL;
191   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
192   return p;
193}
194
195void
196iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
197                                struct pipe_shader_buffer *buf,
198                                struct iris_state_ref *surf_state,
199                                isl_surf_usage_flags_t usage)
200{
201   struct pipe_context *ctx = &ice->ctx;
202   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
203   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
204
205   void *map =
206      upload_state(ice->state.surface_uploader, surf_state,
207                   screen->isl_dev.ss.size, 64);
208   if (!unlikely(map)) {
209      surf_state->res = NULL;
210      return;
211   }
212
213   struct iris_resource *res = (void *) buf->buffer;
214   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
215   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
216
217   const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
218
219   isl_buffer_fill_state(&screen->isl_dev, map,
220                         .address = res->bo->address + res->offset +
221                                    buf->buffer_offset,
222                         .size_B = buf->buffer_size - res->offset,
223                         .format = dataport ? ISL_FORMAT_RAW
224                                            : ISL_FORMAT_R32G32B32A32_FLOAT,
225                         .swizzle = ISL_SWIZZLE_IDENTITY,
226                         .stride_B = 1,
227                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
228}
229
230static nir_ssa_def *
231get_aoa_deref_offset(nir_builder *b,
232                     nir_deref_instr *deref,
233                     unsigned elem_size)
234{
235   unsigned array_size = elem_size;
236   nir_ssa_def *offset = nir_imm_int(b, 0);
237
238   while (deref->deref_type != nir_deref_type_var) {
239      assert(deref->deref_type == nir_deref_type_array);
240
241      /* This level's element size is the previous level's array size */
242      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
243      assert(deref->arr.index.ssa);
244      offset = nir_iadd(b, offset,
245                           nir_imul(b, index, nir_imm_int(b, array_size)));
246
247      deref = nir_deref_instr_parent(deref);
248      assert(glsl_type_is_array(deref->type));
249      array_size *= glsl_get_length(deref->type);
250   }
251
252   /* Accessing an invalid surface index with the dataport can result in a
253    * hang.  According to the spec "if the index used to select an individual
254    * element is negative or greater than or equal to the size of the array,
255    * the results of the operation are undefined but may not lead to
256    * termination" -- which is one of the possible outcomes of the hang.
257    * Clamp the index to prevent access outside of the array bounds.
258    */
259   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
260}
261
262static void
263iris_lower_storage_image_derefs(nir_shader *nir)
264{
265   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
266
267   nir_builder b;
268   nir_builder_init(&b, impl);
269
270   nir_foreach_block(block, impl) {
271      nir_foreach_instr_safe(instr, block) {
272         if (instr->type != nir_instr_type_intrinsic)
273            continue;
274
275         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
276         switch (intrin->intrinsic) {
277         case nir_intrinsic_image_deref_load:
278         case nir_intrinsic_image_deref_store:
279         case nir_intrinsic_image_deref_atomic_add:
280         case nir_intrinsic_image_deref_atomic_imin:
281         case nir_intrinsic_image_deref_atomic_umin:
282         case nir_intrinsic_image_deref_atomic_imax:
283         case nir_intrinsic_image_deref_atomic_umax:
284         case nir_intrinsic_image_deref_atomic_and:
285         case nir_intrinsic_image_deref_atomic_or:
286         case nir_intrinsic_image_deref_atomic_xor:
287         case nir_intrinsic_image_deref_atomic_exchange:
288         case nir_intrinsic_image_deref_atomic_comp_swap:
289         case nir_intrinsic_image_deref_size:
290         case nir_intrinsic_image_deref_samples:
291         case nir_intrinsic_image_deref_load_raw_intel:
292         case nir_intrinsic_image_deref_store_raw_intel: {
293            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
294            nir_variable *var = nir_deref_instr_get_variable(deref);
295
296            b.cursor = nir_before_instr(&intrin->instr);
297            nir_ssa_def *index =
298               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
299                            get_aoa_deref_offset(&b, deref, 1));
300            nir_rewrite_image_intrinsic(intrin, index, false);
301            break;
302         }
303
304         default:
305            break;
306         }
307      }
308   }
309}
310
311static bool
312iris_uses_image_atomic(const nir_shader *shader)
313{
314   nir_foreach_function(function, shader) {
315      if (function->impl == NULL)
316         continue;
317
318      nir_foreach_block(block, function->impl) {
319         nir_foreach_instr(instr, block) {
320            if (instr->type != nir_instr_type_intrinsic)
321               continue;
322
323            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
324            switch (intrin->intrinsic) {
325            case nir_intrinsic_image_deref_atomic_add:
326            case nir_intrinsic_image_deref_atomic_imin:
327            case nir_intrinsic_image_deref_atomic_umin:
328            case nir_intrinsic_image_deref_atomic_imax:
329            case nir_intrinsic_image_deref_atomic_umax:
330            case nir_intrinsic_image_deref_atomic_and:
331            case nir_intrinsic_image_deref_atomic_or:
332            case nir_intrinsic_image_deref_atomic_xor:
333            case nir_intrinsic_image_deref_atomic_exchange:
334            case nir_intrinsic_image_deref_atomic_comp_swap:
335               unreachable("Should have been lowered in "
336                           "iris_lower_storage_image_derefs");
337
338            case nir_intrinsic_image_atomic_add:
339            case nir_intrinsic_image_atomic_imin:
340            case nir_intrinsic_image_atomic_umin:
341            case nir_intrinsic_image_atomic_imax:
342            case nir_intrinsic_image_atomic_umax:
343            case nir_intrinsic_image_atomic_and:
344            case nir_intrinsic_image_atomic_or:
345            case nir_intrinsic_image_atomic_xor:
346            case nir_intrinsic_image_atomic_exchange:
347            case nir_intrinsic_image_atomic_comp_swap:
348               return true;
349
350            default:
351               break;
352            }
353         }
354      }
355   }
356
357   return false;
358}
359
360/**
361 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
362 */
363static bool
364iris_fix_edge_flags(nir_shader *nir)
365{
366   if (nir->info.stage != MESA_SHADER_VERTEX) {
367      nir_shader_preserve_all_metadata(nir);
368      return false;
369   }
370
371   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
372                                                       VARYING_SLOT_EDGE);
373   if (!var) {
374      nir_shader_preserve_all_metadata(nir);
375      return false;
376   }
377
378   var->data.mode = nir_var_shader_temp;
379   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
380   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
381   nir_fixup_deref_modes(nir);
382
383   nir_foreach_function(f, nir) {
384      if (f->impl) {
385         nir_metadata_preserve(f->impl, nir_metadata_block_index |
386                                        nir_metadata_dominance |
387                                        nir_metadata_live_ssa_defs |
388                                        nir_metadata_loop_analysis);
389      } else {
390         nir_metadata_preserve(f->impl, nir_metadata_all);
391      }
392   }
393
394   return true;
395}
396
397/**
398 * Fix an uncompiled shader's stream output info.
399 *
400 * Core Gallium stores output->register_index as a "slot" number, where
401 * slots are assigned consecutively to all outputs in info->outputs_written.
402 * This naive packing of outputs doesn't work for us - we too have slots,
403 * but the layout is defined by the VUE map, which we won't have until we
404 * compile a specific shader variant.  So, we remap these and simply store
405 * VARYING_SLOT_* in our copy's output->register_index fields.
406 *
407 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
408 * components of our VUE header.  See brw_vue_map.c for the layout.
409 */
410static void
411update_so_info(struct pipe_stream_output_info *so_info,
412               uint64_t outputs_written)
413{
414   uint8_t reverse_map[64] = {};
415   unsigned slot = 0;
416   while (outputs_written) {
417      reverse_map[slot++] = u_bit_scan64(&outputs_written);
418   }
419
420   for (unsigned i = 0; i < so_info->num_outputs; i++) {
421      struct pipe_stream_output *output = &so_info->output[i];
422
423      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
424      output->register_index = reverse_map[output->register_index];
425
426      /* The VUE header contains three scalar fields packed together:
427       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
428       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
429       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
430       */
431      switch (output->register_index) {
432      case VARYING_SLOT_LAYER:
433         assert(output->num_components == 1);
434         output->register_index = VARYING_SLOT_PSIZ;
435         output->start_component = 1;
436         break;
437      case VARYING_SLOT_VIEWPORT:
438         assert(output->num_components == 1);
439         output->register_index = VARYING_SLOT_PSIZ;
440         output->start_component = 2;
441         break;
442      case VARYING_SLOT_PSIZ:
443         assert(output->num_components == 1);
444         output->start_component = 3;
445         break;
446      }
447
448      //info->outputs_written |= 1ull << output->register_index;
449   }
450}
451
452static void
453setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
454                        unsigned offset, unsigned n)
455{
456   assert(offset % sizeof(uint32_t) == 0);
457
458   for (unsigned i = 0; i < n; ++i)
459      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
460
461   for (unsigned i = n; i < 4; ++i)
462      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
463}
464
465/**
466 * Associate NIR uniform variables with the prog_data->param[] mechanism
467 * used by the backend.  Also, decide which UBOs we'd like to push in an
468 * ideal situation (though the backend can reduce this).
469 */
470static void
471iris_setup_uniforms(const struct brw_compiler *compiler,
472                    void *mem_ctx,
473                    nir_shader *nir,
474                    struct brw_stage_prog_data *prog_data,
475                    unsigned kernel_input_size,
476                    enum brw_param_builtin **out_system_values,
477                    unsigned *out_num_system_values,
478                    unsigned *out_num_cbufs)
479{
480   UNUSED const struct intel_device_info *devinfo = compiler->devinfo;
481
482   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
483
484   const unsigned IRIS_MAX_SYSTEM_VALUES =
485      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
486   enum brw_param_builtin *system_values =
487      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
488   unsigned num_system_values = 0;
489
490   unsigned patch_vert_idx = -1;
491   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
492   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
493   unsigned variable_group_size_idx = -1;
494   unsigned work_dim_idx = -1;
495   memset(ucp_idx, -1, sizeof(ucp_idx));
496   memset(img_idx, -1, sizeof(img_idx));
497
498   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
499
500   nir_builder b;
501   nir_builder_init(&b, impl);
502
503   b.cursor = nir_before_block(nir_start_block(impl));
504   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
505
506   /* Turn system value intrinsics into uniforms */
507   nir_foreach_block(block, impl) {
508      nir_foreach_instr_safe(instr, block) {
509         if (instr->type != nir_instr_type_intrinsic)
510            continue;
511
512         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
513         nir_ssa_def *offset;
514
515         switch (intrin->intrinsic) {
516         case nir_intrinsic_load_constant: {
517            unsigned load_size = intrin->dest.ssa.num_components *
518                                 intrin->dest.ssa.bit_size / 8;
519            unsigned load_align = intrin->dest.ssa.bit_size / 8;
520
521            /* This one is special because it reads from the shader constant
522             * data and not cbuf0 which gallium uploads for us.
523             */
524            b.cursor = nir_instr_remove(&intrin->instr);
525
526            nir_ssa_def *offset =
527               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
528                                nir_intrinsic_base(intrin));
529
530            assert(load_size < b.shader->constant_data_size);
531            unsigned max_offset = b.shader->constant_data_size - load_size;
532            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
533
534            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
535               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
536               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
537
538            nir_ssa_def *data =
539               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
540                                                nir_u2u64(&b, offset)),
541                               load_align,
542                               intrin->dest.ssa.num_components,
543                               intrin->dest.ssa.bit_size);
544
545            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
546                                     data);
547            continue;
548         }
549         case nir_intrinsic_load_user_clip_plane: {
550            unsigned ucp = nir_intrinsic_ucp_id(intrin);
551
552            if (ucp_idx[ucp] == -1) {
553               ucp_idx[ucp] = num_system_values;
554               num_system_values += 4;
555            }
556
557            for (int i = 0; i < 4; i++) {
558               system_values[ucp_idx[ucp] + i] =
559                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
560            }
561
562            b.cursor = nir_before_instr(instr);
563            offset = nir_imm_int(&b, system_values_start +
564                                     ucp_idx[ucp] * sizeof(uint32_t));
565            break;
566         }
567         case nir_intrinsic_load_patch_vertices_in:
568            if (patch_vert_idx == -1)
569               patch_vert_idx = num_system_values++;
570
571            system_values[patch_vert_idx] =
572               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
573
574            b.cursor = nir_before_instr(instr);
575            offset = nir_imm_int(&b, system_values_start +
576                                     patch_vert_idx * sizeof(uint32_t));
577            break;
578         case nir_intrinsic_image_deref_load_param_intel: {
579            assert(devinfo->ver < 9);
580            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
581            nir_variable *var = nir_deref_instr_get_variable(deref);
582
583            if (img_idx[var->data.binding] == -1) {
584               /* GL only allows arrays of arrays of images. */
585               assert(glsl_type_is_image(glsl_without_array(var->type)));
586               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
587
588               for (int i = 0; i < num_images; i++) {
589                  const unsigned img = var->data.binding + i;
590
591                  img_idx[img] = num_system_values;
592                  num_system_values += BRW_IMAGE_PARAM_SIZE;
593
594                  uint32_t *img_sv = &system_values[img_idx[img]];
595
596                  setup_vec4_image_sysval(
597                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
598                     offsetof(struct brw_image_param, offset), 2);
599                  setup_vec4_image_sysval(
600                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
601                     offsetof(struct brw_image_param, size), 3);
602                  setup_vec4_image_sysval(
603                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
604                     offsetof(struct brw_image_param, stride), 4);
605                  setup_vec4_image_sysval(
606                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
607                     offsetof(struct brw_image_param, tiling), 3);
608                  setup_vec4_image_sysval(
609                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
610                     offsetof(struct brw_image_param, swizzling), 2);
611               }
612            }
613
614            b.cursor = nir_before_instr(instr);
615            offset = nir_iadd(&b,
616               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
617               nir_imm_int(&b, system_values_start +
618                               img_idx[var->data.binding] * 4 +
619                               nir_intrinsic_base(intrin) * 16));
620            break;
621         }
622         case nir_intrinsic_load_workgroup_size: {
623            assert(nir->info.workgroup_size_variable);
624            if (variable_group_size_idx == -1) {
625               variable_group_size_idx = num_system_values;
626               num_system_values += 3;
627               for (int i = 0; i < 3; i++) {
628                  system_values[variable_group_size_idx + i] =
629                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
630               }
631            }
632
633            b.cursor = nir_before_instr(instr);
634            offset = nir_imm_int(&b, system_values_start +
635                                     variable_group_size_idx * sizeof(uint32_t));
636            break;
637         }
638         case nir_intrinsic_load_work_dim: {
639            if (work_dim_idx == -1) {
640               work_dim_idx = num_system_values++;
641               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
642            }
643            b.cursor = nir_before_instr(instr);
644            offset = nir_imm_int(&b, system_values_start +
645                                     work_dim_idx * sizeof(uint32_t));
646            break;
647         }
648         case nir_intrinsic_load_kernel_input: {
649            assert(nir_intrinsic_base(intrin) +
650                   nir_intrinsic_range(intrin) <= kernel_input_size);
651            b.cursor = nir_before_instr(instr);
652            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
653                                      nir_intrinsic_base(intrin));
654            break;
655         }
656         default:
657            continue;
658         }
659
660         nir_ssa_def *load =
661            nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
662                         temp_ubo_name, offset,
663                         .align_mul = 4,
664                         .align_offset = 0,
665                         .range_base = 0,
666                         .range = ~0);
667
668         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
669                                  load);
670         nir_instr_remove(instr);
671      }
672   }
673
674   nir_validate_shader(nir, "before remapping");
675
676   /* Uniforms are stored in constant buffer 0, the
677    * user-facing UBOs are indexed by one.  So if any constant buffer is
678    * needed, the constant buffer 0 will be needed, so account for it.
679    */
680   unsigned num_cbufs = nir->info.num_ubos;
681   if (num_cbufs || nir->num_uniforms)
682      num_cbufs++;
683
684   /* Place the new params in a new cbuf. */
685   if (num_system_values > 0 || kernel_input_size > 0) {
686      unsigned sysval_cbuf_index = num_cbufs;
687      num_cbufs++;
688
689      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
690                               num_system_values);
691
692      nir_foreach_block(block, impl) {
693         nir_foreach_instr_safe(instr, block) {
694            if (instr->type != nir_instr_type_intrinsic)
695               continue;
696
697            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
698
699            if (load->intrinsic != nir_intrinsic_load_ubo)
700               continue;
701
702            b.cursor = nir_before_instr(instr);
703
704            assert(load->src[0].is_ssa);
705
706            if (load->src[0].ssa == temp_ubo_name) {
707               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
708               nir_instr_rewrite_src(instr, &load->src[0],
709                                     nir_src_for_ssa(imm));
710            }
711         }
712      }
713
714      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
715      nir_opt_constant_folding(nir);
716   } else {
717      ralloc_free(system_values);
718      system_values = NULL;
719   }
720
721   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
722   nir_validate_shader(nir, "after remap");
723
724   /* We don't use params[] but gallium leaves num_uniforms set.  We use this
725    * to detect when cbuf0 exists but we don't need it anymore when we get
726    * here.  Instead, zero it out so that the back-end doesn't get confused
727    * when nr_params * 4 != num_uniforms != nr_params * 4.
728    */
729   nir->num_uniforms = 0;
730
731   *out_system_values = system_values;
732   *out_num_system_values = num_system_values;
733   *out_num_cbufs = num_cbufs;
734}
735
736static const char *surface_group_names[] = {
737   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
738   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
739   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
740   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
741   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
742   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
743   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
744};
745
746static void
747iris_print_binding_table(FILE *fp, const char *name,
748                         const struct iris_binding_table *bt)
749{
750   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
751
752   uint32_t total = 0;
753   uint32_t compacted = 0;
754
755   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
756      uint32_t size = bt->sizes[i];
757      total += size;
758      if (size)
759         compacted += util_bitcount64(bt->used_mask[i]);
760   }
761
762   if (total == 0) {
763      fprintf(fp, "Binding table for %s is empty\n\n", name);
764      return;
765   }
766
767   if (total != compacted) {
768      fprintf(fp, "Binding table for %s "
769              "(compacted to %u entries from %u entries)\n",
770              name, compacted, total);
771   } else {
772      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
773   }
774
775   uint32_t entry = 0;
776   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
777      uint64_t mask = bt->used_mask[i];
778      while (mask) {
779         int index = u_bit_scan64(&mask);
780         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
781      }
782   }
783   fprintf(fp, "\n");
784}
785
786enum {
787   /* Max elements in a surface group. */
788   SURFACE_GROUP_MAX_ELEMENTS = 64,
789};
790
791/**
792 * Map a <group, index> pair to a binding table index.
793 *
794 * For example: <UBO, 5> => binding table index 12
795 */
796uint32_t
797iris_group_index_to_bti(const struct iris_binding_table *bt,
798                        enum iris_surface_group group, uint32_t index)
799{
800   assert(index < bt->sizes[group]);
801   uint64_t mask = bt->used_mask[group];
802   uint64_t bit = 1ull << index;
803   if (bit & mask) {
804      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
805   } else {
806      return IRIS_SURFACE_NOT_USED;
807   }
808}
809
810/**
811 * Map a binding table index back to a <group, index> pair.
812 *
813 * For example: binding table index 12 => <UBO, 5>
814 */
815uint32_t
816iris_bti_to_group_index(const struct iris_binding_table *bt,
817                        enum iris_surface_group group, uint32_t bti)
818{
819   uint64_t used_mask = bt->used_mask[group];
820   assert(bti >= bt->offsets[group]);
821
822   uint32_t c = bti - bt->offsets[group];
823   while (used_mask) {
824      int i = u_bit_scan64(&used_mask);
825      if (c == 0)
826         return i;
827      c--;
828   }
829
830   return IRIS_SURFACE_NOT_USED;
831}
832
833static void
834rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
835                     nir_instr *instr, nir_src *src,
836                     enum iris_surface_group group)
837{
838   assert(bt->sizes[group] > 0);
839
840   b->cursor = nir_before_instr(instr);
841   nir_ssa_def *bti;
842   if (nir_src_is_const(*src)) {
843      uint32_t index = nir_src_as_uint(*src);
844      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
845                           src->ssa->bit_size);
846   } else {
847      /* Indirect usage makes all the surfaces of the group to be available,
848       * so we can just add the base.
849       */
850      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
851      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
852   }
853   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
854}
855
856static void
857mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
858                   enum iris_surface_group group)
859{
860   assert(bt->sizes[group] > 0);
861
862   if (nir_src_is_const(*src)) {
863      uint64_t index = nir_src_as_uint(*src);
864      assert(index < bt->sizes[group]);
865      bt->used_mask[group] |= 1ull << index;
866   } else {
867      /* There's an indirect usage, we need all the surfaces. */
868      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
869   }
870}
871
872static bool
873skip_compacting_binding_tables(void)
874{
875   static int skip = -1;
876   if (skip < 0)
877      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
878   return skip;
879}
880
881/**
882 * Set up the binding table indices and apply to the shader.
883 */
884static void
885iris_setup_binding_table(const struct intel_device_info *devinfo,
886                         struct nir_shader *nir,
887                         struct iris_binding_table *bt,
888                         unsigned num_render_targets,
889                         unsigned num_system_values,
890                         unsigned num_cbufs)
891{
892   const struct shader_info *info = &nir->info;
893
894   memset(bt, 0, sizeof(*bt));
895
896   /* Set the sizes for each surface group.  For some groups, we already know
897    * upfront how many will be used, so mark them.
898    */
899   if (info->stage == MESA_SHADER_FRAGMENT) {
900      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
901      /* All render targets used. */
902      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
903         BITFIELD64_MASK(num_render_targets);
904
905      /* Setup render target read surface group in order to support non-coherent
906       * framebuffer fetch on Gfx8
907       */
908      if (devinfo->ver == 8 && info->outputs_read) {
909         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
910         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
911            BITFIELD64_MASK(num_render_targets);
912      }
913   } else if (info->stage == MESA_SHADER_COMPUTE) {
914      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
915   }
916
917   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
918   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];
919
920   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
921
922   /* Allocate an extra slot in the UBO section for NIR constants.
923    * Binding table compaction will remove it if unnecessary.
924    *
925    * We don't include them in iris_compiled_shader::num_cbufs because
926    * they are uploaded separately from shs->constbuf[], but from a shader
927    * point of view, they're another UBO (at the end of the section).
928    */
929   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
930
931   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
932
933   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
934      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
935
936   /* Mark surfaces used for the cases we don't have the information available
937    * upfront.
938    */
939   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
940   nir_foreach_block (block, impl) {
941      nir_foreach_instr (instr, block) {
942         if (instr->type != nir_instr_type_intrinsic)
943            continue;
944
945         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
946         switch (intrin->intrinsic) {
947         case nir_intrinsic_load_num_workgroups:
948            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
949            break;
950
951         case nir_intrinsic_load_output:
952            if (devinfo->ver == 8) {
953               mark_used_with_src(bt, &intrin->src[0],
954                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
955            }
956            break;
957
958         case nir_intrinsic_image_size:
959         case nir_intrinsic_image_load:
960         case nir_intrinsic_image_store:
961         case nir_intrinsic_image_atomic_add:
962         case nir_intrinsic_image_atomic_imin:
963         case nir_intrinsic_image_atomic_umin:
964         case nir_intrinsic_image_atomic_imax:
965         case nir_intrinsic_image_atomic_umax:
966         case nir_intrinsic_image_atomic_and:
967         case nir_intrinsic_image_atomic_or:
968         case nir_intrinsic_image_atomic_xor:
969         case nir_intrinsic_image_atomic_exchange:
970         case nir_intrinsic_image_atomic_comp_swap:
971         case nir_intrinsic_image_load_raw_intel:
972         case nir_intrinsic_image_store_raw_intel:
973            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
974            break;
975
976         case nir_intrinsic_load_ubo:
977            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
978            break;
979
980         case nir_intrinsic_store_ssbo:
981            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
982            break;
983
984         case nir_intrinsic_get_ssbo_size:
985         case nir_intrinsic_ssbo_atomic_add:
986         case nir_intrinsic_ssbo_atomic_imin:
987         case nir_intrinsic_ssbo_atomic_umin:
988         case nir_intrinsic_ssbo_atomic_imax:
989         case nir_intrinsic_ssbo_atomic_umax:
990         case nir_intrinsic_ssbo_atomic_and:
991         case nir_intrinsic_ssbo_atomic_or:
992         case nir_intrinsic_ssbo_atomic_xor:
993         case nir_intrinsic_ssbo_atomic_exchange:
994         case nir_intrinsic_ssbo_atomic_comp_swap:
995         case nir_intrinsic_ssbo_atomic_fmin:
996         case nir_intrinsic_ssbo_atomic_fmax:
997         case nir_intrinsic_ssbo_atomic_fcomp_swap:
998         case nir_intrinsic_load_ssbo:
999            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1000            break;
1001
1002         default:
1003            break;
1004         }
1005      }
1006   }
1007
1008   /* When disable we just mark everything as used. */
1009   if (unlikely(skip_compacting_binding_tables())) {
1010      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1011         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1012   }
1013
1014   /* Calculate the offsets and the binding table size based on the used
1015    * surfaces.  After this point, the functions to go between "group indices"
1016    * and binding table indices can be used.
1017    */
1018   uint32_t next = 0;
1019   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1020      if (bt->used_mask[i] != 0) {
1021         bt->offsets[i] = next;
1022         next += util_bitcount64(bt->used_mask[i]);
1023      }
1024   }
1025   bt->size_bytes = next * 4;
1026
1027   if (INTEL_DEBUG(DEBUG_BT)) {
1028      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1029   }
1030
1031   /* Apply the binding table indices.  The backend compiler is not expected
1032    * to change those, as we haven't set any of the *_start entries in brw
1033    * binding_table.
1034    */
1035   nir_builder b;
1036   nir_builder_init(&b, impl);
1037
1038   nir_foreach_block (block, impl) {
1039      nir_foreach_instr (instr, block) {
1040         if (instr->type == nir_instr_type_tex) {
1041            nir_tex_instr *tex = nir_instr_as_tex(instr);
1042            tex->texture_index =
1043               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
1044                                       tex->texture_index);
1045            continue;
1046         }
1047
1048         if (instr->type != nir_instr_type_intrinsic)
1049            continue;
1050
1051         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1052         switch (intrin->intrinsic) {
1053         case nir_intrinsic_image_size:
1054         case nir_intrinsic_image_load:
1055         case nir_intrinsic_image_store:
1056         case nir_intrinsic_image_atomic_add:
1057         case nir_intrinsic_image_atomic_imin:
1058         case nir_intrinsic_image_atomic_umin:
1059         case nir_intrinsic_image_atomic_imax:
1060         case nir_intrinsic_image_atomic_umax:
1061         case nir_intrinsic_image_atomic_and:
1062         case nir_intrinsic_image_atomic_or:
1063         case nir_intrinsic_image_atomic_xor:
1064         case nir_intrinsic_image_atomic_exchange:
1065         case nir_intrinsic_image_atomic_comp_swap:
1066         case nir_intrinsic_image_load_raw_intel:
1067         case nir_intrinsic_image_store_raw_intel:
1068            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1069                                 IRIS_SURFACE_GROUP_IMAGE);
1070            break;
1071
1072         case nir_intrinsic_load_ubo:
1073            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1074                                 IRIS_SURFACE_GROUP_UBO);
1075            break;
1076
1077         case nir_intrinsic_store_ssbo:
1078            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1079                                 IRIS_SURFACE_GROUP_SSBO);
1080            break;
1081
1082         case nir_intrinsic_load_output:
1083            if (devinfo->ver == 8) {
1084               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1085                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1086            }
1087            break;
1088
1089         case nir_intrinsic_get_ssbo_size:
1090         case nir_intrinsic_ssbo_atomic_add:
1091         case nir_intrinsic_ssbo_atomic_imin:
1092         case nir_intrinsic_ssbo_atomic_umin:
1093         case nir_intrinsic_ssbo_atomic_imax:
1094         case nir_intrinsic_ssbo_atomic_umax:
1095         case nir_intrinsic_ssbo_atomic_and:
1096         case nir_intrinsic_ssbo_atomic_or:
1097         case nir_intrinsic_ssbo_atomic_xor:
1098         case nir_intrinsic_ssbo_atomic_exchange:
1099         case nir_intrinsic_ssbo_atomic_comp_swap:
1100         case nir_intrinsic_ssbo_atomic_fmin:
1101         case nir_intrinsic_ssbo_atomic_fmax:
1102         case nir_intrinsic_ssbo_atomic_fcomp_swap:
1103         case nir_intrinsic_load_ssbo:
1104            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1105                                 IRIS_SURFACE_GROUP_SSBO);
1106            break;
1107
1108         default:
1109            break;
1110         }
1111      }
1112   }
1113}
1114
1115static void
1116iris_debug_recompile(struct iris_screen *screen,
1117                     struct util_debug_callback *dbg,
1118                     struct iris_uncompiled_shader *ish,
1119                     const struct brw_base_prog_key *key)
1120{
1121   if (!ish || list_is_empty(&ish->variants)
1122            || list_is_singular(&ish->variants))
1123      return;
1124
1125   const struct brw_compiler *c = screen->compiler;
1126   const struct shader_info *info = &ish->nir->info;
1127
1128   brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1129                       _mesa_shader_stage_to_string(info->stage),
1130                       info->name ? info->name : "(no identifier)",
1131                       info->label ? info->label : "");
1132
1133   struct iris_compiled_shader *shader =
1134      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1135   const void *old_iris_key = &shader->key;
1136
1137   union brw_any_prog_key old_key;
1138
1139   switch (info->stage) {
1140   case MESA_SHADER_VERTEX:
1141      old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1142      break;
1143   case MESA_SHADER_TESS_CTRL:
1144      old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1145      break;
1146   case MESA_SHADER_TESS_EVAL:
1147      old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1148      break;
1149   case MESA_SHADER_GEOMETRY:
1150      old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1151      break;
1152   case MESA_SHADER_FRAGMENT:
1153      old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1154      break;
1155   case MESA_SHADER_COMPUTE:
1156      old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1157      break;
1158   default:
1159      unreachable("invalid shader stage");
1160   }
1161
1162   brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1163}
1164
1165static void
1166check_urb_size(struct iris_context *ice,
1167               unsigned needed_size,
1168               gl_shader_stage stage)
1169{
1170   unsigned last_allocated_size = ice->shaders.urb.size[stage];
1171
1172   /* If the last URB allocation wasn't large enough for our needs,
1173    * flag it as needing to be reconfigured.  Otherwise, we can use
1174    * the existing config.  However, if the URB is constrained, and
1175    * we can shrink our size for this stage, we may be able to gain
1176    * extra concurrency by reconfiguring it to be smaller.  Do so.
1177    */
1178   if (last_allocated_size < needed_size ||
1179       (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1180      ice->state.dirty |= IRIS_DIRTY_URB;
1181   }
1182}
1183
1184/**
1185 * Get the shader for the last enabled geometry stage.
1186 *
1187 * This stage is the one which will feed stream output and the rasterizer.
1188 */
1189static gl_shader_stage
1190last_vue_stage(struct iris_context *ice)
1191{
1192   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1193      return MESA_SHADER_GEOMETRY;
1194
1195   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1196      return MESA_SHADER_TESS_EVAL;
1197
1198   return MESA_SHADER_VERTEX;
1199}
1200
1201/**
1202 * \param added  Set to \c true if the variant was added to the list (i.e., a
1203 *               variant matching \c key was not found).  Set to \c false
1204 *               otherwise.
1205 */
1206static inline struct iris_compiled_shader *
1207find_or_add_variant(const struct iris_screen *screen,
1208                    struct iris_uncompiled_shader *ish,
1209                    enum iris_program_cache_id cache_id,
1210                    const void *key, unsigned key_size,
1211                    bool *added)
1212{
1213   struct list_head *start = ish->variants.next;
1214
1215   *added = false;
1216
1217   if (screen->precompile) {
1218      /* Check the first list entry.  There will always be at least one
1219       * variant in the list (most likely the precompile variant), and
1220       * other contexts only append new variants, so we can safely check
1221       * it without locking, saving that cost in the common case.
1222       */
1223      struct iris_compiled_shader *first =
1224         list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1225
1226      if (memcmp(&first->key, key, key_size) == 0) {
1227         util_queue_fence_wait(&first->ready);
1228         return first;
1229      }
1230
1231      /* Skip this one in the loop below */
1232      start = first->link.next;
1233   }
1234
1235   struct iris_compiled_shader *variant = NULL;
1236
1237   /* If it doesn't match, we have to walk the list; other contexts may be
1238    * concurrently appending shaders to it, so we need to lock here.
1239    */
1240   simple_mtx_lock(&ish->lock);
1241
1242   list_for_each_entry_from(struct iris_compiled_shader, v, start,
1243                            &ish->variants, link) {
1244      if (memcmp(&v->key, key, key_size) == 0) {
1245         variant = v;
1246         break;
1247      }
1248   }
1249
1250   if (variant == NULL) {
1251      variant = iris_create_shader_variant(screen, NULL, cache_id,
1252                                           key_size, key);
1253
1254      /* Append our new variant to the shader's variant list. */
1255      list_addtail(&variant->link, &ish->variants);
1256      *added = true;
1257
1258      simple_mtx_unlock(&ish->lock);
1259   } else {
1260      simple_mtx_unlock(&ish->lock);
1261
1262      util_queue_fence_wait(&variant->ready);
1263   }
1264
1265   return variant;
1266}
1267
1268static void
1269iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1270                                 UNUSED int thread_index)
1271{
1272   free(_job);
1273}
1274
1275static void
1276iris_schedule_compile(struct iris_screen *screen,
1277                      struct util_queue_fence *ready_fence,
1278                      struct util_debug_callback *dbg,
1279                      struct iris_threaded_compile_job *job,
1280                      util_queue_execute_func execute)
1281
1282{
1283   struct util_async_debug_callback async_debug;
1284
1285   if (dbg) {
1286      u_async_debug_init(&async_debug);
1287      job->dbg = &async_debug.base;
1288   }
1289
1290   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1291                      iris_threaded_compile_job_delete, 0);
1292
1293   if (screen->driconf.sync_compile || dbg)
1294      util_queue_fence_wait(ready_fence);
1295
1296   if (dbg) {
1297      u_async_debug_drain(&async_debug, dbg);
1298      u_async_debug_cleanup(&async_debug);
1299   }
1300}
1301
1302/**
1303 * Compile a vertex shader, and upload the assembly.
1304 */
1305static void
1306iris_compile_vs(struct iris_screen *screen,
1307                struct u_upload_mgr *uploader,
1308                struct util_debug_callback *dbg,
1309                struct iris_uncompiled_shader *ish,
1310                struct iris_compiled_shader *shader)
1311{
1312   const struct brw_compiler *compiler = screen->compiler;
1313   const struct intel_device_info *devinfo = &screen->devinfo;
1314   void *mem_ctx = ralloc_context(NULL);
1315   struct brw_vs_prog_data *vs_prog_data =
1316      rzalloc(mem_ctx, struct brw_vs_prog_data);
1317   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1318   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1319   enum brw_param_builtin *system_values;
1320   unsigned num_system_values;
1321   unsigned num_cbufs;
1322
1323   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1324   const struct iris_vs_prog_key *const key = &shader->key.vs;
1325
1326   if (key->vue.nr_userclip_plane_consts) {
1327      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1328      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1329                        true, false, NULL);
1330      nir_lower_io_to_temporaries(nir, impl, true, false);
1331      nir_lower_global_vars_to_local(nir);
1332      nir_lower_vars_to_ssa(nir);
1333      nir_shader_gather_info(nir, impl);
1334   }
1335
1336   prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1337
1338   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1339                       &num_system_values, &num_cbufs);
1340
1341   struct iris_binding_table bt;
1342   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1343                            num_system_values, num_cbufs);
1344
1345   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1346
1347   brw_compute_vue_map(devinfo,
1348                       &vue_prog_data->vue_map, nir->info.outputs_written,
1349                       nir->info.separate_shader, /* pos_slots */ 1);
1350
1351   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1352
1353   struct brw_compile_vs_params params = {
1354      .nir = nir,
1355      .key = &brw_key,
1356      .prog_data = vs_prog_data,
1357      .log_data = dbg,
1358   };
1359
1360   const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
1361   if (program == NULL) {
1362      dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1363      ralloc_free(mem_ctx);
1364
1365      shader->compilation_failed = true;
1366      util_queue_fence_signal(&shader->ready);
1367
1368      return;
1369   }
1370
1371   shader->compilation_failed = false;
1372
1373   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1374
1375   uint32_t *so_decls =
1376      screen->vtbl.create_so_decl_list(&ish->stream_output,
1377                                    &vue_prog_data->vue_map);
1378
1379   iris_finalize_program(shader, prog_data, so_decls, system_values,
1380                         num_system_values, 0, num_cbufs, &bt);
1381
1382   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1383                      sizeof(*key), key, program);
1384
1385   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1386
1387   ralloc_free(mem_ctx);
1388}
1389
1390/**
1391 * Update the current vertex shader variant.
1392 *
1393 * Fill out the key, look in the cache, compile and bind if needed.
1394 */
1395static void
1396iris_update_compiled_vs(struct iris_context *ice)
1397{
1398   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1399   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1400   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1401   struct iris_uncompiled_shader *ish =
1402      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1403
1404   struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1405   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1406
1407   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1408   bool added;
1409   struct iris_compiled_shader *shader =
1410      find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1411
1412   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1413                                          &key, sizeof(key))) {
1414      iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1415   }
1416
1417   if (shader->compilation_failed)
1418      shader = NULL;
1419
1420   if (old != shader) {
1421      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1422                                    shader);
1423      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1424      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1425                                IRIS_STAGE_DIRTY_BINDINGS_VS |
1426                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
1427      shs->sysvals_need_upload = true;
1428
1429      unsigned urb_entry_size = shader ?
1430         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1431      check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1432   }
1433}
1434
1435/**
1436 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1437 */
1438const struct shader_info *
1439iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1440{
1441   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1442
1443   if (!ish)
1444      return NULL;
1445
1446   const nir_shader *nir = ish->nir;
1447   return &nir->info;
1448}
1449
1450/**
1451 * Get the union of TCS output and TES input slots.
1452 *
1453 * TCS and TES need to agree on a common URB entry layout.  In particular,
1454 * the data for all patch vertices is stored in a single URB entry (unlike
1455 * GS which has one entry per input vertex).  This means that per-vertex
1456 * array indexing needs a stride.
1457 *
1458 * SSO requires locations to match, but doesn't require the number of
1459 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1460 * So, we need to take the extra step of unifying these on the fly.
1461 */
1462static void
1463get_unified_tess_slots(const struct iris_context *ice,
1464                       uint64_t *per_vertex_slots,
1465                       uint32_t *per_patch_slots)
1466{
1467   const struct shader_info *tcs =
1468      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1469   const struct shader_info *tes =
1470      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1471
1472   *per_vertex_slots = tes->inputs_read;
1473   *per_patch_slots = tes->patch_inputs_read;
1474
1475   if (tcs) {
1476      *per_vertex_slots |= tcs->outputs_written;
1477      *per_patch_slots |= tcs->patch_outputs_written;
1478   }
1479}
1480
1481/**
1482 * Compile a tessellation control shader, and upload the assembly.
1483 */
1484static void
1485iris_compile_tcs(struct iris_screen *screen,
1486                 struct hash_table *passthrough_ht,
1487                 struct u_upload_mgr *uploader,
1488                 struct util_debug_callback *dbg,
1489                 struct iris_uncompiled_shader *ish,
1490                 struct iris_compiled_shader *shader)
1491{
1492   const struct brw_compiler *compiler = screen->compiler;
1493   const struct nir_shader_compiler_options *options =
1494      compiler->nir_options[MESA_SHADER_TESS_CTRL];
1495   void *mem_ctx = ralloc_context(NULL);
1496   struct brw_tcs_prog_data *tcs_prog_data =
1497      rzalloc(mem_ctx, struct brw_tcs_prog_data);
1498   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1499   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1500   const struct intel_device_info *devinfo = &screen->devinfo;
1501   enum brw_param_builtin *system_values = NULL;
1502   unsigned num_system_values = 0;
1503   unsigned num_cbufs = 0;
1504
1505   nir_shader *nir;
1506
1507   struct iris_binding_table bt;
1508
1509   const struct iris_tcs_prog_key *const key = &shader->key.tcs;
1510   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
1511
1512   if (ish) {
1513      nir = nir_shader_clone(mem_ctx, ish->nir);
1514
1515      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1516                          &num_system_values, &num_cbufs);
1517      iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1518                               num_system_values, num_cbufs);
1519      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1520   } else {
1521      nir =
1522         brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1523
1524      /* Reserve space for passing the default tess levels as constants. */
1525      num_cbufs = 1;
1526      num_system_values = 8;
1527      system_values =
1528         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1529      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1530      prog_data->nr_params = num_system_values;
1531
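      /* Point the parameter slots at the default tessellation level
       * built-ins, in the layout the passthrough TCS expects for this
       * primitive mode.
       */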
1532      if (key->_tes_primitive_mode == TESS_PRIMITIVE_QUADS) {
1533         for (int i = 0; i < 4; i++)
1534            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1535
1536         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1537         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1538      } else if (key->_tes_primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
1539         for (int i = 0; i < 3; i++)
1540            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1541
1542         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1543      } else {
1544         assert(key->_tes_primitive_mode == TESS_PRIMITIVE_ISOLINES);
1545         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1546         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1547      }
1548
      /* Manually set up the TCS binding table. */
1550      memset(&bt, 0, sizeof(bt));
1551      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1552      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1553      bt.size_bytes = 4;
1554
1555      prog_data->ubo_ranges[0].length = 1;
1556   }
1557
1558   struct brw_compile_tcs_params params = {
1559      .nir = nir,
1560      .key = &brw_key,
1561      .prog_data = tcs_prog_data,
1562      .log_data = dbg,
1563   };
1564
1565   const unsigned *program = brw_compile_tcs(compiler, mem_ctx, &params);
1566   if (program == NULL) {
1567      dbg_printf("Failed to compile control shader: %s\n", params.error_str);
1568      ralloc_free(mem_ctx);
1569
1570      shader->compilation_failed = true;
1571      util_queue_fence_signal(&shader->ready);
1572
1573      return;
1574   }
1575
1576   shader->compilation_failed = false;
1577
1578   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1579
1580   iris_finalize_program(shader, prog_data, NULL, system_values,
1581                         num_system_values, 0, num_cbufs, &bt);
1582
1583   iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
1584                      IRIS_CACHE_TCS, sizeof(*key), key, program);
1585
1586   if (ish)
1587      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1588
1589   ralloc_free(mem_ctx);
1590}
1591
1592/**
1593 * Update the current tessellation control shader variant.
1594 *
1595 * Fill out the key, look in the cache, compile and bind if needed.
1596 */
1597static void
1598iris_update_compiled_tcs(struct iris_context *ice)
1599{
1600   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1601   struct iris_uncompiled_shader *tcs =
1602      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1603   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1604   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1605   const struct brw_compiler *compiler = screen->compiler;
1606   const struct intel_device_info *devinfo = &screen->devinfo;
1607
1608   const struct shader_info *tes_info =
1609      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
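   /* If no TCS is bound, we will generate a passthrough TCS, so the key is
    * built from the TES info and uses program_string_id 0 (there is no
    * uncompiled shader to take an ID from).
    */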
1610   struct iris_tcs_prog_key key = {
1611      .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1612      ._tes_primitive_mode = tes_info->tess._primitive_mode,
1613      .input_vertices =
1614         !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1615      .quads_workaround = devinfo->ver < 9 &&
1616                          tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
1617                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
1618   };
1619   get_unified_tess_slots(ice, &key.outputs_written,
1620                          &key.patch_outputs_written);
1621   screen->vtbl.populate_tcs_key(ice, &key);
1622
1623   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1624   struct iris_compiled_shader *shader;
1625   bool added = false;
1626
1627   if (tcs != NULL) {
1628      shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
1629                                   sizeof(key), &added);
1630   } else {
1631      /* Look for and possibly create a passthrough TCS */
      shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

      if (shader == NULL) {
         shader = iris_create_shader_variant(screen, ice->shaders.cache,
                                             IRIS_CACHE_TCS, sizeof(key), &key);
         added = true;
      }
   }
1642
   /* If we just added this variant to (whichever) cache, compile it now:
    * either there is no uncompiled shader (tcs is NULL, the passthrough
    * case, which never hits the disk cache) or the disk cache lookup missed.
    */
1647   if (added &&
1648       (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
1649                                                 &key, sizeof(key)))) {
1650      iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
1651                       shader);
1652   }
1653
1654   if (shader->compilation_failed)
1655      shader = NULL;
1656
1657   if (old != shader) {
1658      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
1659                                    shader);
1660      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1661                                IRIS_STAGE_DIRTY_BINDINGS_TCS |
1662                                IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1663      shs->sysvals_need_upload = true;
1664
1665      unsigned urb_entry_size = shader ?
1666         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1667      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
1668   }
1669}
1670
1671/**
1672 * Compile a tessellation evaluation shader, and upload the assembly.
1673 */
1674static void
1675iris_compile_tes(struct iris_screen *screen,
1676                 struct u_upload_mgr *uploader,
1677                 struct util_debug_callback *dbg,
1678                 struct iris_uncompiled_shader *ish,
1679                 struct iris_compiled_shader *shader)
1680{
1681   const struct brw_compiler *compiler = screen->compiler;
1682   void *mem_ctx = ralloc_context(NULL);
1683   struct brw_tes_prog_data *tes_prog_data =
1684      rzalloc(mem_ctx, struct brw_tes_prog_data);
1685   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1686   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1687   enum brw_param_builtin *system_values;
1688   const struct intel_device_info *devinfo = &screen->devinfo;
1689   unsigned num_system_values;
1690   unsigned num_cbufs;
1691
1692   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1693   const struct iris_tes_prog_key *const key = &shader->key.tes;
1694
1695   if (key->vue.nr_userclip_plane_consts) {
1696      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1697      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1698                        true, false, NULL);
1699      nir_lower_io_to_temporaries(nir, impl, true, false);
1700      nir_lower_global_vars_to_local(nir);
1701      nir_lower_vars_to_ssa(nir);
1702      nir_shader_gather_info(nir, impl);
1703   }
1704
1705   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1706                       &num_system_values, &num_cbufs);
1707
1708   struct iris_binding_table bt;
1709   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1710                            num_system_values, num_cbufs);
1711
1712   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1713
1714   struct brw_vue_map input_vue_map;
1715   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1716                            key->patch_inputs_read);
1717
1718   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
1719
1720   struct brw_compile_tes_params params = {
1721      .nir = nir,
1722      .key = &brw_key,
1723      .prog_data = tes_prog_data,
1724      .input_vue_map = &input_vue_map,
1725      .log_data = dbg,
1726   };
1727
1728   const unsigned *program = brw_compile_tes(compiler, mem_ctx, &params);
1729   if (program == NULL) {
1730      dbg_printf("Failed to compile evaluation shader: %s\n", params.error_str);
1731      ralloc_free(mem_ctx);
1732
1733      shader->compilation_failed = true;
1734      util_queue_fence_signal(&shader->ready);
1735
1736      return;
1737   }
1738
1739   shader->compilation_failed = false;
1740
1741   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1742
1743   uint32_t *so_decls =
1744      screen->vtbl.create_so_decl_list(&ish->stream_output,
1745                                    &vue_prog_data->vue_map);
1746
1747   iris_finalize_program(shader, prog_data, so_decls, system_values,
1748                         num_system_values, 0, num_cbufs, &bt);
1749
1750   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
1751                      sizeof(*key), key, program);
1752
1753   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1754
1755   ralloc_free(mem_ctx);
1756}
1757
1758/**
1759 * Update the current tessellation evaluation shader variant.
1760 *
1761 * Fill out the key, look in the cache, compile and bind if needed.
1762 */
1763static void
1764iris_update_compiled_tes(struct iris_context *ice)
1765{
1766   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1767   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1768   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1769   struct iris_uncompiled_shader *ish =
1770      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1771
1772   struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
1773   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1774   screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1775
1776   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1777   bool added;
1778   struct iris_compiled_shader *shader =
1779      find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
1780
1781   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1782                                          &key, sizeof(key))) {
1783      iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
1784   }
1785
1786   if (shader->compilation_failed)
1787      shader = NULL;
1788
1789   if (old != shader) {
1790      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
1791                                    shader);
1792      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1793                                IRIS_STAGE_DIRTY_BINDINGS_TES |
1794                                IRIS_STAGE_DIRTY_CONSTANTS_TES;
1795      shs->sysvals_need_upload = true;
1796
1797      unsigned urb_entry_size = shader ?
1798         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1799      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
1800   }
1801
1802   /* TODO: Could compare and avoid flagging this. */
1803   const struct shader_info *tes_info = &ish->nir->info;
1804   if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1805      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1806      ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1807   }
1808}
1809
1810/**
1811 * Compile a geometry shader, and upload the assembly.
1812 */
1813static void
1814iris_compile_gs(struct iris_screen *screen,
1815                struct u_upload_mgr *uploader,
1816                struct util_debug_callback *dbg,
1817                struct iris_uncompiled_shader *ish,
1818                struct iris_compiled_shader *shader)
1819{
1820   const struct brw_compiler *compiler = screen->compiler;
1821   const struct intel_device_info *devinfo = &screen->devinfo;
1822   void *mem_ctx = ralloc_context(NULL);
1823   struct brw_gs_prog_data *gs_prog_data =
1824      rzalloc(mem_ctx, struct brw_gs_prog_data);
1825   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1826   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1827   enum brw_param_builtin *system_values;
1828   unsigned num_system_values;
1829   unsigned num_cbufs;
1830
1831   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1832   const struct iris_gs_prog_key *const key = &shader->key.gs;
1833
1834   if (key->vue.nr_userclip_plane_consts) {
1835      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1836      nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1837                        false, NULL);
1838      nir_lower_io_to_temporaries(nir, impl, true, false);
1839      nir_lower_global_vars_to_local(nir);
1840      nir_lower_vars_to_ssa(nir);
1841      nir_shader_gather_info(nir, impl);
1842   }
1843
1844   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1845                       &num_system_values, &num_cbufs);
1846
1847   struct iris_binding_table bt;
1848   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1849                            num_system_values, num_cbufs);
1850
1851   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1852
1853   brw_compute_vue_map(devinfo,
1854                       &vue_prog_data->vue_map, nir->info.outputs_written,
1855                       nir->info.separate_shader, /* pos_slots */ 1);
1856
1857   struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
1858
1859   struct brw_compile_gs_params params = {
1860      .nir = nir,
1861      .key = &brw_key,
1862      .prog_data = gs_prog_data,
1863      .log_data = dbg,
1864   };
1865
1866   const unsigned *program = brw_compile_gs(compiler, mem_ctx, &params);
1867   if (program == NULL) {
1868      dbg_printf("Failed to compile geometry shader: %s\n", params.error_str);
1869      ralloc_free(mem_ctx);
1870
1871      shader->compilation_failed = true;
1872      util_queue_fence_signal(&shader->ready);
1873
1874      return;
1875   }
1876
1877   shader->compilation_failed = false;
1878
1879   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1880
1881   uint32_t *so_decls =
1882      screen->vtbl.create_so_decl_list(&ish->stream_output,
1883                                    &vue_prog_data->vue_map);
1884
1885   iris_finalize_program(shader, prog_data, so_decls, system_values,
1886                         num_system_values, 0, num_cbufs, &bt);
1887
1888   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
1889                      sizeof(*key), key, program);
1890
1891   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1892
1893   ralloc_free(mem_ctx);
1894}
1895
1896/**
1897 * Update the current geometry shader variant.
1898 *
1899 * Fill out the key, look in the cache, compile and bind if needed.
1900 */
1901static void
1902iris_update_compiled_gs(struct iris_context *ice)
1903{
1904   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1905   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1906   struct iris_uncompiled_shader *ish =
1907      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1908   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1909   struct iris_compiled_shader *shader = NULL;
1910   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1911
1912   if (ish) {
1913      struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
1914      screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1915
1916      bool added;
1917
1918      shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
1919                                   sizeof(key), &added);
1920
1921      if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1922                                             &key, sizeof(key))) {
1923         iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
1924      }
1925
1926      if (shader->compilation_failed)
1927         shader = NULL;
1928   }
1929
1930   if (old != shader) {
1931      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
1932                                    shader);
1933      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1934                                IRIS_STAGE_DIRTY_BINDINGS_GS |
1935                                IRIS_STAGE_DIRTY_CONSTANTS_GS;
1936      shs->sysvals_need_upload = true;
1937
1938      unsigned urb_entry_size = shader ?
1939         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1940      check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
1941   }
1942}
1943
1944/**
1945 * Compile a fragment (pixel) shader, and upload the assembly.
1946 */
1947static void
1948iris_compile_fs(struct iris_screen *screen,
1949                struct u_upload_mgr *uploader,
1950                struct util_debug_callback *dbg,
1951                struct iris_uncompiled_shader *ish,
1952                struct iris_compiled_shader *shader,
1953                struct brw_vue_map *vue_map)
1954{
1955   const struct brw_compiler *compiler = screen->compiler;
1956   void *mem_ctx = ralloc_context(NULL);
1957   struct brw_wm_prog_data *fs_prog_data =
1958      rzalloc(mem_ctx, struct brw_wm_prog_data);
1959   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1960   enum brw_param_builtin *system_values;
1961   const struct intel_device_info *devinfo = &screen->devinfo;
1962   unsigned num_system_values;
1963   unsigned num_cbufs;
1964
1965   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1966   const struct iris_fs_prog_key *const key = &shader->key.fs;
1967
1968   prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1969
1970   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1971                       &num_system_values, &num_cbufs);
1972
1973   /* Lower output variables to load_output intrinsics before setting up
1974    * binding tables, so iris_setup_binding_table can map any load_output
1975    * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
1976    * non-coherent framebuffer fetches.
1977    */
1978   brw_nir_lower_fs_outputs(nir);
1979
1980   /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
1981    * and do not need a binding table entry with a null surface.  Earlier
1982    * generations need an entry for a null surface.
1983    */
1984   int null_rts = devinfo->ver < 11 ? 1 : 0;
1985
1986   struct iris_binding_table bt;
1987   iris_setup_binding_table(devinfo, nir, &bt,
1988                            MAX2(key->nr_color_regions, null_rts),
1989                            num_system_values, num_cbufs);
1990
1991   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1992
1993   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
1994
1995   struct brw_compile_fs_params params = {
1996      .nir = nir,
1997      .key = &brw_key,
1998      .prog_data = fs_prog_data,
1999
2000      .allow_spilling = true,
2001      .vue_map = vue_map,
2002
2003      .log_data = dbg,
2004   };
2005
2006   const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
2007   if (program == NULL) {
2008      dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
2009      ralloc_free(mem_ctx);
2010
2011      shader->compilation_failed = true;
2012      util_queue_fence_signal(&shader->ready);
2013
2014      return;
2015   }
2016
2017   shader->compilation_failed = false;
2018
2019   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2020
2021   iris_finalize_program(shader, prog_data, NULL, system_values,
2022                         num_system_values, 0, num_cbufs, &bt);
2023
2024   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2025                      sizeof(*key), key, program);
2026
2027   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2028
2029   ralloc_free(mem_ctx);
2030}
2031
2032/**
2033 * Update the current fragment shader variant.
2034 *
2035 * Fill out the key, look in the cache, compile and bind if needed.
2036 */
2037static void
2038iris_update_compiled_fs(struct iris_context *ice)
2039{
2040   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2041   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2042   struct iris_uncompiled_shader *ish =
2043      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2044   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2045   struct iris_fs_prog_key key = { KEY_INIT(base) };
2046   screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2047
2048   struct brw_vue_map *last_vue_map =
2049      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2050
2051   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2052      key.input_slots_valid = last_vue_map->slots_valid;
2053
2054   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2055   bool added;
2056   struct iris_compiled_shader *shader =
2057      find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2058                          sizeof(key), &added);
2059
2060   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2061                                          &key, sizeof(key))) {
2062      iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2063   }
2064
2065   if (shader->compilation_failed)
2066      shader = NULL;
2067
2068   if (old != shader) {
      // XXX: we only need to flag CLIP when the use of NONPERSPECTIVE
      // barycentric modes toggles; we might be able to avoid flagging SBE too.
2071      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2072                                    shader);
2073      ice->state.dirty |= IRIS_DIRTY_WM |
2074                          IRIS_DIRTY_CLIP |
2075                          IRIS_DIRTY_SBE;
2076      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2077                                IRIS_STAGE_DIRTY_BINDINGS_FS |
2078                                IRIS_STAGE_DIRTY_CONSTANTS_FS;
2079      shs->sysvals_need_upload = true;
2080   }
2081}
2082
2083/**
2084 * Update the last enabled stage's VUE map.
2085 *
2086 * When the shader feeding the rasterizer's output interface changes, we
2087 * need to re-emit various packets.
2088 */
2089static void
2090update_last_vue_map(struct iris_context *ice,
2091                    struct iris_compiled_shader *shader)
2092{
2093   struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
2094   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
2095   struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
2096      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2097   const uint64_t changed_slots =
2098      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2099
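   /* If the last stage started or stopped writing gl_ViewportIndex, the
    * number of viewports in use changes, along with the viewport, scissor,
    * and clip state derived from it.
    */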
2100   if (changed_slots & VARYING_BIT_VIEWPORT) {
2101      ice->state.num_viewports =
2102         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2103      ice->state.dirty |= IRIS_DIRTY_CLIP |
2104                          IRIS_DIRTY_SF_CL_VIEWPORT |
2105                          IRIS_DIRTY_CC_VIEWPORT |
2106                          IRIS_DIRTY_SCISSOR_RECT;
2107      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2108         ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2109   }
2110
2111   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2112      ice->state.dirty |= IRIS_DIRTY_SBE;
2113   }
2114
2115   iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2116}
2117
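/**
 * Ensure pull constant descriptors exist for the stage's bound UBOs.
 *
 * For shaders that pull constants, upload a SURFACE_STATE for any bound
 * constant buffer that doesn't have one yet, and flag the stage's bindings
 * dirty if any descriptors were added (or the system values need
 * re-uploading).
 */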
2118static void
2119iris_update_pull_constant_descriptors(struct iris_context *ice,
2120                                      gl_shader_stage stage)
2121{
2122   struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2123
2124   if (!shader || !shader->prog_data->has_ubo_pull)
2125      return;
2126
2127   struct iris_shader_state *shs = &ice->state.shaders[stage];
2128   bool any_new_descriptors =
2129      shader->num_system_values > 0 && shs->sysvals_need_upload;
2130
2131   unsigned bound_cbufs = shs->bound_cbufs;
2132
2133   while (bound_cbufs) {
2134      const int i = u_bit_scan(&bound_cbufs);
2135      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2136      struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2137      if (!surf_state->res && cbuf->buffer) {
2138         iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2139                                         ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2140         any_new_descriptors = true;
2141      }
2142   }
2143
2144   if (any_new_descriptors)
2145      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2146}
2147
2148/**
2149 * Update the current shader variants for the given state.
2150 *
2151 * This should be called on every draw call to ensure that the correct
2152 * shaders are bound.  It will also flag any dirty state triggered by
2153 * swapping out those shaders.
2154 */
2155void
2156iris_update_compiled_shaders(struct iris_context *ice)
2157{
2158   const uint64_t stage_dirty = ice->state.stage_dirty;
2159
   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
                                       NULL);
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
                                       NULL);
         ice->state.stage_dirty |=
            IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
            IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
            IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;

         if (ice->shaders.urb.constrained)
            ice->state.dirty |= IRIS_DIRTY_URB;
      }
   }
2179
2180   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2181      iris_update_compiled_vs(ice);
2182   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2183      iris_update_compiled_gs(ice);
2184
2185   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2186                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2187      const struct iris_compiled_shader *gs =
2188         ice->shaders.prog[MESA_SHADER_GEOMETRY];
2189      const struct iris_compiled_shader *tes =
2190         ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2191
2192      bool points_or_lines = false;
2193
2194      if (gs) {
2195         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
2196         points_or_lines =
2197            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
2198            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
2199      } else if (tes) {
2200         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
2201         points_or_lines =
2202            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
2203            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
2204      }
2205
2206      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* This value feeds the viewport XY clip test enables downstream. */
2208         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2209         ice->state.dirty |= IRIS_DIRTY_CLIP;
2210      }
2211   }
2212
2213   gl_shader_stage last_stage = last_vue_stage(ice);
2214   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2215   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2216   update_last_vue_map(ice, shader);
2217   if (ice->state.streamout != shader->streamout) {
2218      ice->state.streamout = shader->streamout;
2219      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2220   }
2221
2222   if (ice->state.streamout_active) {
2223      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2224         struct iris_stream_output_target *so =
2225            (void *) ice->state.so_target[i];
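         /* Gallium expresses stream output strides in dwords; the hardware
          * wants a stride in bytes.
          */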
2226         if (so)
2227            so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2228      }
2229   }
2230
2231   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2232      iris_update_compiled_fs(ice);
2233
2234   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2235      if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2236         iris_update_pull_constant_descriptors(ice, i);
2237   }
2238}
2239
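/**
 * Compile a compute shader, and upload the assembly.
 */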
2240static void
2241iris_compile_cs(struct iris_screen *screen,
2242                struct u_upload_mgr *uploader,
2243                struct util_debug_callback *dbg,
2244                struct iris_uncompiled_shader *ish,
2245                struct iris_compiled_shader *shader)
2246{
2247   const struct brw_compiler *compiler = screen->compiler;
2248   void *mem_ctx = ralloc_context(NULL);
2249   struct brw_cs_prog_data *cs_prog_data =
2250      rzalloc(mem_ctx, struct brw_cs_prog_data);
2251   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2252   enum brw_param_builtin *system_values;
2253   const struct intel_device_info *devinfo = &screen->devinfo;
2254   unsigned num_system_values;
2255   unsigned num_cbufs;
2256
2257   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2258   const struct iris_cs_prog_key *const key = &shader->key.cs;
2259
2260   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2261
2262   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
2263                       ish->kernel_input_size,
2264                       &system_values, &num_system_values, &num_cbufs);
2265
2266   struct iris_binding_table bt;
2267   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2268                            num_system_values, num_cbufs);
2269
2270   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2271
2272   struct brw_compile_cs_params params = {
2273      .nir = nir,
2274      .key = &brw_key,
2275      .prog_data = cs_prog_data,
2276      .log_data = dbg,
2277   };
2278
2279   const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
2280   if (program == NULL) {
2281      dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2282
2283      shader->compilation_failed = true;
2284      util_queue_fence_signal(&shader->ready);
2285
2286      return;
2287   }
2288
2289   shader->compilation_failed = false;
2290
2291   iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2292
2293   iris_finalize_program(shader, prog_data, NULL, system_values,
2294                         num_system_values, ish->kernel_input_size, num_cbufs,
2295                         &bt);
2296
2297   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
2298                      sizeof(*key), key, program);
2299
2300   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2301
2302   ralloc_free(mem_ctx);
2303}
2304
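/**
 * Update the current compute shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */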
2305static void
2306iris_update_compiled_cs(struct iris_context *ice)
2307{
2308   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2309   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2310   struct iris_uncompiled_shader *ish =
2311      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2312   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2313   struct iris_cs_prog_key key = { KEY_INIT(base) };
2314   screen->vtbl.populate_cs_key(ice, &key);
2315
2316   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2317   bool added;
2318   struct iris_compiled_shader *shader =
2319      find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
2320                          sizeof(key), &added);
2321
2322   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2323                                          &key, sizeof(key))) {
2324      iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2325   }
2326
2327   if (shader->compilation_failed)
2328      shader = NULL;
2329
2330   if (old != shader) {
2331      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
2332                                    shader);
2333      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2334                                IRIS_STAGE_DIRTY_BINDINGS_CS |
2335                                IRIS_STAGE_DIRTY_CONSTANTS_CS;
2336      shs->sysvals_need_upload = true;
2337   }
2338}
2339
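/**
 * The compute counterpart of iris_update_compiled_shaders(): update the
 * compute shader variant and its pull constant descriptors as needed.
 */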
2340void
2341iris_update_compiled_compute_shader(struct iris_context *ice)
2342{
2343   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2344      iris_update_compiled_cs(ice);
2345
2346   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2347      iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2348}
2349
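/**
 * Fill out the per-thread push constant data for a compute dispatch.
 *
 * The per-thread data is laid out in 32B (8 dword) blocks; the only value
 * stored is the thread's subgroup ID, in dword 0.
 */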
2350void
2351iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2352                               unsigned threads,
2353                               uint32_t *dst)
2354{
2355   assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2356   assert(cs_prog_data->push.cross_thread.size == 0);
2357   assert(cs_prog_data->push.per_thread.dwords == 1);
2358   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
2359   for (unsigned t = 0; t < threads; t++)
2360      dst[8 * t] = t;
2361}
2362
2363/**
2364 * Allocate scratch BOs as needed for the given per-thread size and stage.
2365 */
2366struct iris_bo *
2367iris_get_scratch_space(struct iris_context *ice,
2368                       unsigned per_thread_scratch,
2369                       gl_shader_stage stage)
2370{
2371   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2372   struct iris_bufmgr *bufmgr = screen->bufmgr;
2373   const struct intel_device_info *devinfo = &screen->devinfo;
2374
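   /* Per-thread scratch sizes are powers of two, starting at 1KB, so the
    * scratch BO array is indexed by log2(size) - 10.
    */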
2375   unsigned encoded_size = ffs(per_thread_scratch) - 11;
2376   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
2377   assert(per_thread_scratch == 1 << (encoded_size + 10));
2378
2379   /* On GFX version 12.5, scratch access changed to a surface-based model.
2380    * Instead of each shader type having its own layout based on IDs passed
2381    * from the relevant fixed-function unit, all scratch access is based on
2382    * thread IDs like it always has been for compute.
2383    */
2384   if (devinfo->verx10 >= 125)
2385      stage = MESA_SHADER_COMPUTE;
2386
2387   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2388
2389   if (!*bop) {
2390      assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2391      uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2392      *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
2393                           IRIS_MEMZONE_SHADER, 0);
2394   }
2395
2396   return *bop;
2397}
2398
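/**
 * Return (creating it if needed) a SURFACE_STATE for scratch memory, used
 * by the surface-based scratch model on GFX verx10 >= 125.
 */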
2399const struct iris_state_ref *
2400iris_get_scratch_surf(struct iris_context *ice,
2401                      unsigned per_thread_scratch)
2402{
2403   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2404   ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
2405
2406   assert(devinfo->verx10 >= 125);
2407
2408   unsigned encoded_size = ffs(per_thread_scratch) - 11;
2409   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
2410   assert(per_thread_scratch == 1 << (encoded_size + 10));
2411
2412   struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
2413
2414   if (ref->res)
2415      return ref;
2416
2417   struct iris_bo *scratch_bo =
2418      iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
2419
2420   void *map = upload_state(ice->state.bindless_uploader, ref,
2421                            screen->isl_dev.ss.size, 64);
2422
2423   isl_buffer_fill_state(&screen->isl_dev, map,
2424                         .address = scratch_bo->address,
2425                         .size_B = scratch_bo->size,
2426                         .format = ISL_FORMAT_RAW,
2427                         .swizzle = ISL_SWIZZLE_IDENTITY,
2428                         .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
2429                         .stride_B = per_thread_scratch,
2430                         .is_scratch = true);
2431
2432   return ref;
2433}
2434
2435/* ------------------------------------------------------------------- */
2436
2437/**
2438 * The pipe->create_[stage]_state() driver hooks.
2439 *
2440 * Performs basic NIR preprocessing, records any state dependencies, and
2441 * returns an iris_uncompiled_shader as the Gallium CSO.
2442 *
2443 * Actual shader compilation to assembly happens later, at first use.
2444 */
2445static void *
2446iris_create_uncompiled_shader(struct iris_screen *screen,
2447                              nir_shader *nir,
2448                              const struct pipe_stream_output_info *so_info)
2449{
2450   struct iris_uncompiled_shader *ish =
2451      calloc(1, sizeof(struct iris_uncompiled_shader));
2452   if (!ish)
2453      return NULL;
2454
2455   pipe_reference_init(&ish->ref, 1);
2456   list_inithead(&ish->variants);
2457   simple_mtx_init(&ish->lock, mtx_plain);
2458   util_queue_fence_init(&ish->ready);
2459
2460   ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
2461
2462   ish->program_id = get_new_program_id(screen);
2463   ish->nir = nir;
2464   if (so_info) {
2465      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2466      update_so_info(&ish->stream_output, nir->info.outputs_written);
2467   }
2468
2469   if (screen->disk_cache) {
2470      /* Serialize the NIR to a binary blob that we can hash for the disk
2471       * cache.  Drop unnecessary information (like variable names)
2472       * so the serialized NIR is smaller, and also to let us detect more
2473       * isomorphic shaders when hashing, increasing cache hits.
2474       */
2475      struct blob blob;
2476      blob_init(&blob);
2477      nir_serialize(&blob, nir, true);
2478      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2479      blob_finish(&blob);
2480   }
2481
2482   return ish;
2483}
2484
2485static void *
2486iris_create_compute_state(struct pipe_context *ctx,
2487                          const struct pipe_compute_state *state)
2488{
2489   struct iris_context *ice = (void *) ctx;
2490   struct iris_screen *screen = (void *) ctx->screen;
2491   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2492   const nir_shader_compiler_options *options =
2493      screen->compiler->nir_options[MESA_SHADER_COMPUTE];
2494
2495   nir_shader *nir;
2496   switch (state->ir_type) {
2497   case PIPE_SHADER_IR_NIR:
2498      nir = (void *)state->prog;
2499      break;
2500
2501   case PIPE_SHADER_IR_NIR_SERIALIZED: {
2502      struct blob_reader reader;
2503      const struct pipe_binary_program_header *hdr = state->prog;
2504      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2505      nir = nir_deserialize(NULL, options, &reader);
2506      break;
2507   }
2508
2509   default:
2510      unreachable("Unsupported IR");
2511   }
2512
2513   /* Most of iris doesn't really care about the difference between compute
2514    * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
2515    * it's way easier if we just normalize to COMPUTE here.
2516    */
2517   assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2518          nir->info.stage == MESA_SHADER_KERNEL);
2519   nir->info.stage = MESA_SHADER_COMPUTE;
2520
2521   struct iris_uncompiled_shader *ish =
2522      iris_create_uncompiled_shader(screen, nir, NULL);
2523   ish->kernel_input_size = state->req_input_mem;
2524   ish->kernel_shared_size = state->req_local_mem;
2525
2526   // XXX: disallow more than 64KB of shared variables
2527
2528   if (screen->precompile) {
2529      struct iris_cs_prog_key key = { KEY_INIT(base) };
2530
2531      struct iris_compiled_shader *shader =
2532         iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
2533                                    sizeof(key), &key);
2534
2535      /* Append our new variant to the shader's variant list. */
2536      list_addtail(&shader->link, &ish->variants);
2537
2538      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2539                                    &key, sizeof(key))) {
2540         iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2541      }
2542   }
2543
2544   return ish;
2545}
2546
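/**
 * Threaded compile job callback: dispatch to the per-stage compile
 * function for a precompile requested at CSO creation time.
 */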
2547static void
2548iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
2549{
2550   const struct iris_threaded_compile_job *job =
2551      (struct iris_threaded_compile_job *) _job;
2552
2553   struct iris_screen *screen = job->screen;
2554   struct u_upload_mgr *uploader = job->uploader;
2555   struct util_debug_callback *dbg = job->dbg;
2556   struct iris_uncompiled_shader *ish = job->ish;
2557   struct iris_compiled_shader *shader = job->shader;
2558
2559   switch (ish->nir->info.stage) {
2560   case MESA_SHADER_VERTEX:
2561      iris_compile_vs(screen, uploader, dbg, ish, shader);
2562      break;
2563   case MESA_SHADER_TESS_CTRL:
2564      iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
2565      break;
2566   case MESA_SHADER_TESS_EVAL:
2567      iris_compile_tes(screen, uploader, dbg, ish, shader);
2568      break;
2569   case MESA_SHADER_GEOMETRY:
2570      iris_compile_gs(screen, uploader, dbg, ish, shader);
2571      break;
2572   case MESA_SHADER_FRAGMENT:
2573      iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
2574      break;
2575
2576   default:
2577      unreachable("Invalid shader stage.");
2578   }
2579}
2580
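/**
 * The pipe->create_[vs|tcs|tes|gs|fs]_state() driver hook.
 *
 * Converts TGSI to NIR if needed, creates the uncompiled shader CSO, builds
 * a default variant key, and optionally kicks off a threaded precompile.
 */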
2581static void *
2582iris_create_shader_state(struct pipe_context *ctx,
2583                         const struct pipe_shader_state *state)
2584{
2585   struct iris_context *ice = (void *) ctx;
2586   struct iris_screen *screen = (void *) ctx->screen;
2587   struct nir_shader *nir;
2588
2589   if (state->type == PIPE_SHADER_IR_TGSI)
2590      nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2591   else
2592      nir = state->ir.nir;
2593
2594   const struct shader_info *const info = &nir->info;
2595   struct iris_uncompiled_shader *ish =
2596      iris_create_uncompiled_shader(screen, nir, &state->stream_output);
2597
2598   union iris_any_prog_key key;
2599   unsigned key_size = 0;
2600
2601   memset(&key, 0, sizeof(key));
2602
2603   switch (info->stage) {
2604   case MESA_SHADER_VERTEX:
2605      /* User clip planes */
2606      if (info->clip_distance_array_size == 0)
2607         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2608
2609      key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
2610      key_size = sizeof(key.vs);
2611      break;
2612
2613   case MESA_SHADER_TESS_CTRL: {
2614      key.tcs = (struct iris_tcs_prog_key) {
2615         KEY_INIT(vue.base),
2616         // XXX: make sure the linker fills this out from the TES...
2617         ._tes_primitive_mode =
2618         info->tess._primitive_mode ? info->tess._primitive_mode
2619                                   : TESS_PRIMITIVE_TRIANGLES,
2620         .outputs_written = info->outputs_written,
2621         .patch_outputs_written = info->patch_outputs_written,
2622      };
2623
2624      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2625       * We don't have that information, so we randomly guess that the input
2626       * and output patches are the same size.  This is a bad guess, but we
2627       * can't do much better.
2628       */
2629      if (screen->compiler->use_tcs_8_patch)
2630         key.tcs.input_vertices = info->tess.tcs_vertices_out;
2631
2632      key_size = sizeof(key.tcs);
2633      break;
2634   }
2635
2636   case MESA_SHADER_TESS_EVAL:
2637      /* User clip planes */
2638      if (info->clip_distance_array_size == 0)
2639         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2640
2641      key.tes = (struct iris_tes_prog_key) {
2642         KEY_INIT(vue.base),
2643         // XXX: not ideal, need TCS output/TES input unification
2644         .inputs_read = info->inputs_read,
2645         .patch_inputs_read = info->patch_inputs_read,
2646      };
2647
2648      key_size = sizeof(key.tes);
2649      break;
2650
2651   case MESA_SHADER_GEOMETRY:
2652      /* User clip planes */
2653      if (info->clip_distance_array_size == 0)
2654         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2655
2656      key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
2657      key_size = sizeof(key.gs);
2658      break;
2659
2660   case MESA_SHADER_FRAGMENT:
2661      ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2662                  (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2663                  (1ull << IRIS_NOS_RASTERIZER) |
2664                  (1ull << IRIS_NOS_BLEND);
2665
2666      /* The program key needs the VUE map if there are > 16 inputs */
2667      if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
2668         ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2669      }
2670
2671      const uint64_t color_outputs = info->outputs_written &
2672         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2673           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2674           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2675
2676      bool can_rearrange_varyings =
2677         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2678
2679      const struct intel_device_info *devinfo = &screen->devinfo;
2680
2681      key.fs = (struct iris_fs_prog_key) {
2682         KEY_INIT(base),
2683         .nr_color_regions = util_bitcount(color_outputs),
2684         .coherent_fb_fetch = devinfo->ver >= 9,
2685         .input_slots_valid =
2686            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2687      };
2688
2689      key_size = sizeof(key.fs);
2690      break;
2691
2692   default:
2693      unreachable("Invalid shader stage.");
2694   }
2695
2696   if (screen->precompile) {
2697      struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2698
2699      struct iris_compiled_shader *shader =
2700         iris_create_shader_variant(screen, NULL,
2701                                    (enum iris_program_cache_id) info->stage,
2702                                    key_size, &key);
2703
2704      /* Append our new variant to the shader's variant list. */
2705      list_addtail(&shader->link, &ish->variants);
2706
2707      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2708                                    &key, key_size)) {
2709         assert(!util_queue_fence_is_signalled(&shader->ready));
2710
2711         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
2712
2713         job->screen = screen;
2714         job->uploader = uploader;
2715         job->ish = ish;
2716         job->shader = shader;
2717
2718         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
2719                               iris_compile_shader);
2720      }
2721   }
2722
2723   return ish;
2724}
2725
2726/**
2727 * Called when the refcount on the iris_uncompiled_shader reaches 0.
2728 *
2729 * Frees the iris_uncompiled_shader.
2730 *
2731 * \sa iris_delete_shader_state
2732 */
2733void
2734iris_destroy_shader_state(struct pipe_context *ctx, void *state)
2735{
2736   struct iris_uncompiled_shader *ish = state;
2737
2738   /* No need to take ish->lock; we hold the last reference to ish */
2739   list_for_each_entry_safe(struct iris_compiled_shader, shader,
2740                            &ish->variants, link) {
2741      list_del(&shader->link);
2742
2743      iris_shader_variant_reference(&shader, NULL);
2744   }
2745
2746   simple_mtx_destroy(&ish->lock);
2747   util_queue_fence_destroy(&ish->ready);
2748
2749   ralloc_free(ish->nir);
2750   free(ish);
2751}
2752
2753/**
2754 * The pipe->delete_[stage]_state() driver hooks.
2755 *
2756 * \sa iris_destroy_shader_state
2757 */
2758static void
2759iris_delete_shader_state(struct pipe_context *ctx, void *state)
2760{
2761   struct iris_uncompiled_shader *ish = state;
2762   struct iris_context *ice = (void *) ctx;
2763
2764   const gl_shader_stage stage = ish->nir->info.stage;
2765
2766   if (ice->shaders.uncompiled[stage] == ish) {
2767      ice->shaders.uncompiled[stage] = NULL;
2768      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2769   }
2770
2771   if (pipe_reference(&ish->ref, NULL))
2772      iris_destroy_shader_state(ctx, state);
2773}
2774
2775/**
2776 * The pipe->bind_[stage]_state() driver hook.
2777 *
2778 * Binds an uncompiled shader as the current one for a particular stage.
2779 * Updates dirty tracking to account for the shader's NOS.
2780 */
2781static void
2782bind_shader_state(struct iris_context *ice,
2783                  struct iris_uncompiled_shader *ish,
2784                  gl_shader_stage stage)
2785{
2786   uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2787   const uint64_t nos = ish ? ish->nos : 0;
2788
2789   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2790   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2791
2792   if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
2793       (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
2794      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2795   }
2796
2797   ice->shaders.uncompiled[stage] = ish;
2798   ice->state.stage_dirty |= stage_dirty_bit;
2799
   /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they
    * change (or that they no longer need to do so).
    */
2803   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2804      if (nos & (1 << i))
2805         ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
2806      else
2807         ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
2808   }
2809}
2810
2811static void
2812iris_bind_vs_state(struct pipe_context *ctx, void *state)
2813{
2814   struct iris_context *ice = (struct iris_context *)ctx;
2815   struct iris_uncompiled_shader *ish = state;
2816
2817   if (ish) {
2818      const struct shader_info *info = &ish->nir->info;
2819      if (ice->state.window_space_position != info->vs.window_space_position) {
2820         ice->state.window_space_position = info->vs.window_space_position;
2821
2822         ice->state.dirty |= IRIS_DIRTY_CLIP |
2823                             IRIS_DIRTY_RASTER |
2824                             IRIS_DIRTY_CC_VIEWPORT;
2825      }
2826
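      /* The vertex element and SGVS setup depends on which vertex/instance
       * ID and draw parameter system values the shader reads, so flag the
       * vertex buffers and elements dirty if any of that changed.
       */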
2827      const bool uses_draw_params =
2828         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
2829         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
2830      const bool uses_derived_draw_params =
2831         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
2832         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
2833      const bool needs_sgvs_element = uses_draw_params ||
2834         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
2835         BITSET_TEST(info->system_values_read,
2836                     SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
2837
2838      if (ice->state.vs_uses_draw_params != uses_draw_params ||
2839          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
2840          ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
2841          ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
2842         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
2843                             IRIS_DIRTY_VERTEX_ELEMENTS;
2844      }
2845
2846      ice->state.vs_uses_draw_params = uses_draw_params;
2847      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
2848      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
2849      ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
2850   }
2851
2852   bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2853}
2854
2855static void
2856iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2857{
2858   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2859}
2860
2861static void
2862iris_bind_tes_state(struct pipe_context *ctx, void *state)
2863{
2864   struct iris_context *ice = (struct iris_context *)ctx;
2865   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2866   const struct intel_device_info *devinfo = &screen->devinfo;
2867
2868   /* Enabling/disabling optional stages requires a URB reconfiguration. */
2869   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2870      ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
2871                                            IRIS_DIRTY_VFG : 0);
2872
2873   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2874}
2875
2876static void
2877iris_bind_gs_state(struct pipe_context *ctx, void *state)
2878{
2879   struct iris_context *ice = (struct iris_context *)ctx;
2880
2881   /* Enabling/disabling optional stages requires a URB reconfiguration. */
2882   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2883      ice->state.dirty |= IRIS_DIRTY_URB;
2884
2885   bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2886}
2887
static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

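   /* On Gfx8, the PMA stall fix depends on fragment shader state, so it may
    * need to be recomputed when the shader changes.
    */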
   if (devinfo->ver == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;

   bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

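/**
 * The pipe_screen->finalize_nir() driver hook.
 *
 * Runs the generic brw NIR preprocessing and iris-specific lowering passes
 * on a freshly created shader, so that later per-variant compiles start
 * from a common baseline.
 */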
static char *
iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
{
   struct iris_screen *screen = (struct iris_screen *)_screen;
   struct nir_shader *nir = (struct nir_shader *) nirptr;
   const struct intel_device_info *devinfo = &screen->devinfo;

   NIR_PASS_V(nir, iris_fix_edge_flags);

   brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   nir_sweep(nir);

   return NULL;
}

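/**
 * The pipe_screen->set_max_shader_compiler_threads() driver hook.
 */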
static void
iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
                                     unsigned max_threads)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;
   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
}

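/**
 * The pipe_screen->is_parallel_shader_compilation_finished() driver hook.
 *
 * Reports whether the asynchronous precompile of a shader has finished.
 */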
static bool
iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
                                             void *v_shader,
                                             enum pipe_shader_type p_stage)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;

   /* Threaded compilation is only used for the precompile.  If precompile is
    * disabled, threaded compilation is "done."
    */
   if (!screen->precompile)
      return true;

   struct iris_uncompiled_shader *ish = v_shader;

   /* When precompile is enabled, the first entry is the precompile variant.
    * Check the ready fence of the precompile variant.
    */
   struct iris_compiled_shader *first =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);

   return util_queue_fence_is_signalled(&first->ready);
}

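/**
 * Install the pipe_screen hooks related to shader compilation.
 */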
void
iris_init_screen_program_functions(struct pipe_screen *pscreen)
{
   pscreen->is_parallel_shader_compilation_finished =
      iris_is_parallel_shader_compilation_finished;
   pscreen->set_max_shader_compiler_threads =
      iris_set_max_shader_compiler_threads;
   pscreen->finalize_nir = iris_finalize_nir;
}

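/**
 * Install the pipe_context hooks for creating, deleting, and binding
 * shader CSOs.
 */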
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_shader_state;
   ctx->create_tcs_state = iris_create_shader_state;
   ctx->create_tes_state = iris_create_shader_state;
   ctx->create_gs_state  = iris_create_shader_state;
   ctx->create_fs_state  = iris_create_shader_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state  = iris_delete_shader_state;
   ctx->delete_fs_state  = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}