1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#ifndef IRIS_CONTEXT_H
24#define IRIS_CONTEXT_H
25
26#include "pipe/p_context.h"
27#include "pipe/p_state.h"
28#include "util/perf/u_trace.h"
29#include "util/set.h"
30#include "util/slab.h"
31#include "util/u_debug.h"
32#include "util/macros.h"
33#include "util/u_threaded_context.h"
34#include "intel/blorp/blorp.h"
35#include "intel/dev/intel_debug.h"
36#include "intel/common/intel_l3_config.h"
37#include "intel/compiler/brw_compiler.h"
38#include "intel/ds/intel_driver_ds.h"
39#include "iris_batch.h"
40#include "iris_binder.h"
41#include "iris_fence.h"
42#include "iris_resource.h"
43#include "iris_screen.h"
44
/* Forward declarations, so the declarations in this header can name these
 * types without needing their full definitions.
 */
struct iris_bo;
struct iris_context;
struct blorp_batch;
struct blorp_params;
49
/* Driver-wide limits. */
#define IRIS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
/* Sizes the samplers[] and textures[] arrays in struct iris_shader_state. */
#define IRIS_MAX_TEXTURE_SAMPLERS 32
/* IRIS_MAX_ABOS and IRIS_MAX_SSBOS must be the same. */
#define IRIS_MAX_ABOS 16
#define IRIS_MAX_SSBOS 16
/* Sizes the viewports[] and scissors[] arrays in iris_context::state. */
#define IRIS_MAX_VIEWPORTS 16
#define IRIS_MAX_CLIP_PLANES 8
/* Sizes the global_bindings[] array in iris_context::state. */
#define IRIS_MAX_GLOBAL_BINDINGS 32
58
/**
 * Domain tag of a packed BRW_PARAM() value.  The tag lives in the top
 * byte of the 32-bit param and selects how the low 24 bits are read.
 */
enum iris_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_IMAGE   = 1,
};
63
/* Buffer-object reuse modes.
 * NOTE(review): presumably the values of a driconf option -- confirm
 * against the code that reads the option.
 */
enum {
   DRI_CONF_BO_REUSE_DISABLED = 0,
   DRI_CONF_BO_REUSE_ALL      = 1
};
68
/*
 * Packed 32-bit system-value ("param") encoding:
 *
 *    bits 31..24   domain (enum iris_param_domain)
 *    bits 23..0    domain-specific value
 *
 * For the IMAGE domain the value is split again by BRW_PARAM_IMAGE():
 *
 *    bits 23..8    image index
 *    bits  7..0    offset
 *
 * BRW_PARAM() does not mask its arguments; callers must pass values that
 * fit the fields above.
 */
#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value)   (BRW_PARAM_VALUE(value) >> 8)
/* Extract the whole 8-bit offset field written by BRW_PARAM_IMAGE(), so a
 * pack/unpack round trip is lossless for every offset the encoding can
 * hold (a narrower mask would silently truncate offsets >= 16).
 */
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xff)
75
/**
 * Dirty flags.  When state changes, we flag some combination of these
 * to indicate that particular GPU commands need to be re-emitted.
 *
 * Each bit typically corresponds to a single 3DSTATE_* command packet, but
 * in rare cases they map to a group of related packets that need to be
 * emitted together.
 *
 * The flags are 64-bit (1ull) values; bits 0 through 35 are assigned below.
 *
 * See iris_upload_render_state().
 */
#define IRIS_DIRTY_COLOR_CALC_STATE               (1ull <<  0)
#define IRIS_DIRTY_POLYGON_STIPPLE                (1ull <<  1)
#define IRIS_DIRTY_SCISSOR_RECT                   (1ull <<  2)
#define IRIS_DIRTY_WM_DEPTH_STENCIL               (1ull <<  3)
#define IRIS_DIRTY_CC_VIEWPORT                    (1ull <<  4)
#define IRIS_DIRTY_SF_CL_VIEWPORT                 (1ull <<  5)
#define IRIS_DIRTY_PS_BLEND                       (1ull <<  6)
#define IRIS_DIRTY_BLEND_STATE                    (1ull <<  7)
#define IRIS_DIRTY_RASTER                         (1ull <<  8)
#define IRIS_DIRTY_CLIP                           (1ull <<  9)
#define IRIS_DIRTY_SBE                            (1ull << 10)
#define IRIS_DIRTY_LINE_STIPPLE                   (1ull << 11)
#define IRIS_DIRTY_VERTEX_ELEMENTS                (1ull << 12)
#define IRIS_DIRTY_MULTISAMPLE                    (1ull << 13)
#define IRIS_DIRTY_VERTEX_BUFFERS                 (1ull << 14)
#define IRIS_DIRTY_SAMPLE_MASK                    (1ull << 15)
#define IRIS_DIRTY_URB                            (1ull << 16)
#define IRIS_DIRTY_DEPTH_BUFFER                   (1ull << 17)
#define IRIS_DIRTY_WM                             (1ull << 18)
#define IRIS_DIRTY_SO_BUFFERS                     (1ull << 19)
#define IRIS_DIRTY_SO_DECL_LIST                   (1ull << 20)
#define IRIS_DIRTY_STREAMOUT                      (1ull << 21)
#define IRIS_DIRTY_VF_SGVS                        (1ull << 22)
#define IRIS_DIRTY_VF                             (1ull << 23)
#define IRIS_DIRTY_VF_TOPOLOGY                    (1ull << 24)
#define IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES    (1ull << 25)
#define IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES   (1ull << 26)
#define IRIS_DIRTY_VF_STATISTICS                  (1ull << 27)
#define IRIS_DIRTY_PMA_FIX                        (1ull << 28)
#define IRIS_DIRTY_DEPTH_BOUNDS                   (1ull << 29)
#define IRIS_DIRTY_RENDER_BUFFER                  (1ull << 30)
#define IRIS_DIRTY_STENCIL_REF                    (1ull << 31)
#define IRIS_DIRTY_VERTEX_BUFFER_FLUSHES          (1ull << 32)
#define IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES     (1ull << 33)
#define IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES    (1ull << 34)
#define IRIS_DIRTY_VFG                            (1ull << 35)

/* The dirty bits that belong to the compute pipeline. */
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES | \
                                    IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES)

/* Everything that is not a compute bit.  Because this is a bitwise
 * complement, it also includes the bits that are not assigned above.
 */
#define IRIS_ALL_DIRTY_FOR_RENDER (~IRIS_ALL_DIRTY_FOR_COMPUTE)
127
/**
 * Per-stage dirty flags.  Like the IRIS_DIRTY_* flags, these record which
 * GPU commands have to be re-emitted after a state change, but each flag
 * here exists once per shader stage.
 *
 * The flags come in groups of six consecutive bits, one bit per stage in
 * the order VS, TCS, TES, GS, FS (PS), CS, so a flag for a given stage can
 * be formed by shifting the group's first bit by the shader stage index.
 *
 * See iris_upload_render_state().
 */

/* Sampler states: bits 0-5. */
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_VS        (1ull << 0)
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS       (1ull << 1)
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_TES       (1ull << 2)
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_GS        (1ull << 3)
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_PS        (1ull << 4)
#define IRIS_STAGE_DIRTY_SAMPLER_STATES_CS        (1ull << 5)

/* Uncompiled shaders: bits 6-11. */
#define IRIS_STAGE_DIRTY_UNCOMPILED_VS            (1ull << 6)
#define IRIS_STAGE_DIRTY_UNCOMPILED_TCS           (1ull << 7)
#define IRIS_STAGE_DIRTY_UNCOMPILED_TES           (1ull << 8)
#define IRIS_STAGE_DIRTY_UNCOMPILED_GS            (1ull << 9)
#define IRIS_STAGE_DIRTY_UNCOMPILED_FS            (1ull << 10)
#define IRIS_STAGE_DIRTY_UNCOMPILED_CS            (1ull << 11)

/* Compiled shaders: bits 12-17. */
#define IRIS_STAGE_DIRTY_VS                       (1ull << 12)
#define IRIS_STAGE_DIRTY_TCS                      (1ull << 13)
#define IRIS_STAGE_DIRTY_TES                      (1ull << 14)
#define IRIS_STAGE_DIRTY_GS                       (1ull << 15)
#define IRIS_STAGE_DIRTY_FS                       (1ull << 16)
#define IRIS_STAGE_DIRTY_CS                       (1ull << 17)

/* Constants: six bits starting at IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS. */
#define IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS      18
#define IRIS_STAGE_DIRTY_CONSTANTS_VS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 0))
#define IRIS_STAGE_DIRTY_CONSTANTS_TCS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 1))
#define IRIS_STAGE_DIRTY_CONSTANTS_TES \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 2))
#define IRIS_STAGE_DIRTY_CONSTANTS_GS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 3))
#define IRIS_STAGE_DIRTY_CONSTANTS_FS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 4))
#define IRIS_STAGE_DIRTY_CONSTANTS_CS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 5))

/* Bindings: six bits starting at IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS. */
#define IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS       24
#define IRIS_STAGE_DIRTY_BINDINGS_VS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 0))
#define IRIS_STAGE_DIRTY_BINDINGS_TCS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 1))
#define IRIS_STAGE_DIRTY_BINDINGS_TES \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 2))
#define IRIS_STAGE_DIRTY_BINDINGS_GS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 3))
#define IRIS_STAGE_DIRTY_BINDINGS_FS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 4))
#define IRIS_STAGE_DIRTY_BINDINGS_CS \
   (1ull << (IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS + 5))

/* Every per-stage flag that belongs to the compute stage. */
#define IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE     \
   (IRIS_STAGE_DIRTY_CS                    | \
    IRIS_STAGE_DIRTY_SAMPLER_STATES_CS     | \
    IRIS_STAGE_DIRTY_UNCOMPILED_CS         | \
    IRIS_STAGE_DIRTY_CONSTANTS_CS          | \
    IRIS_STAGE_DIRTY_BINDINGS_CS)

/* Bitwise complement of the compute set (so unassigned bits are included). */
#define IRIS_ALL_STAGE_DIRTY_FOR_RENDER (~IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE)

/* Binding flags of the five render stages. */
#define IRIS_ALL_STAGE_DIRTY_BINDINGS_FOR_RENDER \
   (IRIS_STAGE_DIRTY_BINDINGS_VS               | \
    IRIS_STAGE_DIRTY_BINDINGS_TCS              | \
    IRIS_STAGE_DIRTY_BINDINGS_TES              | \
    IRIS_STAGE_DIRTY_BINDINGS_GS               | \
    IRIS_STAGE_DIRTY_BINDINGS_FS)

/* Binding flags of all six stages. */
#define IRIS_ALL_STAGE_DIRTY_BINDINGS            \
   (IRIS_ALL_STAGE_DIRTY_BINDINGS_FOR_RENDER   | \
    IRIS_STAGE_DIRTY_BINDINGS_CS)
185
/**
 * Non-orthogonal state (NOS) dependency flags.
 *
 * A shader's key may depend on state that comes from a Gallium CSO rather
 * than from the shader itself.  Each enumerator names one such CSO; when a
 * CSO marked as a dependency changes, the driver re-computes the shader
 * key, which may trigger a shader recompile.
 *
 * The enumerators are bit indices: users store them as (1 << IRIS_NOS_*).
 */
enum iris_nos_dep {
   IRIS_NOS_FRAMEBUFFER         = 0,
   IRIS_NOS_DEPTH_STENCIL_ALPHA = 1,
   IRIS_NOS_RASTERIZER          = 2,
   IRIS_NOS_BLEND               = 3,
   IRIS_NOS_LAST_VUE_MAP        = 4,

   /* Number of dependency kinds; keep this last. */
   IRIS_NOS_COUNT,
};
204
/** @{
 *
 * Program cache keys for state based recompiles.
 */

/* Provide explicit padding for each member, to ensure that the compiler
 * initializes every bit in the shader cache keys.  The keys will be compared
 * with memcmp.
 *
 * NOTE(review): the two macros below are defined outside this file; going
 * by their names they save the current diagnostic state and promote
 * -Wpadded to an error for the key definitions that follow -- confirm.
 */
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
216
/**
 * Key fields shared by every stage's key: each key type below embeds this
 * struct, either directly or through iris_vue_prog_key.
 *
 * Note, we need to take care to have padding explicitly declared
 * for key since we will directly memcmp the whole struct.
 */
struct iris_base_prog_key {
   unsigned program_string_id;
   bool limit_trig_input_range;
   /* Explicitly declared padding bits (see the memcmp note above). */
   unsigned padding:24;
};
226
/**
 * Key fields common to the stages whose keys embed this struct
 * (VS, TCS, TES and GS).
 */
struct iris_vue_prog_key {
   struct iris_base_prog_key base;

   unsigned nr_userclip_plane_consts:4;
   /* Explicit padding: 4 + 28 bits fill one 32-bit unsigned. */
   unsigned padding:28;
};
233
/** Vertex shader key: nothing beyond the common VUE key fields. */
struct iris_vs_prog_key {
   struct iris_vue_prog_key vue;
};
237
/** Tessellation control shader key. */
struct iris_tcs_prog_key {
   struct iris_vue_prog_key vue;

   enum tess_primitive_mode _tes_primitive_mode;

   uint8_t input_vertices;

   bool quads_workaround;
   /* Explicitly declared padding bits; the key is compared with memcmp. */
   unsigned padding:16;

   /** A bitfield of per-patch outputs written. */
   uint32_t patch_outputs_written;

   /** A bitfield of per-vertex outputs written. */
   uint64_t outputs_written;
};
254
/** Tessellation evaluation shader key. */
struct iris_tes_prog_key {
   struct iris_vue_prog_key vue;

   /** A bitfield of per-patch inputs read. */
   uint32_t patch_inputs_read;

   /** A bitfield of per-vertex inputs read. */
   uint64_t inputs_read;
};
264
/** Geometry shader key: nothing beyond the common VUE key fields. */
struct iris_gs_prog_key {
   struct iris_vue_prog_key vue;
};
268
/** Fragment shader key. */
struct iris_fs_prog_key {
   struct iris_base_prog_key base;

   uint64_t input_slots_valid;
   uint8_t color_outputs_valid;

   unsigned nr_color_regions:5;
   bool flat_shade:1;
   bool alpha_test_replicate_alpha:1;
   bool alpha_to_coverage:1;
   bool clamp_fragment_color:1;
   bool persample_interp:1;
   bool multisample_fbo:1;
   bool force_dual_color_blend:1;
   bool coherent_fb_fetch:1;
   /* Explicit padding: the bit-fields above total 5 + 8 = 13 bits, and
    * 13 + 43 = 56, which with the 8-bit color_outputs_valid makes 64 bits.
    */
   uint64_t padding:43;
};
286
/** Compute shader key: only the base key fields. */
struct iris_cs_prog_key {
   struct iris_base_prog_key base;
};
290
/**
 * Storage that can hold the key of any shader stage.
 *
 * Every member begins with a struct iris_base_prog_key (directly, or via
 * iris_vue_prog_key), so the \c base member is valid whichever stage's
 * key is stored.
 */
union iris_any_prog_key {
   struct iris_base_prog_key base;
   struct iris_vue_prog_key vue;
   struct iris_vs_prog_key vs;
   struct iris_tcs_prog_key tcs;
   struct iris_tes_prog_key tes;
   struct iris_gs_prog_key gs;
   struct iris_fs_prog_key fs;
   struct iris_cs_prog_key cs;
};
301
/* End of the program key definitions: restore the diagnostic state that was
 * saved by PRAGMA_DIAGNOSTIC_PUSH (this is about compiler diagnostics, not
 * pack alignment).
 */
PRAGMA_DIAGNOSTIC_POP
304
305/** @} */
306
/* Forward declaration; this header only holds a pointer to it
 * (iris_context::state.cso_zsa).
 */
struct iris_depth_stencil_alpha_state;
308
/**
 * Cache IDs for the in-memory program cache (ice->shaders.cache).
 *
 * The per-stage IDs are defined as the corresponding MESA_SHADER_* values.
 * IRIS_CACHE_BLORP has no initializer, so it takes the value following
 * MESA_SHADER_COMPUTE.
 */
enum iris_program_cache_id {
   IRIS_CACHE_VS  = MESA_SHADER_VERTEX,
   IRIS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
   IRIS_CACHE_TES = MESA_SHADER_TESS_EVAL,
   IRIS_CACHE_GS  = MESA_SHADER_GEOMETRY,
   IRIS_CACHE_FS  = MESA_SHADER_FRAGMENT,
   IRIS_CACHE_CS  = MESA_SHADER_COMPUTE,
   IRIS_CACHE_BLORP,
};
321
/** @{
 *
 * Defines for PIPE_CONTROL operations, which trigger cache flushes,
 * synchronization, pipelined memory writes, and so on.
 *
 * The bits here are not the actual hardware values.  The actual fields
 * move between various generations, so we just have flags for each
 * potential operation, and use genxml to encode the actual packet.
 *
 * Flags occupy bits 1 through 28; bit 0 is not assigned.
 */
enum pipe_control_flags
{
   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
   PIPE_CONTROL_CS_STALL                        = (1 << 4),
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
   PIPE_CONTROL_TILE_CACHE_FLUSH                = (1 << 25),
   PIPE_CONTROL_FLUSH_HDC                       = (1 << 26),
   PIPE_CONTROL_PSS_STALL_SYNC                  = (1 << 27),
   PIPE_CONTROL_L3_READ_ONLY_CACHE_INVALIDATE   = (1 << 28),
};
362
/* Mask of the cache-flush flags (depth, data, tile, HDC and render target). */
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
   (PIPE_CONTROL_DEPTH_CACHE_FLUSH |  \
    PIPE_CONTROL_DATA_CACHE_FLUSH |   \
    PIPE_CONTROL_TILE_CACHE_FLUSH |   \
    PIPE_CONTROL_FLUSH_HDC | \
    PIPE_CONTROL_RENDER_TARGET_FLUSH)

/* Mask of the cache-invalidate flags (state, constant, VF, texture and
 * instruction).  PIPE_CONTROL_L3_READ_ONLY_CACHE_INVALIDATE is not part of
 * this mask.
 */
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS  \
   (PIPE_CONTROL_STATE_CACHE_INVALIDATE |   \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE |   \
    PIPE_CONTROL_VF_CACHE_INVALIDATE |      \
    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
    PIPE_CONTROL_INSTRUCTION_INVALIDATE)

/* L3 read-only cache invalidate combined with constant cache invalidate. */
#define PIPE_CONTROL_L3_RO_INVALIDATE_BITS       \
   (PIPE_CONTROL_L3_READ_ONLY_CACHE_INVALIDATE | \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE)
380
/**
 * Conditional rendering state.
 */
enum iris_predicate_state {
   /* RENDER and DONT_RENDER are used when the decision whether to draw can
    * be made without looking at the values in the query object buffer.
    * That is the case if there is no conditional render in progress, if
    * the query object is already completed or if something else has
    * already added samples to the preliminary result.
    */
   IRIS_PREDICATE_STATE_RENDER      = 0,
   IRIS_PREDICATE_STATE_DONT_RENDER = 1,

   /* Here the decision depends on the result of an MI_PREDICATE command,
    * so the predicate enable bit needs to be checked.
    */
   IRIS_PREDICATE_STATE_USE_BIT     = 2,
};
396
397/** @} */
398
/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See iris_compiled_shader, which represents a compiled shader variant.
 */
struct iris_uncompiled_shader {
   /** Reference count for this object. */
   struct pipe_reference ref;

   /**
    * NIR for the shader.
    *
    * Even for shaders that originate as TGSI, this pointer will be non-NULL.
    */
   struct nir_shader *nir;

   /** Stream output (transform feedback) description for this shader. */
   struct pipe_stream_output_info stream_output;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* NOTE(review): presumably the ID that ends up in
    * iris_base_prog_key::program_string_id -- confirm against the code
    * that fills in the keys.
    */
   unsigned program_id;

   /** Bitfield of (1 << IRIS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;

   /* Whether shader uses atomic operations. */
   bool uses_atomic_load_store;

   /** Size (in bytes) of the kernel input data */
   unsigned kernel_input_size;

   /** Size (in bytes) of the local (shared) data passed as kernel inputs */
   unsigned kernel_shared_size;

   /** List of iris_compiled_shader variants */
   struct list_head variants;

   /** Lock for the variants list */
   simple_mtx_t lock;

   /** For parallel shader compiles */
   struct util_queue_fence ready;
};
449
/**
 * Groups of surfaces within a shader's binding table.  The enumerators
 * index the per-group arrays of struct iris_binding_table.
 */
enum iris_surface_group {
   IRIS_SURFACE_GROUP_RENDER_TARGET      = 0,
   IRIS_SURFACE_GROUP_RENDER_TARGET_READ = 1,
   IRIS_SURFACE_GROUP_CS_WORK_GROUPS     = 2,
   IRIS_SURFACE_GROUP_TEXTURE            = 3,
   IRIS_SURFACE_GROUP_IMAGE              = 4,
   IRIS_SURFACE_GROUP_UBO                = 5,
   IRIS_SURFACE_GROUP_SSBO               = 6,

   /* Number of groups; keep this last. */
   IRIS_SURFACE_GROUP_COUNT,
};
461
enum {
   /* Invalid value for a binding table index.
    *
    * NOTE(review): 0xa0a0a0a0 does not fit in an int.  ISO C before C23
    * requires enumerators to be representable as int, so this relies on
    * the compiler accepting larger enumerator values.
    */
   IRIS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
466
/**
 * Layout of a shader's binding table.  All arrays are indexed by
 * enum iris_surface_group.
 */
struct iris_binding_table {
   /* NOTE(review): presumably the total size of the table in bytes --
    * confirm against the code that fills it in.
    */
   uint32_t size_bytes;

   /** Number of surfaces in each group, before compacting. */
   uint32_t sizes[IRIS_SURFACE_GROUP_COUNT];

   /** Initial offset of each group. */
   uint32_t offsets[IRIS_SURFACE_GROUP_COUNT];

   /** Mask of surfaces used in each group (64 bits per group). */
   uint64_t used_mask[IRIS_SURFACE_GROUP_COUNT];
};
479
480/**
481 * A compiled shader variant, containing a pointer to the GPU assembly,
482 * as well as program data and other packets needed by state upload.
483 *
484 * There can be several iris_compiled_shader variants per API-level shader
485 * (iris_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
486 */
487struct iris_compiled_shader {
488   struct pipe_reference ref;
489
490   /** Link in the iris_uncompiled_shader::variants list */
491   struct list_head link;
492
493   /** Key for this variant (but not for BLORP programs) */
494   union iris_any_prog_key key;
495
496   /**
497    * Is the variant fully compiled and ready?
498    *
499    * Variants are added to \c iris_uncompiled_shader::variants before
500    * compilation actually occurs.  This signals that compilation has
501    * completed.
502    */
503   struct util_queue_fence ready;
504
505   /** Variant is ready, but compilation failed. */
506   bool compilation_failed;
507
508   /** Reference to the uploaded assembly. */
509   struct iris_state_ref assembly;
510
511   /** Pointer to the assembly in the BO's map. */
512   void *map;
513
514   /** The program data (owned by the program cache hash table) */
515   struct brw_stage_prog_data *prog_data;
516
517   /** A list of system values to be uploaded as uniforms. */
518   enum brw_param_builtin *system_values;
519   unsigned num_system_values;
520
521   /** Size (in bytes) of the kernel input data */
522   unsigned kernel_input_size;
523
524   /** Number of constbufs expected by the shader. */
525   unsigned num_cbufs;
526
527   /**
528    * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
529    * (the VUE-based information for transform feedback outputs).
530    */
531   uint32_t *streamout;
532
533   struct iris_binding_table bt;
534
535   /**
536    * Shader packets and other data derived from prog_data.  These must be
537    * completely determined from prog_data.
538    */
539   uint8_t derived_data[0];
540};
541
/**
 * API context state that is replicated per shader stage
 * (iris_context::state.shaders[] holds one of these per stage).
 */
struct iris_shader_state {
   /** Uniform Buffers */
   struct pipe_shader_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS];
   struct iris_state_ref constbuf_surf_state[PIPE_MAX_CONSTANT_BUFFERS];

   /* NOTE(review): presumably set when the system values for this stage
    * have to be re-uploaded -- confirm against the upload code.
    */
   bool sysvals_need_upload;

   /** Shader Storage Buffers */
   struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];
   struct iris_state_ref ssbo_surf_state[PIPE_MAX_SHADER_BUFFERS];

   /** Shader Storage Images (image load store) */
   struct iris_image_view image[PIPE_MAX_SHADER_IMAGES];

   /** Sampler table plus the bound sampler states and sampler views. */
   struct iris_state_ref sampler_table;
   struct iris_sampler_state *samplers[IRIS_MAX_TEXTURE_SAMPLERS];
   struct iris_sampler_view *textures[IRIS_MAX_TEXTURE_SAMPLERS];

   /** Bitfield of which constant buffers are bound (non-null). */
   uint32_t bound_cbufs;
   /* NOTE(review): by its name, a bitfield of constant buffers flagged
    * dirty -- confirm.
    */
   uint32_t dirty_cbufs;

   /** Bitfield of which image views are bound (non-null). */
   uint32_t bound_image_views;

   /** Bitfield of which sampler views are bound (non-null). */
   uint32_t bound_sampler_views;

   /** Bitfield of which shader storage buffers are bound (non-null). */
   uint32_t bound_ssbos;

   /** Bitfield of which shader storage buffers are writable. */
   uint32_t writable_ssbos;
};
579
/**
 * Gallium CSO for stream output (transform feedback) targets.
 */
struct iris_stream_output_target {
   /* The Gallium base object, placed first.
    * NOTE(review): presumably first so the two types can be cast to each
    * other -- confirm against the users.
    */
   struct pipe_stream_output_target base;

   /** Storage holding the offset where we're writing in the buffer */
   struct iris_state_ref offset;

   /** Stride (bytes-per-vertex) during this transform feedback operation */
   uint16_t stride;

   /** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? */
   bool zero_offset;
};
595
/**
 * The API context (derived from pipe_context).
 *
 * Most driver state is tracked here.
 */
struct iris_context {
   /** The Gallium base context, placed first. */
   struct pipe_context ctx;
   struct threaded_context *thrctx;

   /** A debug callback for KHR_debug output. */
   struct util_debug_callback dbg;

   /** A device reset status callback for notifying that the GPU is hosed. */
   struct pipe_device_reset_callback reset;

   /** A set of dmabuf resources dirtied beyond their default aux-states. */
   struct set *dirty_dmabufs;

   /** Slab allocator for iris_transfer_map objects. */
   struct slab_child_pool transfer_pool;

   /** Slab allocator for threaded_context's iris_transfer_map objects */
   struct slab_child_pool transfer_pool_unsync;

   struct blorp_context blorp;

   struct iris_batch batches[IRIS_BATCH_COUNT];

   struct u_upload_mgr *query_buffer_uploader;

   struct intel_ds_device ds;

   /** Draw parameter tracking. */
   struct {
      struct {
         /**
          * Either the value of BaseVertex for indexed draw calls or the value
          * of the argument <first> for non-indexed draw calls.
          */
         int firstvertex;
         int baseinstance;
      } params;

      /**
       * Are the above values the ones stored in the draw_params buffer?
       * If so, we can compare them against new values to see if anything
       * changed.  If not, we need to assume they changed.
       */
      bool params_valid;

      /**
       * Resource and offset that stores draw_parameters from the indirect
       * buffer or to the buffer that stores the previous values for non
       * indirect draws.
       */
      struct iris_state_ref draw_params;

      struct {
         /**
          * The value of DrawID. This always comes in from its own vertex
          * buffer since it's not part of the indirect draw parameters.
          */
         int drawid;

         /**
          * Stores if an indexed or non-indexed draw (~0/0). Useful to
          * calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
          */
         int is_indexed_draw;
      } derived_params;

      /**
       * Resource and offset used for GL_ARB_shader_draw_parameters which
       * contains parameters that are not present in the indirect buffer as
       * drawid and is_indexed_draw. They will go in their own vertex element.
       */
      struct iris_state_ref derived_draw_params;
   } draw;

   /** Shader programs: bound CSOs, compiled variants, cache and scratch. */
   struct {
      struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
      struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
      struct iris_compiled_shader *last_vue_shader;
      /* NOTE(review): the arrays of 4 are presumably indexed by the four
       * VUE stages (VS..GS) -- confirm against the URB setup code.
       */
      struct {
         unsigned size[4];
         unsigned entries[4];
         unsigned start[4];
         bool constrained;
      } urb;

      /** Uploader for shader assembly from the driver thread */
      struct u_upload_mgr *uploader_driver;
      /** Uploader for shader assembly from the threaded context */
      struct u_upload_mgr *uploader_unsync;
      struct hash_table *cache;

      /** Is a GS or TES outputting points or lines? */
      bool output_topology_is_points_or_lines;

      /**
       * Scratch buffers for various sizes and stages.
       *
       * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
       * and shader stage.
       */
      struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];

      /**
       * Scratch buffer surface states on Gfx12.5+
       */
      struct iris_state_ref scratch_surfs[1 << 4];
   } shaders;

   struct intel_perf_context *perf_ctx;

   /** Frame number for debug prints */
   uint32_t frame;

   struct {
      /** Mask of IRIS_DIRTY_* flags. */
      uint64_t dirty;
      /** Mask of IRIS_STAGE_DIRTY_* flags. */
      uint64_t stage_dirty;
      /** One mask of IRIS_STAGE_DIRTY_* flags per enum iris_nos_dep value. */
      uint64_t stage_dirty_for_nos[IRIS_NOS_COUNT];

      unsigned num_viewports;
      unsigned sample_mask;
      struct iris_blend_state *cso_blend;
      struct iris_rasterizer_state *cso_rast;
      struct iris_depth_stencil_alpha_state *cso_zsa;
      struct iris_vertex_element_state *cso_vertex_elements;
      struct pipe_blend_color blend_color;
      struct pipe_poly_stipple poly_stipple;
      struct pipe_viewport_state viewports[IRIS_MAX_VIEWPORTS];
      struct pipe_scissor_state scissors[IRIS_MAX_VIEWPORTS];
      struct pipe_stencil_ref stencil_ref;
      struct pipe_framebuffer_state framebuffer;
      struct pipe_clip_state clip_planes;

      float default_outer_level[4];
      float default_inner_level[2];

      /** Bitfield of which vertex buffers are bound (non-null). */
      uint64_t bound_vertex_buffers;

      uint8_t patch_vertices;
      bool primitive_restart;
      unsigned cut_index;
      enum pipe_prim_type prim_mode:8;
      bool prim_is_points_or_lines;
      uint8_t vertices_per_patch;

      bool window_space_position;

      /** The last compute group size */
      uint32_t last_block[3];

      /** The last compute grid size */
      uint32_t last_grid[3];
      /** The last compute grid dimensions */
      uint32_t last_grid_dim;
      /** Reference to the BO containing the compute grid size */
      struct iris_state_ref grid_size;
      /** Reference to the SURFACE_STATE for the compute grid resource */
      struct iris_state_ref grid_surf_state;

      /**
       * Array of aux usages for drawing, altered to account for any
       * self-dependencies from resources bound for sampling and rendering.
       */
      enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];

      /** Aux usage of the fb's depth buffer (which may or may not exist). */
      enum isl_aux_usage hiz_usage;

      enum intel_urb_deref_block_size urb_deref_block_size;

      /** Are depth writes enabled?  (Depth buffer may or may not exist.) */
      bool depth_writes_enabled;

      /** Are stencil writes enabled?  (Stencil buffer may or may not exist.) */
      bool stencil_writes_enabled;

      /** GenX-specific current state */
      struct iris_genx_state *genx;

      /** Per-stage API state, one entry per shader stage. */
      struct iris_shader_state shaders[MESA_SHADER_STAGES];

      /** Does the vertex shader use shader draw parameters? */
      bool vs_uses_draw_params;
      bool vs_uses_derived_draw_params;
      bool vs_needs_sgvs_element;

      /** Does the vertex shader use the edge flag? */
      bool vs_needs_edge_flag;

      /** Do any samplers need border color?  One bit per shader stage. */
      uint8_t need_border_colors;

      /** Global resource bindings */
      struct pipe_resource *global_bindings[IRIS_MAX_GLOBAL_BINDINGS];

      struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
      bool streamout_active;

      bool statistics_counters_enabled;

      /** Current conditional rendering mode */
      enum iris_predicate_state predicate;

      /**
       * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
       * render context that needs to be uploaded to the compute context.
       */
      struct iris_bo *compute_predicate;

      /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
      bool prims_generated_query_active;

      /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
      uint32_t *streamout;

      /** The SURFACE_STATE for a 1x1x1 null surface. */
      struct iris_state_ref unbound_tex;

      /** The SURFACE_STATE for a framebuffer-sized null surface. */
      struct iris_state_ref null_fb;

      struct u_upload_mgr *surface_uploader;
      struct u_upload_mgr *bindless_uploader;
      struct u_upload_mgr *dynamic_uploader;

      struct iris_binder binder;

      /** The high 16-bits of the last VBO/index buffer addresses */
      /* NOTE(review): 33 is a bare constant here -- presumably the maximum
       * number of vertex buffers plus extra driver-internal ones; confirm
       * and consider naming it.
       */
      uint16_t last_vbo_high_bits[33];
      uint16_t last_index_bo_high_bits;

      /**
       * Resources containing streamed state which our render context
       * currently points to.  Used to re-add these to the validation
       * list when we start a new batch and haven't resubmitted commands.
       */
      struct {
         struct pipe_resource *cc_vp;
         struct pipe_resource *sf_cl_vp;
         struct pipe_resource *color_calc;
         struct pipe_resource *scissor;
         struct pipe_resource *blend;
         struct pipe_resource *index_buffer;
         struct pipe_resource *cs_thread_ids;
         struct pipe_resource *cs_desc;
      } last_res;

      /** Records the size of variable-length state for INTEL_DEBUG=bat */
      struct hash_table_u64 *sizes;

      /** Last rendering scale argument provided to genX(emit_hashing_mode). */
      unsigned current_hash_scale;

      /** Resource holding the pixel pipe hashing tables. */
      struct pipe_resource *pixel_hashing_tables;
   } state;
};
857
/**
 * Report a performance note through up to two channels.
 *
 * The printf-style arguments go to dbg_printf() when
 * INTEL_DEBUG(DEBUG_PERF) is true and, independently of that, to
 * util_debug_message() with type PERF_INFO when dbg evaluates true.
 * Note that dbg appears twice in the expansion.
 */
#define perf_debug(dbg, ...)                                 \
   do {                                                      \
      if (INTEL_DEBUG(DEBUG_PERF)) {                         \
         dbg_printf(__VA_ARGS__);                            \
      }                                                      \
      if (unlikely(dbg)) {                                   \
         util_debug_message(dbg, PERF_INFO, __VA_ARGS__);    \
      }                                                      \
   } while (0)
864
/*
 * Context-wide entry points.  This header only declares them; the
 * definitions are not part of this file.
 */

/* Context creation and destruction. */
struct pipe_context *iris_create_context(struct pipe_screen *screen,
                                         void *priv,
                                         unsigned flags);
void
iris_destroy_context(struct pipe_context *ctx);

/* Operates on a single batch rather than on the whole context. */
void
iris_lost_context_state(struct iris_batch *batch);

/* Dirty dma-buf bookkeeping: mark one resource, or flush for a context. */
void
iris_mark_dirty_dmabuf(struct iris_context *ice,
                       struct pipe_resource *res);
void
iris_flush_dirty_dmabufs(struct iris_context *ice);

/*
 * Per-area init hooks.  Each takes the pipe_context being set up, except
 * the screen-level program hook, which takes the pipe_screen instead.
 */
void
iris_init_blit_functions(struct pipe_context *ctx);
void
iris_init_clear_functions(struct pipe_context *ctx);
void
iris_init_program_functions(struct pipe_context *ctx);
void
iris_init_screen_program_functions(struct pipe_screen *pscreen);
void
iris_init_resource_functions(struct pipe_context *ctx);
void
iris_init_perfquery_functions(struct pipe_context *ctx);

/* Compiled-shader updates for the context (general and compute variants). */
void
iris_update_compiled_shaders(struct iris_context *ice);
void
iris_update_compiled_compute_shader(struct iris_context *ice);

/*
 * Writes through dst based on cs_prog_data and the thread count.
 * NOTE(review): the required size of dst is not visible from this
 * header -- confirm against the definition.
 */
void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               unsigned threads, uint32_t *dst);
886
887
/* iris_blit.c */

/*
 * Describe one miplevel of p_res, with the requested aux usage, through
 * surf.  NOTE(review): presumably surf is an output parameter and
 * is_render_target selects how the surface will be used -- confirm in
 * iris_blit.c.
 */
void
iris_blorp_surf_for_resource(struct isl_device *isl_dev,
                             struct blorp_surf *surf,
                             struct pipe_resource *p_res,
                             enum isl_aux_usage aux_usage,
                             unsigned level, bool is_render_target);

/*
 * Region copy between two resources on the given batch: the source is
 * described by src, src_level and src_box, the destination by dst,
 * dst_level and the (dstx, dsty, dstz) origin.
 */
void
iris_copy_region(struct blorp_context *blorp, struct iris_batch *batch,
                 struct pipe_resource *dst, unsigned dst_level,
                 unsigned dstx, unsigned dsty, unsigned dstz,
                 struct pipe_resource *src, unsigned src_level,
                 const struct pipe_box *src_box);
903
904static inline enum blorp_batch_flags
905iris_blorp_flags_for_batch(struct iris_batch *batch)
906{
907   if (batch->name == IRIS_BATCH_COMPUTE)
908      return BLORP_BATCH_USE_COMPUTE;
909
910   if (batch->name == IRIS_BATCH_BLITTER)
911      return BLORP_BATCH_USE_BLITTER;
912
913   return 0;
914}
915
/* iris_draw.c */

/*
 * Draw and compute-dispatch entry points.  Both take the pipe_context
 * first, followed by the description of the work to submit.  All
 * parameters are named, matching the other prototypes in this header.
 */
void iris_draw_vbo(struct pipe_context *ctx,
                   const struct pipe_draw_info *info,
                   unsigned drawid_offset,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draws,
                   unsigned num_draws);
void iris_launch_grid(struct pipe_context *ctx,
                      const struct pipe_grid_info *grid);
924
/* iris_pipe_control.c */

/*
 * Pipe-control emission helpers.  Each one takes the target batch, a
 * reason string and a flags word; the _write variant additionally takes
 * a BO, an offset and a 64-bit immediate.
 */
void
iris_emit_pipe_control_flush(struct iris_batch *batch,
                             const char *reason, uint32_t flags);
void
iris_emit_pipe_control_write(struct iris_batch *batch,
                             const char *reason, uint32_t flags,
                             struct iris_bo *bo, uint32_t offset,
                             uint64_t imm);
void
iris_emit_end_of_pipe_sync(struct iris_batch *batch,
                           const char *reason, uint32_t flags);

/* Barrier for one BO with respect to the given access domain. */
void
iris_emit_buffer_barrier_for(struct iris_batch *batch,
                             struct iris_bo *bo,
                             enum iris_domain access);

/* Called by iris_handle_always_flush_cache() when the option is set. */
void
iris_flush_all_caches(struct iris_batch *batch);
939
/**
 * Call iris_flush_all_caches() on batch when the always_flush_cache
 * driconf option of the batch's screen is set.
 *
 * The expansion is wrapped in do { } while (0) and carries no trailing
 * semicolon, so it behaves as a single statement: it is safe as the body
 * of an if/else and the caller supplies the ';'.  The argument is
 * parenthesized so that expressions such as &batch_obj work; it is
 * evaluated up to twice.
 */
#define iris_handle_always_flush_cache(batch)                       \
   do {                                                             \
      if (unlikely((batch)->screen->driconf.always_flush_cache))    \
         iris_flush_all_caches(batch);                              \
   } while (0)
943
944void iris_init_flush_functions(struct pipe_context *ctx);
945
946/* iris_program.c */
947void iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
948                                     struct pipe_shader_buffer *buf,
949                                     struct iris_state_ref *surf_state,
950                                     isl_surf_usage_flags_t usage);
951const struct shader_info *iris_get_shader_info(const struct iris_context *ice,
952                                               gl_shader_stage stage);
953struct iris_bo *iris_get_scratch_space(struct iris_context *ice,
954                                       unsigned per_thread_scratch,
955                                       gl_shader_stage stage);
956const struct iris_state_ref *iris_get_scratch_surf(struct iris_context *ice,
957                                                   unsigned per_thread_scratch);
958uint32_t iris_group_index_to_bti(const struct iris_binding_table *bt,
959                                 enum iris_surface_group group,
960                                 uint32_t index);
961uint32_t iris_bti_to_group_index(const struct iris_binding_table *bt,
962                                 enum iris_surface_group group,
963                                 uint32_t bti);
964
/* iris_disk_cache.c */

/*
 * Disk-cache store and retrieve for a shader variant, identified by
 * prog_key / prog_key_size.  Store takes the shader objects as const and
 * returns nothing; retrieve takes them as mutable and returns a bool.
 * NOTE(review): presumably true means the variant was found -- confirm in
 * iris_disk_cache.c.
 */
void
iris_disk_cache_store(struct disk_cache *cache,
                      const struct iris_uncompiled_shader *ish,
                      const struct iris_compiled_shader *shader,
                      const void *prog_key, uint32_t prog_key_size);

bool iris_disk_cache_retrieve(struct iris_screen *screen,
                              struct u_upload_mgr *uploader,
                              struct iris_uncompiled_shader *ish,
                              struct iris_compiled_shader *shader,
                              const void *prog_key, uint32_t prog_key_size);
979
/* iris_program_cache.c */

/* Per-context program cache setup and teardown. */
void iris_init_program_cache(struct iris_context *ice);
void iris_destroy_program_cache(struct iris_context *ice);

/*
 * Cache lookup keyed on (cache_id, key, key_size).
 * NOTE(review): presumably returns NULL on a miss -- confirm in
 * iris_program_cache.c.
 */
struct iris_compiled_shader *
iris_find_cached_shader(struct iris_context *ice,
                        enum iris_program_cache_id cache_id,
                        uint32_t key_size,
                        const void *key);

/*
 * Variant creation for the given cache id and key.
 * NOTE(review): mem_ctx looks like an allocation parent -- confirm.
 */
struct iris_compiled_shader *
iris_create_shader_variant(const struct iris_screen *screen,
                           void *mem_ctx,
                           enum iris_program_cache_id cache_id,
                           uint32_t key_size,
                           const void *key);

/* Attaches the compile results passed as arguments to a shader variant. */
void iris_finalize_program(struct iris_compiled_shader *shader,
                           struct brw_stage_prog_data *prog_data,
                           uint32_t *streamout,
                           enum brw_param_builtin *system_values,
                           unsigned num_system_values,
                           unsigned kernel_input_size,
                           unsigned num_cbufs,
                           const struct iris_binding_table *bt);

/*
 * Upload of a variant's assembly through the given uploader.  Parameters
 * left unnamed in earlier revisions are named here to match the sibling
 * declarations (ish, shader, cache_id).
 */
void iris_upload_shader(struct iris_screen *screen,
                        struct iris_uncompiled_shader *ish,
                        struct iris_compiled_shader *shader,
                        struct hash_table *driver_ht,
                        struct u_upload_mgr *uploader,
                        enum iris_program_cache_id cache_id,
                        uint32_t key_size,
                        const void *key,
                        const void *assembly);

/* Called by iris_shader_variant_reference() to release a variant. */
void iris_delete_shader_variant(struct iris_compiled_shader *shader);

/* Called by iris_uncompiled_shader_reference() to release a shader. */
void iris_destroy_shader_state(struct pipe_context *ctx, void *state);
1016
1017static inline void
1018iris_uncompiled_shader_reference(struct pipe_context *ctx,
1019                                 struct iris_uncompiled_shader **dst,
1020                                 struct iris_uncompiled_shader *src)
1021{
1022   if (*dst == src)
1023      return;
1024
1025   struct iris_uncompiled_shader *old_dst = *dst;
1026
1027   if (pipe_reference(old_dst != NULL ? &old_dst->ref : NULL,
1028                      src != NULL ? &src->ref : NULL)) {
1029      iris_destroy_shader_state(ctx, *dst);
1030   }
1031
1032   *dst = src;
1033}
1034
1035static inline void
1036iris_shader_variant_reference(struct iris_compiled_shader **dst,
1037                              struct iris_compiled_shader *src)
1038{
1039   struct iris_compiled_shader *old_dst = *dst;
1040
1041   if (pipe_reference(old_dst ? &old_dst->ref: NULL, src ? &src->ref : NULL))
1042      iris_delete_shader_variant(old_dst);
1043
1044   *dst = src;
1045}
1046
/*
 * Shader lookup and upload hooks that take a blorp_batch.  Both are keyed
 * on (key, key_size), both take kernel_out and prog_data_out, and both
 * return a bool.  NOTE(review): presumably the two *_out parameters are
 * outputs and true means success -- confirm against the definitions.
 */
bool
iris_blorp_lookup_shader(struct blorp_batch *blorp_batch,
                         const void *key, uint32_t key_size,
                         uint32_t *kernel_out, void *prog_data_out);

bool
iris_blorp_upload_shader(struct blorp_batch *blorp_batch,
                         uint32_t stage,
                         const void *key,
                         uint32_t key_size,
                         const void *kernel,
                         uint32_t kernel_size,
                         const struct brw_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         uint32_t *kernel_out,
                         void *prog_data_out);
1059
1060/* iris_resolve.c */
1061
1062void iris_predraw_resolve_inputs(struct iris_context *ice,
1063                                 struct iris_batch *batch,
1064                                 bool *draw_aux_buffer_disabled,
1065                                 gl_shader_stage stage,
1066                                 bool consider_framebuffer);
1067void iris_predraw_resolve_framebuffer(struct iris_context *ice,
1068                                      struct iris_batch *batch,
1069                                      bool *draw_aux_buffer_disabled);
1070void iris_predraw_flush_buffers(struct iris_context *ice,
1071                                struct iris_batch *batch,
1072                                gl_shader_stage stage);
1073void iris_postdraw_update_resolve_tracking(struct iris_context *ice,
1074                                           struct iris_batch *batch);
1075void iris_cache_flush_for_render(struct iris_batch *batch,
1076                                 struct iris_bo *bo,
1077                                 enum isl_aux_usage aux_usage);
1078int iris_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
1079                               struct pipe_driver_query_info *info);
1080int iris_get_driver_query_group_info(struct pipe_screen *pscreen,
1081                                     unsigned index,
1082                                     struct pipe_driver_query_group_info *info);
1083
/* iris_state.c */

/*
 * Declared with an explicit gfx9_ prefix rather than through genX().
 * Takes the context, the batch and the draw being issued.
 */
void
gfx9_toggle_preemption(struct iris_context *ice, struct iris_batch *batch,
                       const struct pipe_draw_info *draw);
1088
1089
1090
/*
 * Pull in the per-generation prototypes.
 *
 * If the including file already defines genX, iris_genx_protos.h is
 * included once with that definition.  Otherwise it is included once per
 * generation prefix listed below, with genX(x) redefined each time to
 * paste the matching gfx<N>_ prefix onto x; genX is left undefined when
 * the chain finishes.
 *
 * NOTE(review): this relies on iris_genx_protos.h declaring its names
 * through genX() and being safe to include repeatedly -- that header is
 * not visible here.
 */
#ifdef genX
#  include "iris_genx_protos.h"
#else
#  define genX(x) gfx4_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx5_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx6_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx7_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx9_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx11_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx12_##x
#  include "iris_genx_protos.h"
#  undef genX
#  define genX(x) gfx125_##x
#  include "iris_genx_protos.h"
#  undef genX
#endif
1125
1126#endif
1127