1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#ifndef CROCUS_CONTEXT_H
24#define CROCUS_CONTEXT_H
25
26#include "pipe/p_context.h"
27#include "pipe/p_state.h"
28#include "util/u_debug.h"
29#include "util/u_threaded_context.h"
30#include "intel/blorp/blorp.h"
31#include "intel/dev/intel_debug.h"
32#include "intel/compiler/brw_compiler.h"
33#include "crocus_batch.h"
34#include "crocus_fence.h"
35#include "crocus_resource.h"
36#include "crocus_screen.h"
37#include "util/u_blitter.h"
38
/* Types that this header only needs to name (used through pointers below). */
struct blorp_batch;
struct blorp_params;
struct crocus_bo;
struct crocus_context;
43
/* Driver-wide resource limits. */
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
/*
 * CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same, so the ABO limit is
 * defined in terms of the SSBO limit: the two cannot drift apart when one of
 * them is changed.
 */
#define CROCUS_MAX_SSBOS 16
#define CROCUS_MAX_ABOS CROCUS_MAX_SSBOS
#define CROCUS_MAX_VIEWPORTS 16
#define CROCUS_MAX_CLIP_PLANES 8
51
/**
 * Domain tag of a packed shader parameter (see the BRW_PARAM() macros, which
 * paste these names and store the value in the top byte).
 */
enum crocus_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_IMAGE   = 1,
};
56
/*
 * Buffer-object reuse modes.
 * NOTE(review): presumably the values of a "bo_reuse" driconf option —
 * confirm against the code that reads them.
 */
enum {
   DRI_CONF_BO_REUSE_DISABLED = 0,
   DRI_CONF_BO_REUSE_ALL      = 1
};
61
/*
 * Packed shader-parameter encoding.
 *
 * A parameter is a 32-bit value: bits 31:24 hold the domain
 * (BRW_PARAM_DOMAIN_*), bits 23:0 hold a domain-specific payload.
 *
 * For the IMAGE domain the payload is (image index << 8) | offset, i.e. the
 * offset occupies the low 8 bits and the image index the 16 bits above it.
 */
#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value)   (BRW_PARAM_VALUE(value) >> 8)
/*
 * Extract the whole 8-bit offset field so that every offset that
 * BRW_PARAM_IMAGE() can encode round-trips (a 4-bit mask would silently
 * fold offsets >= 16 onto smaller ones).
 */
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xff)
68
69/**
70 * Dirty flags.  When state changes, we flag some combination of these
71 * to indicate that particular GPU commands need to be re-emitted.
72 *
73 * Each bit typically corresponds to a single 3DSTATE_* command packet, but
74 * in rare cases they map to a group of related packets that need to be
75 * emitted together.
76 *
77 * See crocus_upload_render_state().
78 */
/* One bit per flag; all values are unsigned long long so bits >= 32 are safe. */
#define CROCUS_DIRTY_COLOR_CALC_STATE             (1ULL << 0)
#define CROCUS_DIRTY_POLYGON_STIPPLE              (1ULL << 1)
#define CROCUS_DIRTY_CC_VIEWPORT                  (1ULL << 2)
#define CROCUS_DIRTY_SF_CL_VIEWPORT               (1ULL << 3)
#define CROCUS_DIRTY_RASTER                       (1ULL << 4)
#define CROCUS_DIRTY_CLIP                         (1ULL << 5)
#define CROCUS_DIRTY_LINE_STIPPLE                 (1ULL << 6)
#define CROCUS_DIRTY_VERTEX_ELEMENTS              (1ULL << 7)
#define CROCUS_DIRTY_VERTEX_BUFFERS               (1ULL << 8)
#define CROCUS_DIRTY_DRAWING_RECTANGLE            (1ULL << 9)
#define CROCUS_DIRTY_GEN6_URB                     (1ULL << 10)
#define CROCUS_DIRTY_DEPTH_BUFFER                 (1ULL << 11)
#define CROCUS_DIRTY_WM                           (1ULL << 12)
#define CROCUS_DIRTY_SO_DECL_LIST                 (1ULL << 13)
#define CROCUS_DIRTY_STREAMOUT                    (1ULL << 14)
#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR          (1ULL << 15)
#define CROCUS_DIRTY_GEN4_CURBE                   (1ULL << 16)
#define CROCUS_DIRTY_GEN4_URB_FENCE               (1ULL << 17)
#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS      (1ULL << 18)
#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS  (1ULL << 19)
#define CROCUS_DIRTY_GEN6_BLEND_STATE             (1ULL << 20)
#define CROCUS_DIRTY_GEN6_SCISSOR_RECT            (1ULL << 21)
#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL        (1ULL << 22)
#define CROCUS_DIRTY_GEN6_MULTISAMPLE             (1ULL << 23)
#define CROCUS_DIRTY_GEN6_SAMPLE_MASK             (1ULL << 24)
#define CROCUS_DIRTY_GEN7_SBE                     (1ULL << 25)
#define CROCUS_DIRTY_GEN7_L3_CONFIG               (1ULL << 26)
#define CROCUS_DIRTY_GEN7_SO_BUFFERS              (1ULL << 27)
#define CROCUS_DIRTY_GEN75_VF                     (1ULL << 28)
#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES  (1ULL << 29)
#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ULL << 30)
#define CROCUS_DIRTY_VF_STATISTICS                (1ULL << 31)
#define CROCUS_DIRTY_GEN4_CLIP_PROG               (1ULL << 32)
#define CROCUS_DIRTY_GEN4_SF_PROG                 (1ULL << 33)
#define CROCUS_DIRTY_GEN4_FF_GS_PROG              (1ULL << 34)
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS  (1ULL << 35)
#define CROCUS_DIRTY_GEN6_SVBI                    (1ULL << 36)
#define CROCUS_DIRTY_GEN8_VF_TOPOLOGY             (1ULL << 37)
#define CROCUS_DIRTY_GEN8_PMA_FIX                 (1ULL << 38)
#define CROCUS_DIRTY_GEN8_VF_SGVS                 (1ULL << 39)
#define CROCUS_DIRTY_GEN8_PS_BLEND                (1ULL << 40)

/* The only dirty bit that belongs to the compute path. */
#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)

/* Every other bit, including bits that are not assigned to a flag above. */
#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
124
125/**
126 * Per-stage dirty flags.  When state changes, we flag some combination of
127 * these to indicate that particular GPU commands need to be re-emitted.
 * Unlike the CROCUS_DIRTY_* flags, these are shader stage-specific and can be
129 * indexed by shifting the mask by the shader stage index.
130 *
131 * See crocus_upload_render_state().
132 */
/* Sampler state, one bit per stage (bits 0-5). */
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS   (1ULL << 0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS  (1ULL << 1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES  (1ULL << 2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS   (1ULL << 3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS   (1ULL << 4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS   (1ULL << 5)

/* Uncompiled shaders, one bit per stage (bits 6-11). */
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS       (1ULL << 6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS      (1ULL << 7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES      (1ULL << 8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS       (1ULL << 9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS       (1ULL << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS       (1ULL << 11)

/* Compiled shaders, one bit per stage (bits 12-17). */
#define CROCUS_STAGE_DIRTY_VS                  (1ULL << 12)
#define CROCUS_STAGE_DIRTY_TCS                 (1ULL << 13)
#define CROCUS_STAGE_DIRTY_TES                 (1ULL << 14)
#define CROCUS_STAGE_DIRTY_GS                  (1ULL << 15)
#define CROCUS_STAGE_DIRTY_FS                  (1ULL << 16)
#define CROCUS_STAGE_DIRTY_CS                  (1ULL << 17)

/* Constants, one bit per stage, starting at the shift below (bits 18-23). */
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS 18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 0))
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 1))
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 2))
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 3))
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 4))
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS \
   (1ULL << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + 5))

/* Bindings, one bit per stage (bits 24-29). */
#define CROCUS_STAGE_DIRTY_BINDINGS_VS         (1ULL << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS        (1ULL << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES        (1ULL << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS         (1ULL << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS         (1ULL << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS         (1ULL << 29)

/* All compute-stage bits from the groups above. */
#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE     \
   (CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS |     \
    CROCUS_STAGE_DIRTY_UNCOMPILED_CS |         \
    CROCUS_STAGE_DIRTY_CS |                    \
    CROCUS_STAGE_DIRTY_CONSTANTS_CS |          \
    CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Everything that is not a compute-stage bit (unassigned bits included). */
#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)

/* Binding bits of every stage, compute included. */
#define CROCUS_ALL_STAGE_DIRTY_BINDINGS        \
   (CROCUS_STAGE_DIRTY_BINDINGS_VS  |          \
    CROCUS_STAGE_DIRTY_BINDINGS_TCS |          \
    CROCUS_STAGE_DIRTY_BINDINGS_TES |          \
    CROCUS_STAGE_DIRTY_BINDINGS_GS  |          \
    CROCUS_STAGE_DIRTY_BINDINGS_FS  |          \
    CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Constant bits of the render stages only (no CS). */
#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS    \
   (CROCUS_STAGE_DIRTY_CONSTANTS_VS  |         \
    CROCUS_STAGE_DIRTY_CONSTANTS_TCS |         \
    CROCUS_STAGE_DIRTY_CONSTANTS_TES |         \
    CROCUS_STAGE_DIRTY_CONSTANTS_GS  |         \
    CROCUS_STAGE_DIRTY_CONSTANTS_FS)
185
/**
 * Non-orthogonal state (NOS) dependencies.
 *
 * A shader's key may depend on state that lives in some Gallium CSO.  Each
 * enumerator names one such CSO; when a CSO marked as a dependency changes,
 * the driver re-computes the shader key, which may trigger a recompile.
 *
 * The enumerators are bit positions: they are combined as (1 << CROCUS_NOS_*)
 * masks and used to index arrays of CROCUS_NOS_COUNT entries.
 */
enum crocus_nos_dep {
   CROCUS_NOS_FRAMEBUFFER = 0,
   CROCUS_NOS_DEPTH_STENCIL_ALPHA,
   CROCUS_NOS_RASTERIZER,
   CROCUS_NOS_BLEND,
   CROCUS_NOS_LAST_VUE_MAP,
   CROCUS_NOS_TEXTURES,
   CROCUS_NOS_VERTEX_ELEMENTS,

   /* Number of dependency kinds; keep last. */
   CROCUS_NOS_COUNT,
};
205
/* Forward declaration; struct crocus_context holds a pointer to it (cso_zsa). */
struct crocus_depth_stencil_alpha_state;
207
/**
 * Cache IDs for the in-memory program cache (ice->shaders.cache).
 *
 * The first six IDs are defined to equal the corresponding MESA_SHADER_*
 * stage value; the remaining IDs continue numbering after CROCUS_CACHE_CS.
 */
enum crocus_program_cache_id {
   CROCUS_CACHE_VS  = MESA_SHADER_VERTEX,
   CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
   CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
   CROCUS_CACHE_GS  = MESA_SHADER_GEOMETRY,
   CROCUS_CACHE_FS  = MESA_SHADER_FRAGMENT,
   CROCUS_CACHE_CS  = MESA_SHADER_COMPUTE,
   /* Non-API programs: implicitly numbered after the shader stages. */
   CROCUS_CACHE_BLORP,
   CROCUS_CACHE_SF,
   CROCUS_CACHE_CLIP,
   CROCUS_CACHE_FF_GS,
};
223
224/** @{
225 *
226 * Defines for PIPE_CONTROL operations, which trigger cache flushes,
227 * synchronization, pipelined memory writes, and so on.
228 *
229 * The bits here are not the actual hardware values.  The actual fields
230 * move between various generations, so we just have flags for each
231 * potential operation, and use genxml to encode the actual packet.
232 */
/* Driver-internal PIPE_CONTROL operation bits (bit 0 is not assigned). */
enum pipe_control_flags {
   PIPE_CONTROL_FLUSH_LLC                       = 1 << 1,
   PIPE_CONTROL_LRI_POST_SYNC_OP                = 1 << 2,
   PIPE_CONTROL_STORE_DATA_INDEX                = 1 << 3,
   PIPE_CONTROL_CS_STALL                        = 1 << 4,
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = 1 << 5,
   PIPE_CONTROL_SYNC_GFDT                       = 1 << 6,
   PIPE_CONTROL_TLB_INVALIDATE                  = 1 << 7,
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = 1 << 8,
   PIPE_CONTROL_WRITE_IMMEDIATE                 = 1 << 9,
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = 1 << 10,
   PIPE_CONTROL_WRITE_TIMESTAMP                 = 1 << 11,
   PIPE_CONTROL_DEPTH_STALL                     = 1 << 12,
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = 1 << 13,
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = 1 << 14,
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = 1 << 15,
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = 1 << 16,
   PIPE_CONTROL_NOTIFY_ENABLE                   = 1 << 17,
   PIPE_CONTROL_FLUSH_ENABLE                    = 1 << 18,
   PIPE_CONTROL_DATA_CACHE_FLUSH                = 1 << 19,
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = 1 << 20,
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = 1 << 21,
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = 1 << 22,
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = 1 << 23,
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = 1 << 24,
   PIPE_CONTROL_TILE_CACHE_FLUSH                = 1 << 25,
};

/* The flush bits: render target, data cache and depth cache. */
#define PIPE_CONTROL_CACHE_FLUSH_BITS           \
   (PIPE_CONTROL_RENDER_TARGET_FLUSH |          \
    PIPE_CONTROL_DATA_CACHE_FLUSH |             \
    PIPE_CONTROL_DEPTH_CACHE_FLUSH)

/* The invalidate bits: instruction, texture, VF, constant and state caches. */
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS      \
   (PIPE_CONTROL_INSTRUCTION_INVALIDATE |       \
    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |     \
    PIPE_CONTROL_VF_CACHE_INVALIDATE |          \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE |       \
    PIPE_CONTROL_STATE_CACHE_INVALIDATE)
273
/* How conditional rendering decides whether a draw happens. */
enum crocus_predicate_state {
   /*
    * RENDER / DONT_RENDER: the decision is known without looking at the
    * values in the query object buffer.  That is the case when no
    * conditional render is in progress, when the query object has already
    * completed, or when something else has already added samples to the
    * preliminary result.
    */
   CROCUS_PREDICATE_STATE_RENDER = 0,
   CROCUS_PREDICATE_STATE_DONT_RENDER,

   /*
    * The decision depends on the result of an MI_PREDICATE command, so the
    * predicate enable bit has to be checked.
    */
   CROCUS_PREDICATE_STATE_USE_BIT,

   /*
    * MI_PREDICATE either doesn't exist or the kernel features needed to use
    * it are missing.  Stall for the query result.
    */
   CROCUS_PREDICATE_STATE_STALL_FOR_QUERY,
};
293
294/** @} */
295
/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See crocus_compiled_shader, which represents a compiled shader variant.
 */
struct crocus_uncompiled_shader {
   /** The NIR for this shader. */
   struct nir_shader *nir;

   /** Stream output description stored alongside the shader. */
   struct pipe_stream_output_info stream_output;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* NOTE(review): presumably a unique ID for this shader — confirm. */
   unsigned program_id;

   /** Bitfield of (1 << CROCUS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;

   /* NOTE(review): presumably set when the shader uses edge flags — confirm. */
   bool needs_edge_flag;

   /** Constant data scraped from the shader by nir_opt_large_constants */
   struct pipe_resource *const_data;

   /** Surface state for const_data */
   struct crocus_state_ref const_data_state;
};
329
/*
 * Surface groups of a binding table.  Used to index the per-group arrays in
 * struct crocus_binding_table.
 */
enum crocus_surface_group {
   CROCUS_SURFACE_GROUP_RENDER_TARGET = 0,
   CROCUS_SURFACE_GROUP_RENDER_TARGET_READ,
   CROCUS_SURFACE_GROUP_SOL,
   CROCUS_SURFACE_GROUP_CS_WORK_GROUPS,
   CROCUS_SURFACE_GROUP_TEXTURE,
   CROCUS_SURFACE_GROUP_TEXTURE_GATHER,
   CROCUS_SURFACE_GROUP_IMAGE,
   CROCUS_SURFACE_GROUP_UBO,
   CROCUS_SURFACE_GROUP_SSBO,

   /* Number of groups; keep last. */
   CROCUS_SURFACE_GROUP_COUNT,
};
343
/*
 * Sentinel for "no binding table slot".
 *
 * NOTE(review): 0xa0a0a0a0 does not fit in a signed 32-bit int, so this
 * relies on the compiler accepting enumerator values outside the range of
 * int (an extension before C23) — confirm this is acceptable for all
 * supported compilers.
 */
enum {
   /* Invalid value for a binding table index. */
   CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
348
/**
 * Layout of a shader's binding table, tracked per surface group
 * (every array is indexed by enum crocus_surface_group).
 */
struct crocus_binding_table {
   /* NOTE(review): presumably the total size of the table in bytes — confirm. */
   uint32_t size_bytes;

   /** Number of surfaces in each group, before compacting. */
   uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];

   /** Initial offset of each group. */
   uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];

   /** Mask of surfaces used in each group. */
   uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
};
361
/**
 * A compiled shader variant, containing a pointer to the GPU assembly,
 * as well as program data and other packets needed by state upload.
 *
 * There can be several crocus_compiled_shader variants per API-level shader
 * (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
 */
struct crocus_compiled_shader {
   /** Reference to the uploaded assembly. */
   uint32_t offset;

   /* asm size in map */
   uint32_t map_size;

   /** The program data (owned by the program cache hash table) */
   struct brw_stage_prog_data *prog_data;
   /* NOTE(review): presumably the size in bytes of *prog_data — confirm. */
   uint32_t prog_data_size;

   /** A list of system values to be uploaded as uniforms. */
   enum brw_param_builtin *system_values;
   /** Number of entries in system_values. */
   unsigned num_system_values;

   /** Number of constbufs expected by the shader. */
   unsigned num_cbufs;

   /**
    * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
    * (the VUE-based information for transform feedback outputs).
    */
   uint32_t *streamout;

   /** Binding table layout for this variant. */
   struct crocus_binding_table bt;

   uint32_t bind_bo_offset;
   /*
    * TODO (carried over from the original marker): the 128-entry bound is
    * hard-coded.  NOTE(review): confirm it covers the largest binding table.
    */
   uint32_t surf_offset[128];
};
398
/**
 * API context state that is replicated per shader stage
 * (struct crocus_context keeps one of these per MESA_SHADER_STAGES entry).
 */
struct crocus_shader_state {
   /** Uniform Buffers */
   struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];

   /* NOTE(review): presumably set when system values must be re-uploaded — confirm. */
   bool sysvals_need_upload;

   /** Shader Storage Buffers */
   struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];

   /** Shader Storage Images (image load store) */
   struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];

   /** Bound sampler states and sampler views, one slot per texture sampler. */
   struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
   struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];

   /** Bitfield of which constant buffers are bound (non-null). */
   uint32_t bound_cbufs;

   /** Bitfield of which image views are bound (non-null). */
   uint32_t bound_image_views;

   /** Bitfield of which sampler views are bound (non-null). */
   uint32_t bound_sampler_views;

   /** Bitfield of which shader storage buffers are bound (non-null). */
   uint32_t bound_ssbos;

   /** Bitfield of which shader storage buffers are writable. */
   uint32_t writable_ssbos;

   /* NOTE(review): presumably the offset of the uploaded sampler state — confirm. */
   uint32_t sampler_offset;
};
434
/**
 * The API context (derived from pipe_context).
 *
 * Most driver state is tracked here.
 */
struct crocus_context {
   /*
    * Base Gallium context; kept as the first member.
    * NOTE(review): presumably so pipe_context and crocus_context pointers
    * can be cast to each other — confirm before moving it.
    */
   struct pipe_context ctx;
   struct threaded_context *thrctx;

   /** A debug callback for KHR_debug output. */
   struct util_debug_callback dbg;

   /** A device reset status callback for notifying that the GPU is hosed. */
   struct pipe_device_reset_callback reset;

   /** Slab allocator for crocus_transfer_map objects. */
   struct slab_child_pool transfer_pool;

   /** Slab allocator for threaded_context's crocus_transfer_map objects */
   struct slab_child_pool transfer_pool_unsync;

   struct blorp_context blorp;

   /** Batches owned by this context (array sized by CROCUS_BATCH_COUNT). */
   int batch_count;
   struct crocus_batch batches[CROCUS_BATCH_COUNT];

   struct u_upload_mgr *query_buffer_uploader;

   struct blitter_context *blitter;

   /** Draw-parameter tracking. */
   struct {
      struct {
         /**
          * Either the value of BaseVertex for indexed draw calls or the value
          * of the argument <first> for non-indexed draw calls.
          */
         int firstvertex;
         int baseinstance;
      } params;

      /**
       * Are the above values the ones stored in the draw_params buffer?
       * If so, we can compare them against new values to see if anything
       * changed.  If not, we need to assume they changed.
       */
      bool params_valid;

      /**
       * Resource and offset that stores draw_parameters from the indirect
       * buffer, or the buffer that stores the previous values for
       * non-indirect draws.
       */
      struct crocus_state_ref draw_params;

      struct {
         /**
          * The value of DrawID. This always comes in from its own vertex
          * buffer since it's not part of the indirect draw parameters.
          */
         int drawid;

         /**
          * Stores if an indexed or non-indexed draw (~0/0). Useful to
          * calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
          */
         int is_indexed_draw;
      } derived_params;

      /**
       * Resource and offset used for GL_ARB_shader_draw_parameters which
       * contains parameters that are not present in the indirect buffer,
       * such as drawid and is_indexed_draw. They will go in their own
       * vertex element.
       */
      struct crocus_state_ref derived_draw_params;
   } draw;

   /** Shader programs and the in-memory program cache. */
   struct {
      /** Per-stage API shader and the compiled variant, indexed by stage. */
      struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
      struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
      struct brw_vue_map *last_vue_map;

      /*
       * Program cache storage; `cache` is the hash table that
       * enum crocus_program_cache_id refers to (ice->shaders.cache).
       */
      struct crocus_bo *cache_bo;
      uint32_t cache_next_offset;
      void *cache_bo_map;
      struct hash_table *cache;

      unsigned urb_size;

      /* gen 4/5 clip/sf progs */
      struct crocus_compiled_shader *clip_prog;
      struct crocus_compiled_shader *sf_prog;
      /* gen4/5 prims, gen6 streamout */
      struct crocus_compiled_shader *ff_gs_prog;
      uint32_t clip_offset;
      uint32_t sf_offset;
      uint32_t wm_offset;
      uint32_t vs_offset;
      uint32_t gs_offset;
      uint32_t cc_offset;

      /** Is a GS or TES outputting points or lines? */
      bool output_topology_is_points_or_lines;

      /* Track last VS URB entry size */
      unsigned last_vs_entry_size;

      /**
       * Scratch buffers for various sizes and stages.
       *
       * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
       * and shader stage.
       */
      struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
   } shaders;

   /** Conditional rendering: the query, the condition value and the mode. */
   struct {
      struct crocus_query *query;
      bool condition;
      enum pipe_render_cond_flag mode;
   } condition;

   struct intel_perf_context *perf_ctx;

   /** Current API state plus the dirty tracking for it. */
   struct {
      /*
       * NOTE(review): presumably masks of CROCUS_DIRTY_* and
       * CROCUS_STAGE_DIRTY_* bits respectively — confirm.
       * stage_dirty_for_nos has one entry per enum crocus_nos_dep value.
       */
      uint64_t dirty;
      uint64_t stage_dirty;
      uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];

      unsigned num_viewports;
      unsigned sample_mask;
      struct crocus_blend_state *cso_blend;
      struct crocus_rasterizer_state *cso_rast;
      struct crocus_depth_stencil_alpha_state *cso_zsa;
      struct crocus_vertex_element_state *cso_vertex_elements;
      struct pipe_blend_color blend_color;
      struct pipe_poly_stipple poly_stipple;
      struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
      struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
      struct pipe_stencil_ref stencil_ref;
      struct pipe_framebuffer_state framebuffer;
      struct pipe_clip_state clip_planes;

      float default_outer_level[4];
      float default_inner_level[2];

      /** Bitfield of which vertex buffers are bound (non-null). */
      uint32_t bound_vertex_buffers;
      struct pipe_vertex_buffer vertex_buffers[16];
      uint32_t vb_end[16];

      bool primitive_restart;
      unsigned cut_index;
      enum pipe_prim_type reduced_prim_mode:8;
      enum pipe_prim_type prim_mode:8;
      bool prim_is_points_or_lines;
      uint8_t vertices_per_patch;
      uint8_t patch_vertices;

      bool window_space_position;

      /** The last compute group size */
      uint32_t last_block[3];

      /** The last compute grid size */
      uint32_t last_grid[3];
      /** Reference to the BO containing the compute grid size */
      struct crocus_state_ref grid_size;

      /**
       * Array of aux usages for drawing, altered to account for any
       * self-dependencies from resources bound for sampling and rendering.
       */
      enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];

      /** Aux usage of the fb's depth buffer (which may or may not exist). */
      enum isl_aux_usage hiz_usage;

      /** Bitfield of whether color blending is enabled for RT[i] */
      uint8_t blend_enables;

      /** Are depth writes enabled?  (Depth buffer may or may not exist.) */
      bool depth_writes_enabled;

      /** Are stencil writes enabled?  (Stencil buffer may or may not exist.) */
      bool stencil_writes_enabled;

      /** GenX-specific current state */
      struct crocus_genx_state *genx;

      /** Per-stage API state, indexed by shader stage. */
      struct crocus_shader_state shaders[MESA_SHADER_STAGES];

      /* track if geom shader is active for IVB GT2 workaround */
      bool gs_enabled;
      /** Does the vertex shader use shader draw parameters? */
      bool vs_uses_draw_params;
      bool vs_uses_derived_draw_params;
      bool vs_needs_sgvs_element;
      bool vs_uses_vertexid;
      bool vs_uses_instanceid;

      /** Does the vertex shader use the edge flag? */
      bool vs_needs_edge_flag;

      struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
      bool streamout_active;
      int so_targets;

      bool statistics_counters_enabled;

      /** Current conditional rendering mode */
      enum crocus_predicate_state predicate;
      bool predicate_supported;

      /**
       * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
       * render context that needs to be uploaded to the compute context.
       */
      struct crocus_bo *compute_predicate;

      /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
      bool prims_generated_query_active;

      /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
      uint32_t *streamout;

      /**
       * Resources containing streamed state which our render context
       * currently points to.  Used to re-add these to the validation
       * list when we start a new batch and haven't resubmitted commands.
       */
      struct {
         struct pipe_resource *res;
         uint32_t offset;
         uint32_t size;
         uint32_t index_size;
         bool prim_restart;
      } index_buffer;

      uint32_t sf_vp_address;
      uint32_t clip_vp_address;
      uint32_t cc_vp_address;

      uint32_t stats_wm;
      float global_depth_offset_clamp;

      uint32_t last_xfb_verts_per_prim;
      uint64_t svbi;
   } state;

   /* URB allocations (tagged BRW_NEW_URB_ALLOCATIONS in the original code).
    */
   struct {
      uint32_t vsize;                /* vertex size plus header in urb registers */
      uint32_t gsize;                /* GS output size in urb registers */
      uint32_t hsize;             /* Tessellation control output size in urb registers */
      uint32_t dsize;             /* Tessellation evaluation output size in urb registers */
      uint32_t csize;                /* constant buffer size in urb registers */
      uint32_t sfsize;                /* setup data size in urb registers */

      bool constrained;

      /* Number of URB entries per unit. */
      uint32_t nr_vs_entries;
      uint32_t nr_hs_entries;
      uint32_t nr_ds_entries;
      uint32_t nr_gs_entries;
      uint32_t nr_clip_entries;
      uint32_t nr_sf_entries;
      uint32_t nr_cs_entries;

      /* NOTE(review): presumably the start of each unit's URB region — confirm. */
      uint32_t vs_start;
      uint32_t hs_start;
      uint32_t ds_start;
      uint32_t gs_start;
      uint32_t clip_start;
      uint32_t sf_start;
      uint32_t cs_start;
      /**
       * URB size in the current configuration.  The units this is expressed
       * in are somewhat inconsistent, see intel_device_info::urb::size.
       *
       * FINISHME: Represent the URB size consistently in KB on all platforms.
       */
      uint32_t size;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the GS.
       */
      bool gs_present;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the HS and DS.
       */
      bool tess_present;
   } urb;

   /* GEN4/5 curbe */
   struct {
      unsigned wm_start;
      unsigned wm_size;
      unsigned clip_start;
      unsigned clip_size;
      unsigned vs_start;
      unsigned vs_size;
      unsigned total_size;

      struct crocus_resource *curbe_res;
      unsigned curbe_offset;
   } curbe;

   /**
    * A buffer containing a marker + description of the driver. This buffer is
    * added to all execbufs syscalls so that we can identify the driver that
    * generated a hang by looking at the content of the buffer in the error
    * state. It is also used for hardware workarounds that require scratch
    * writes or reads from some unimportant memory. To avoid overwriting the
    * debug data, workarounds should go through the offset below.
    * NOTE(review): the original comment referred to a "workaround_address"
    * field, which does not exist in this struct; workaround_offset is
    * presumably the intended field — confirm.
    */
   struct crocus_bo *workaround_bo;
   unsigned workaround_offset;
};
755
/**
 * Report a performance warning.
 *
 * The message is printed through dbg_printf() when INTEL_DEBUG(DEBUG_PERF)
 * is set and, independently, passed to util_debug_message() as PERF_INFO
 * when \p dbg is non-NULL.  The variadic arguments are a printf-style
 * format string followed by its arguments.
 */
#define perf_debug(dbg, ...) do {                         \
   if (INTEL_DEBUG(DEBUG_PERF)) {                         \
      dbg_printf(__VA_ARGS__);                            \
   }                                                      \
   if (unlikely(dbg)) {                                   \
      util_debug_message(dbg, PERF_INFO, __VA_ARGS__);    \
   }                                                      \
} while (0)
762
763
/*
 * Context-level entry points.  Only the prototypes are visible in this
 * header; the notes below are inferred from names and signatures —
 * confirm against the implementations.
 */

/* Creates a context for \p screen and returns it as a pipe_context. */
struct pipe_context *
crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags);

void crocus_lost_context_state(struct crocus_batch *batch);

/* Per-area init hooks, each taking the pipe_context being set up. */
void crocus_init_blit_functions(struct pipe_context *ctx);
void crocus_init_clear_functions(struct pipe_context *ctx);
void crocus_init_program_functions(struct pipe_context *ctx);
void crocus_init_resource_functions(struct pipe_context *ctx);
/* NOTE(review): meaning of the bool result is not visible here — confirm. */
bool crocus_update_compiled_shaders(struct crocus_context *ice);
void crocus_update_compiled_compute_shader(struct crocus_context *ice);
void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                                      unsigned threads, uint32_t *dst);
777
778
779/* crocus_blit.c */
/* Bit flags passed to crocus_blitter_begin(); values may be OR'ed together. */
enum crocus_blitter_op {
   CROCUS_SAVE_TEXTURES       = 1 << 0,
   CROCUS_SAVE_FRAMEBUFFER    = 1 << 1,
   CROCUS_SAVE_FRAGMENT_STATE = 1 << 2,
   CROCUS_DISABLE_RENDER_COND = 1 << 3,
};
/* \p op takes enum crocus_blitter_op values. */
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond);

/*
 * Fills \p surf from \p p_res.
 * NOTE(review): inferred from the name and the non-const blorp_surf
 * out-parameter — confirm against the implementation.
 */
void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
                                    struct isl_device *isl_dev,
                                    struct blorp_surf *surf,
                                    struct pipe_resource *p_res,
                                    enum isl_aux_usage aux_usage,
                                    unsigned level,
                                    bool is_render_target);
/*
 * Copies \p src_box of \p src (at \p src_level) to \p dst at
 * (\p dstx, \p dsty, \p dstz) in \p dst_level, recording into \p batch.
 * NOTE(review): inferred from the signature — confirm.
 */
void crocus_copy_region(struct blorp_context *blorp,
                        struct crocus_batch *batch,
                        struct pipe_resource *dst,
                        unsigned dst_level,
                        unsigned dstx, unsigned dsty, unsigned dstz,
                        struct pipe_resource *src,
                        unsigned src_level,
                        const struct pipe_box *src_box);
804
/* crocus_draw.c */

/*
 * Draw and compute-dispatch entry points.
 * NOTE(review): the signatures look like the pipe_context draw_vbo and
 * launch_grid hooks — confirm where they are installed.
 */
void crocus_draw_vbo(struct pipe_context *ctx,
                     const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws);
void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
813
/* crocus_pipe_control.c */

/* Emit a pipe control on @batch with the given @flags.  @reason is a
 * human-readable string (presumably for debug output -- confirm). */
void
crocus_emit_pipe_control_flush(struct crocus_batch *batch,
                               const char *reason,
                               uint32_t flags);

/* Same as above, but additionally takes a target (@bo, @offset) and an
 * immediate value @imm.  NOTE(review): presumably the post-sync write
 * destination and payload -- confirm. */
void
crocus_emit_pipe_control_write(struct crocus_batch *batch,
                               const char *reason,
                               uint32_t flags,
                               struct crocus_bo *bo,
                               uint32_t offset,
                               uint64_t imm);

/* Special-purpose flush helpers; see crocus_pipe_control.c for when each
 * one is required. */
void
crocus_emit_mi_flush(struct crocus_batch *batch);
void
crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
void
crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);

void
crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
                             const char *reason,
                             uint32_t flags);

void
crocus_flush_all_caches(struct crocus_batch *batch);
828
/**
 * Call crocus_flush_all_caches() on @batch when the screen's
 * always_flush_cache driconf option is set; otherwise do nothing.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement: the caller supplies the terminating ';' and the macro can be
 * used safely as the body of an unbraced if/else.  The argument is
 * parenthesised so expressions such as &batch expand correctly.
 *
 * Note that @batch is evaluated twice when the option is enabled, so it
 * must not have side effects.
 */
#define crocus_handle_always_flush_cache(batch)                      \
   do {                                                              \
      if (unlikely((batch)->screen->driconf.always_flush_cache))     \
         crocus_flush_all_caches(batch);                             \
   } while (0)
832
833void crocus_init_flush_functions(struct pipe_context *ctx);
834
835/* crocus_program.c */
836const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
837                                                 gl_shader_stage stage);
838struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
839                                           unsigned per_thread_scratch,
840                                           gl_shader_stage stage);
841/**
842 * Map a <group, index> pair to a binding table index.
843 *
844 * For example: <UBO, 5> => binding table index 12
845 */
846static inline uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
847                                                 enum crocus_surface_group group,
848                                                 uint32_t index)
849{
850   assert(index < bt->sizes[group]);
851   uint64_t mask = bt->used_mask[group];
852   uint64_t bit = 1ull << index;
853   if (bit & mask) {
854      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
855   } else {
856      return CROCUS_SURFACE_NOT_USED;
857   }
858}
859
860/**
861 * Map a binding table index back to a <group, index> pair.
862 *
863 * For example: binding table index 12 => <UBO, 5>
864 */
865static inline uint32_t
866crocus_bti_to_group_index(const struct crocus_binding_table *bt,
867                          enum crocus_surface_group group, uint32_t bti)
868{
869   uint64_t used_mask = bt->used_mask[group];
870   assert(bti >= bt->offsets[group]);
871
872   uint32_t c = bti - bt->offsets[group];
873   while (used_mask) {
874      int i = u_bit_scan64(&used_mask);
875      if (c == 0)
876         return i;
877      c--;
878   }
879
880   return CROCUS_SURFACE_NOT_USED;
881}
882
883
/* crocus_disk_cache.c */

/* Store @shader (compiled from @ish) in @cache, keyed by the
 * @prog_key_size bytes at @prog_key.
 * NOTE(review): @map presumably points at the shader's mapped assembly --
 * confirm at the definition. */
void
crocus_disk_cache_store(struct disk_cache *cache,
                        const struct crocus_uncompiled_shader *ish,
                        const struct crocus_compiled_shader *shader,
                        void *map,
                        const void *prog_key,
                        uint32_t prog_key_size);

/* Look up a compiled variant of @ish for the given key.  Presumably
 * returns NULL on a cache miss -- verify against the implementation. */
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
                           const struct crocus_uncompiled_shader *ish,
                           const void *prog_key,
                           uint32_t prog_key_size);
897
/* crocus_program_cache.c */

/* Lifetime and debugging of the per-context program cache. */
void
crocus_init_program_cache(struct crocus_context *ice);
void
crocus_destroy_program_cache(struct crocus_context *ice);
void
crocus_print_program_cache(struct crocus_context *ice);

/* Look up a shader in cache @cache_id by the @key_size bytes at @key.
 * Presumably returns NULL when nothing matches -- confirm. */
struct crocus_compiled_shader *
crocus_find_cached_shader(struct crocus_context *ice,
                          enum crocus_program_cache_id cache_id,
                          uint32_t key_size,
                          const void *key);

/* Add a compiled shader to cache @cache_id under @key.  @assembly/@asm_size
 * and @prog_data/@prog_data_size describe the compiled program; the
 * remaining arguments carry per-shader metadata.
 * NOTE(review): ownership of @prog_data, @streamout and @sysv after the
 * call is not visible from this header -- check the definition. */
struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context *ice,
                     enum crocus_program_cache_id cache_id,
                     uint32_t key_size,
                     const void *key,
                     const void *assembly,
                     uint32_t asm_size,
                     struct brw_stage_prog_data *prog_data,
                     uint32_t prog_data_size,
                     uint32_t *streamout,
                     enum brw_param_builtin *sysv,
                     unsigned num_system_values,
                     unsigned num_cbufs,
                     const struct crocus_binding_table *bt);

/* NOTE(review): by its name, finds the key of an earlier compile of the
 * program identified by @program_string_id in cache @cache_id -- confirm. */
const void *
crocus_find_previous_compile(const struct crocus_context *ice,
                             enum crocus_program_cache_id cache_id,
                             unsigned program_string_id);

/* Shader lookup/upload entry points taking a blorp_batch (presumably the
 * callbacks handed to BLORP -- confirm).  Results are returned through
 * @kernel_out and @prog_data_out. */
bool
crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
                           const void *key,
                           uint32_t key_size,
                           uint32_t *kernel_out,
                           void *prog_data_out);
bool
crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
                           uint32_t stage,
                           const void *key,
                           uint32_t key_size,
                           const void *kernel,
                           uint32_t kernel_size,
                           const struct brw_stage_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t *kernel_out,
                           void *prog_data_out);
936
937/* crocus_resolve.c */
938
939void crocus_predraw_resolve_inputs(struct crocus_context *ice,
940                                   struct crocus_batch *batch,
941                                   bool *draw_aux_buffer_disabled,
942                                   gl_shader_stage stage,
943                                   bool consider_framebuffer);
944void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
945                                        struct crocus_batch *batch,
946                                        bool *draw_aux_buffer_disabled);
947void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
948                                             struct crocus_batch *batch);
949void crocus_cache_sets_clear(struct crocus_batch *batch);
950void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
951void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
952void crocus_cache_flush_for_render(struct crocus_batch *batch,
953                                   struct crocus_bo *bo,
954                                   enum isl_format format,
955                                   enum isl_aux_usage aux_usage);
956void crocus_render_cache_add_bo(struct crocus_batch *batch,
957                                struct crocus_bo *bo,
958                                enum isl_format format,
959                                enum isl_aux_usage aux_usage);
960void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
961void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
962int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
963                                 struct pipe_driver_query_info *info);
964int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
965                                       unsigned index,
966                                       struct pipe_driver_query_group_info *info);
967
968struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);
969
970bool crocus_sw_check_cond_render(struct crocus_context *ice);
971static inline bool crocus_check_conditional_render(struct crocus_context *ice)
972{
973   if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
974      return crocus_sw_check_cond_render(ice);
975   return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
976}
977
978#ifdef genX
979#  include "crocus_genx_protos.h"
980#else
981#  define genX(x) gfx4_##x
982#  include "crocus_genx_protos.h"
983#  undef genX
984#  define genX(x) gfx45_##x
985#  include "crocus_genx_protos.h"
986#  undef genX
987#  define genX(x) gfx5_##x
988#  include "crocus_genx_protos.h"
989#  undef genX
990#  define genX(x) gfx6_##x
991#  include "crocus_genx_protos.h"
992#  undef genX
993#  define genX(x) gfx7_##x
994#  include "crocus_genx_protos.h"
995#  undef genX
996#  define genX(x) gfx75_##x
997#  include "crocus_genx_protos.h"
998#  undef genX
999#  define genX(x) gfx8_##x
1000#  include "crocus_genx_protos.h"
1001#  undef genX
1002#endif
1003
1004#endif
1005