1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#ifndef CROCUS_CONTEXT_H 24#define CROCUS_CONTEXT_H 25 26#include "pipe/p_context.h" 27#include "pipe/p_state.h" 28#include "util/u_debug.h" 29#include "util/u_threaded_context.h" 30#include "intel/blorp/blorp.h" 31#include "intel/dev/intel_debug.h" 32#include "intel/compiler/brw_compiler.h" 33#include "crocus_batch.h" 34#include "crocus_fence.h" 35#include "crocus_resource.h" 36#include "crocus_screen.h" 37#include "util/u_blitter.h" 38 39struct crocus_bo; 40struct crocus_context; 41struct blorp_batch; 42struct blorp_params; 43 44#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27) 45#define CROCUS_MAX_TEXTURE_SAMPLERS 32 46/* CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same. */ 47#define CROCUS_MAX_ABOS 16 48#define CROCUS_MAX_SSBOS 16 49#define CROCUS_MAX_VIEWPORTS 16 50#define CROCUS_MAX_CLIP_PLANES 8 51 52enum crocus_param_domain { 53 BRW_PARAM_DOMAIN_BUILTIN = 0, 54 BRW_PARAM_DOMAIN_IMAGE, 55}; 56 57enum { 58 DRI_CONF_BO_REUSE_DISABLED, 59 DRI_CONF_BO_REUSE_ALL 60}; 61 62#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) 63#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) 64#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) 65#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) 66#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) 67#define BRW_PARAM_IMAGE_OFFSET(value)(BRW_PARAM_VALUE(value) & 0xf) 68 69/** 70 * Dirty flags. When state changes, we flag some combination of these 71 * to indicate that particular GPU commands need to be re-emitted. 72 * 73 * Each bit typically corresponds to a single 3DSTATE_* command packet, but 74 * in rare cases they map to a group of related packets that need to be 75 * emitted together. 76 * 77 * See crocus_upload_render_state(). 78 */ 79#define CROCUS_DIRTY_COLOR_CALC_STATE (1ull << 0) 80#define CROCUS_DIRTY_POLYGON_STIPPLE (1ull << 1) 81#define CROCUS_DIRTY_CC_VIEWPORT (1ull << 2) 82#define CROCUS_DIRTY_SF_CL_VIEWPORT (1ull << 3) 83#define CROCUS_DIRTY_RASTER (1ull << 4) 84#define CROCUS_DIRTY_CLIP (1ull << 5) 85#define CROCUS_DIRTY_LINE_STIPPLE (1ull << 6) 86#define CROCUS_DIRTY_VERTEX_ELEMENTS (1ull << 7) 87#define CROCUS_DIRTY_VERTEX_BUFFERS (1ull << 8) 88#define CROCUS_DIRTY_DRAWING_RECTANGLE (1ull << 9) 89#define CROCUS_DIRTY_GEN6_URB (1ull << 10) 90#define CROCUS_DIRTY_DEPTH_BUFFER (1ull << 11) 91#define CROCUS_DIRTY_WM (1ull << 12) 92#define CROCUS_DIRTY_SO_DECL_LIST (1ull << 13) 93#define CROCUS_DIRTY_STREAMOUT (1ull << 14) 94#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR (1ull << 15) 95#define CROCUS_DIRTY_GEN4_CURBE (1ull << 16) 96#define CROCUS_DIRTY_GEN4_URB_FENCE (1ull << 17) 97#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS (1ull << 18) 98#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS (1ull << 19) 99#define CROCUS_DIRTY_GEN6_BLEND_STATE (1ull << 20) 100#define CROCUS_DIRTY_GEN6_SCISSOR_RECT (1ull << 21) 101#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL (1ull << 22) 102#define CROCUS_DIRTY_GEN6_MULTISAMPLE (1ull << 23) 103#define CROCUS_DIRTY_GEN6_SAMPLE_MASK (1ull << 24) 104#define CROCUS_DIRTY_GEN7_SBE (1ull << 25) 105#define CROCUS_DIRTY_GEN7_L3_CONFIG (1ull << 26) 106#define CROCUS_DIRTY_GEN7_SO_BUFFERS (1ull << 27) 107#define CROCUS_DIRTY_GEN75_VF (1ull << 28) 108#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES (1ull << 29) 109#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30) 110#define CROCUS_DIRTY_VF_STATISTICS (1ull << 31) 111#define CROCUS_DIRTY_GEN4_CLIP_PROG (1ull << 32) 112#define CROCUS_DIRTY_GEN4_SF_PROG (1ull << 33) 113#define CROCUS_DIRTY_GEN4_FF_GS_PROG (1ull << 34) 114#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS (1ull << 35) 115#define CROCUS_DIRTY_GEN6_SVBI (1ull << 36) 116#define CROCUS_DIRTY_GEN8_VF_TOPOLOGY (1ull << 37) 117#define CROCUS_DIRTY_GEN8_PMA_FIX (1ull << 38) 118#define CROCUS_DIRTY_GEN8_VF_SGVS (1ull << 39) 119#define CROCUS_DIRTY_GEN8_PS_BLEND (1ull << 40) 120 121#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES) 122 123#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE) 124 125/** 126 * Per-stage dirty flags. When state changes, we flag some combination of 127 * these to indicate that particular GPU commands need to be re-emitted. 128 * Unlike the IRIS_DIRTY_* flags these are shader stage-specific and can be 129 * indexed by shifting the mask by the shader stage index. 130 * 131 * See crocus_upload_render_state(). 132 */ 133#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS (1ull << 0) 134#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS (1ull << 1) 135#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES (1ull << 2) 136#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS (1ull << 3) 137#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS (1ull << 4) 138#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS (1ull << 5) 139#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS (1ull << 6) 140#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS (1ull << 7) 141#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES (1ull << 8) 142#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS (1ull << 9) 143#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS (1ull << 10) 144#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS (1ull << 11) 145#define CROCUS_STAGE_DIRTY_VS (1ull << 12) 146#define CROCUS_STAGE_DIRTY_TCS (1ull << 13) 147#define CROCUS_STAGE_DIRTY_TES (1ull << 14) 148#define CROCUS_STAGE_DIRTY_GS (1ull << 15) 149#define CROCUS_STAGE_DIRTY_FS (1ull << 16) 150#define CROCUS_STAGE_DIRTY_CS (1ull << 17) 151#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS 18 152#define CROCUS_STAGE_DIRTY_CONSTANTS_VS (1ull << 18) 153#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS (1ull << 19) 154#define CROCUS_STAGE_DIRTY_CONSTANTS_TES (1ull << 20) 155#define CROCUS_STAGE_DIRTY_CONSTANTS_GS (1ull << 21) 156#define CROCUS_STAGE_DIRTY_CONSTANTS_FS (1ull << 22) 157#define CROCUS_STAGE_DIRTY_CONSTANTS_CS (1ull << 23) 158#define CROCUS_STAGE_DIRTY_BINDINGS_VS (1ull << 24) 159#define CROCUS_STAGE_DIRTY_BINDINGS_TCS (1ull << 25) 160#define CROCUS_STAGE_DIRTY_BINDINGS_TES (1ull << 26) 161#define CROCUS_STAGE_DIRTY_BINDINGS_GS (1ull << 27) 162#define CROCUS_STAGE_DIRTY_BINDINGS_FS (1ull << 28) 163#define CROCUS_STAGE_DIRTY_BINDINGS_CS (1ull << 29) 164 165#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE (CROCUS_STAGE_DIRTY_CS | \ 166 CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS | \ 167 CROCUS_STAGE_DIRTY_UNCOMPILED_CS | \ 168 CROCUS_STAGE_DIRTY_CONSTANTS_CS | \ 169 CROCUS_STAGE_DIRTY_BINDINGS_CS) 170 171#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE) 172 173#define CROCUS_ALL_STAGE_DIRTY_BINDINGS (CROCUS_STAGE_DIRTY_BINDINGS_VS | \ 174 CROCUS_STAGE_DIRTY_BINDINGS_TCS | \ 175 CROCUS_STAGE_DIRTY_BINDINGS_TES | \ 176 CROCUS_STAGE_DIRTY_BINDINGS_GS | \ 177 CROCUS_STAGE_DIRTY_BINDINGS_FS | \ 178 CROCUS_STAGE_DIRTY_BINDINGS_CS) 179 180#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS (CROCUS_STAGE_DIRTY_CONSTANTS_VS | \ 181 CROCUS_STAGE_DIRTY_CONSTANTS_TCS | \ 182 CROCUS_STAGE_DIRTY_CONSTANTS_TES | \ 183 CROCUS_STAGE_DIRTY_CONSTANTS_GS | \ 184 CROCUS_STAGE_DIRTY_CONSTANTS_FS) 185 186/** 187 * Non-orthogonal state (NOS) dependency flags. 188 * 189 * Shader programs may depend on non-orthogonal state. These flags are 190 * used to indicate that a shader's key depends on the state provided by 191 * a certain Gallium CSO. Changing any CSOs marked as a dependency will 192 * cause the driver to re-compute the shader key, possibly triggering a 193 * shader recompile. 194 */ 195enum crocus_nos_dep { 196 CROCUS_NOS_FRAMEBUFFER, 197 CROCUS_NOS_DEPTH_STENCIL_ALPHA, 198 CROCUS_NOS_RASTERIZER, 199 CROCUS_NOS_BLEND, 200 CROCUS_NOS_LAST_VUE_MAP, 201 CROCUS_NOS_TEXTURES, 202 CROCUS_NOS_VERTEX_ELEMENTS, 203 CROCUS_NOS_COUNT, 204}; 205 206struct crocus_depth_stencil_alpha_state; 207 208/** 209 * Cache IDs for the in-memory program cache (ice->shaders.cache). 210 */ 211enum crocus_program_cache_id { 212 CROCUS_CACHE_VS = MESA_SHADER_VERTEX, 213 CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL, 214 CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL, 215 CROCUS_CACHE_GS = MESA_SHADER_GEOMETRY, 216 CROCUS_CACHE_FS = MESA_SHADER_FRAGMENT, 217 CROCUS_CACHE_CS = MESA_SHADER_COMPUTE, 218 CROCUS_CACHE_BLORP, 219 CROCUS_CACHE_SF, 220 CROCUS_CACHE_CLIP, 221 CROCUS_CACHE_FF_GS, 222}; 223 224/** @{ 225 * 226 * Defines for PIPE_CONTROL operations, which trigger cache flushes, 227 * synchronization, pipelined memory writes, and so on. 228 * 229 * The bits here are not the actual hardware values. The actual fields 230 * move between various generations, so we just have flags for each 231 * potential operation, and use genxml to encode the actual packet. 232 */ 233enum pipe_control_flags 234{ 235 PIPE_CONTROL_FLUSH_LLC = (1 << 1), 236 PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2), 237 PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3), 238 PIPE_CONTROL_CS_STALL = (1 << 4), 239 PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5), 240 PIPE_CONTROL_SYNC_GFDT = (1 << 6), 241 PIPE_CONTROL_TLB_INVALIDATE = (1 << 7), 242 PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8), 243 PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9), 244 PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10), 245 PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11), 246 PIPE_CONTROL_DEPTH_STALL = (1 << 12), 247 PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13), 248 PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14), 249 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15), 250 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16), 251 PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17), 252 PIPE_CONTROL_FLUSH_ENABLE = (1 << 18), 253 PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19), 254 PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20), 255 PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21), 256 PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22), 257 PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23), 258 PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24), 259 PIPE_CONTROL_TILE_CACHE_FLUSH = (1 << 25), 260}; 261 262#define PIPE_CONTROL_CACHE_FLUSH_BITS \ 263 (PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ 264 PIPE_CONTROL_DATA_CACHE_FLUSH | \ 265 PIPE_CONTROL_RENDER_TARGET_FLUSH) 266 267#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ 268 (PIPE_CONTROL_STATE_CACHE_INVALIDATE | \ 269 PIPE_CONTROL_CONST_CACHE_INVALIDATE | \ 270 PIPE_CONTROL_VF_CACHE_INVALIDATE | \ 271 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ 272 PIPE_CONTROL_INSTRUCTION_INVALIDATE) 273 274enum crocus_predicate_state { 275 /* The first two states are used if we can determine whether to draw 276 * without having to look at the values in the query object buffer. This 277 * will happen if there is no conditional render in progress, if the query 278 * object is already completed or if something else has already added 279 * samples to the preliminary result. 280 */ 281 CROCUS_PREDICATE_STATE_RENDER, 282 CROCUS_PREDICATE_STATE_DONT_RENDER, 283 284 /* In this case whether to draw or not depends on the result of an 285 * MI_PREDICATE command so the predicate enable bit needs to be checked. 286 */ 287 CROCUS_PREDICATE_STATE_USE_BIT, 288 /* In this case, either MI_PREDICATE doesn't exist or we lack the 289 * necessary kernel features to use it. Stall for the query result. 290 */ 291 CROCUS_PREDICATE_STATE_STALL_FOR_QUERY, 292}; 293 294/** @} */ 295 296/** 297 * An uncompiled, API-facing shader. This is the Gallium CSO for shaders. 298 * It primarily contains the NIR for the shader. 299 * 300 * Each API-facing shader can be compiled into multiple shader variants, 301 * based on non-orthogonal state dependencies, recorded in the shader key. 302 * 303 * See crocus_compiled_shader, which represents a compiled shader variant. 304 */ 305struct crocus_uncompiled_shader { 306 struct nir_shader *nir; 307 308 struct pipe_stream_output_info stream_output; 309 310 /* A SHA1 of the serialized NIR for the disk cache. */ 311 unsigned char nir_sha1[20]; 312 313 unsigned program_id; 314 315 /** Bitfield of (1 << CROCUS_NOS_*) flags. */ 316 unsigned nos; 317 318 /** Have any shader variants been compiled yet? */ 319 bool compiled_once; 320 321 bool needs_edge_flag; 322 323 /** Constant data scraped from the shader by nir_opt_large_constants */ 324 struct pipe_resource *const_data; 325 326 /** Surface state for const_data */ 327 struct crocus_state_ref const_data_state; 328}; 329 330enum crocus_surface_group { 331 CROCUS_SURFACE_GROUP_RENDER_TARGET, 332 CROCUS_SURFACE_GROUP_RENDER_TARGET_READ, 333 CROCUS_SURFACE_GROUP_SOL, 334 CROCUS_SURFACE_GROUP_CS_WORK_GROUPS, 335 CROCUS_SURFACE_GROUP_TEXTURE, 336 CROCUS_SURFACE_GROUP_TEXTURE_GATHER, 337 CROCUS_SURFACE_GROUP_IMAGE, 338 CROCUS_SURFACE_GROUP_UBO, 339 CROCUS_SURFACE_GROUP_SSBO, 340 341 CROCUS_SURFACE_GROUP_COUNT, 342}; 343 344enum { 345 /* Invalid value for a binding table index. */ 346 CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0, 347}; 348 349struct crocus_binding_table { 350 uint32_t size_bytes; 351 352 /** Number of surfaces in each group, before compacting. */ 353 uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT]; 354 355 /** Initial offset of each group. */ 356 uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT]; 357 358 /** Mask of surfaces used in each group. */ 359 uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT]; 360}; 361 362/** 363 * A compiled shader variant, containing a pointer to the GPU assembly, 364 * as well as program data and other packets needed by state upload. 365 * 366 * There can be several crocus_compiled_shader variants per API-level shader 367 * (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key). 368 */ 369struct crocus_compiled_shader { 370 /** Reference to the uploaded assembly. */ 371 uint32_t offset; 372 373 /* asm size in map */ 374 uint32_t map_size; 375 376 /** The program data (owned by the program cache hash table) */ 377 struct brw_stage_prog_data *prog_data; 378 uint32_t prog_data_size; 379 380 /** A list of system values to be uploaded as uniforms. */ 381 enum brw_param_builtin *system_values; 382 unsigned num_system_values; 383 384 /** Number of constbufs expected by the shader. */ 385 unsigned num_cbufs; 386 387 /** 388 * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets 389 * (the VUE-based information for transform feedback outputs). 390 */ 391 uint32_t *streamout; 392 393 struct crocus_binding_table bt; 394 395 uint32_t bind_bo_offset; 396 uint32_t surf_offset[128];//TODO 397}; 398 399/** 400 * API context state that is replicated per shader stage. 401 */ 402struct crocus_shader_state { 403 /** Uniform Buffers */ 404 struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS]; 405 406 bool sysvals_need_upload; 407 408 /** Shader Storage Buffers */ 409 struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS]; 410 411 /** Shader Storage Images (image load store) */ 412 struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES]; 413 414 struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS]; 415 struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS]; 416 417 /** Bitfield of which constant buffers are bound (non-null). */ 418 uint32_t bound_cbufs; 419 420 /** Bitfield of which image views are bound (non-null). */ 421 uint32_t bound_image_views; 422 423 /** Bitfield of which sampler views are bound (non-null). */ 424 uint32_t bound_sampler_views; 425 426 /** Bitfield of which shader storage buffers are bound (non-null). */ 427 uint32_t bound_ssbos; 428 429 /** Bitfield of which shader storage buffers are writable. */ 430 uint32_t writable_ssbos; 431 432 uint32_t sampler_offset; 433}; 434 435/** 436 * The API context (derived from pipe_context). 437 * 438 * Most driver state is tracked here. 439 */ 440struct crocus_context { 441 struct pipe_context ctx; 442 struct threaded_context *thrctx; 443 444 /** A debug callback for KHR_debug output. */ 445 struct util_debug_callback dbg; 446 447 /** A device reset status callback for notifying that the GPU is hosed. */ 448 struct pipe_device_reset_callback reset; 449 450 /** Slab allocator for crocus_transfer_map objects. */ 451 struct slab_child_pool transfer_pool; 452 453 /** Slab allocator for threaded_context's crocus_transfer_map objects */ 454 struct slab_child_pool transfer_pool_unsync; 455 456 struct blorp_context blorp; 457 458 int batch_count; 459 struct crocus_batch batches[CROCUS_BATCH_COUNT]; 460 461 struct u_upload_mgr *query_buffer_uploader; 462 463 struct blitter_context *blitter; 464 465 struct { 466 struct { 467 /** 468 * Either the value of BaseVertex for indexed draw calls or the value 469 * of the argument <first> for non-indexed draw calls. 470 */ 471 int firstvertex; 472 int baseinstance; 473 } params; 474 475 /** 476 * Are the above values the ones stored in the draw_params buffer? 477 * If so, we can compare them against new values to see if anything 478 * changed. If not, we need to assume they changed. 479 */ 480 bool params_valid; 481 482 /** 483 * Resource and offset that stores draw_parameters from the indirect 484 * buffer or to the buffer that stures the previous values for non 485 * indirect draws. 486 */ 487 struct crocus_state_ref draw_params; 488 489 struct { 490 /** 491 * The value of DrawID. This always comes in from it's own vertex 492 * buffer since it's not part of the indirect draw parameters. 493 */ 494 int drawid; 495 496 /** 497 * Stores if an indexed or non-indexed draw (~0/0). Useful to 498 * calculate BaseVertex as an AND of firstvertex and is_indexed_draw. 499 */ 500 int is_indexed_draw; 501 } derived_params; 502 503 /** 504 * Resource and offset used for GL_ARB_shader_draw_parameters which 505 * contains parameters that are not present in the indirect buffer as 506 * drawid and is_indexed_draw. They will go in their own vertex element. 507 */ 508 struct crocus_state_ref derived_draw_params; 509 } draw; 510 511 struct { 512 struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES]; 513 struct crocus_compiled_shader *prog[MESA_SHADER_STAGES]; 514 struct brw_vue_map *last_vue_map; 515 516 struct crocus_bo *cache_bo; 517 uint32_t cache_next_offset; 518 void *cache_bo_map; 519 struct hash_table *cache; 520 521 unsigned urb_size; 522 523 /* gen 4/5 clip/sf progs */ 524 struct crocus_compiled_shader *clip_prog; 525 struct crocus_compiled_shader *sf_prog; 526 /* gen4/5 prims, gen6 streamout */ 527 struct crocus_compiled_shader *ff_gs_prog; 528 uint32_t clip_offset; 529 uint32_t sf_offset; 530 uint32_t wm_offset; 531 uint32_t vs_offset; 532 uint32_t gs_offset; 533 uint32_t cc_offset; 534 535 /** Is a GS or TES outputting points or lines? */ 536 bool output_topology_is_points_or_lines; 537 538 /* Track last VS URB entry size */ 539 unsigned last_vs_entry_size; 540 541 /** 542 * Scratch buffers for various sizes and stages. 543 * 544 * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding, 545 * and shader stage. 546 */ 547 struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES]; 548 } shaders; 549 550 struct { 551 struct crocus_query *query; 552 bool condition; 553 enum pipe_render_cond_flag mode; 554 } condition; 555 556 struct intel_perf_context *perf_ctx; 557 558 struct { 559 uint64_t dirty; 560 uint64_t stage_dirty; 561 uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT]; 562 563 unsigned num_viewports; 564 unsigned sample_mask; 565 struct crocus_blend_state *cso_blend; 566 struct crocus_rasterizer_state *cso_rast; 567 struct crocus_depth_stencil_alpha_state *cso_zsa; 568 struct crocus_vertex_element_state *cso_vertex_elements; 569 struct pipe_blend_color blend_color; 570 struct pipe_poly_stipple poly_stipple; 571 struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS]; 572 struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS]; 573 struct pipe_stencil_ref stencil_ref; 574 struct pipe_framebuffer_state framebuffer; 575 struct pipe_clip_state clip_planes; 576 577 float default_outer_level[4]; 578 float default_inner_level[2]; 579 580 /** Bitfield of which vertex buffers are bound (non-null). */ 581 uint32_t bound_vertex_buffers; 582 struct pipe_vertex_buffer vertex_buffers[16]; 583 uint32_t vb_end[16]; 584 585 bool primitive_restart; 586 unsigned cut_index; 587 enum pipe_prim_type reduced_prim_mode:8; 588 enum pipe_prim_type prim_mode:8; 589 bool prim_is_points_or_lines; 590 uint8_t vertices_per_patch; 591 uint8_t patch_vertices; 592 593 bool window_space_position; 594 595 /** The last compute group size */ 596 uint32_t last_block[3]; 597 598 /** The last compute grid size */ 599 uint32_t last_grid[3]; 600 /** Reference to the BO containing the compute grid size */ 601 struct crocus_state_ref grid_size; 602 603 /** 604 * Array of aux usages for drawing, altered to account for any 605 * self-dependencies from resources bound for sampling and rendering. 606 */ 607 enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS]; 608 609 /** Aux usage of the fb's depth buffer (which may or may not exist). */ 610 enum isl_aux_usage hiz_usage; 611 612 /** Bitfield of whether color blending is enabled for RT[i] */ 613 uint8_t blend_enables; 614 615 /** Are depth writes enabled? (Depth buffer may or may not exist.) */ 616 bool depth_writes_enabled; 617 618 /** Are stencil writes enabled? (Stencil buffer may or may not exist.) */ 619 bool stencil_writes_enabled; 620 621 /** GenX-specific current state */ 622 struct crocus_genx_state *genx; 623 624 struct crocus_shader_state shaders[MESA_SHADER_STAGES]; 625 626 /* track if geom shader is active for IVB GT2 workaround */ 627 bool gs_enabled; 628 /** Do vertex shader uses shader draw parameters ? */ 629 bool vs_uses_draw_params; 630 bool vs_uses_derived_draw_params; 631 bool vs_needs_sgvs_element; 632 bool vs_uses_vertexid; 633 bool vs_uses_instanceid; 634 635 /** Do vertex shader uses edge flag ? */ 636 bool vs_needs_edge_flag; 637 638 struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS]; 639 bool streamout_active; 640 int so_targets; 641 642 bool statistics_counters_enabled; 643 644 /** Current conditional rendering mode */ 645 enum crocus_predicate_state predicate; 646 bool predicate_supported; 647 648 /** 649 * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the 650 * render context that needs to be uploaded to the compute context. 651 */ 652 struct crocus_bo *compute_predicate; 653 654 /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */ 655 bool prims_generated_query_active; 656 657 /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */ 658 uint32_t *streamout; 659 660 /** 661 * Resources containing streamed state which our render context 662 * currently points to. Used to re-add these to the validation 663 * list when we start a new batch and haven't resubmitted commands. 664 */ 665 struct { 666 struct pipe_resource *res; 667 uint32_t offset; 668 uint32_t size; 669 uint32_t index_size; 670 bool prim_restart; 671 } index_buffer; 672 673 uint32_t sf_vp_address; 674 uint32_t clip_vp_address; 675 uint32_t cc_vp_address; 676 677 uint32_t stats_wm; 678 float global_depth_offset_clamp; 679 680 uint32_t last_xfb_verts_per_prim; 681 uint64_t svbi; 682 } state; 683 684 /* BRW_NEW_URB_ALLOCATIONS: 685 */ 686 struct { 687 uint32_t vsize; /* vertex size plus header in urb registers */ 688 uint32_t gsize; /* GS output size in urb registers */ 689 uint32_t hsize; /* Tessellation control output size in urb registers */ 690 uint32_t dsize; /* Tessellation evaluation output size in urb registers */ 691 uint32_t csize; /* constant buffer size in urb registers */ 692 uint32_t sfsize; /* setup data size in urb registers */ 693 694 bool constrained; 695 696 uint32_t nr_vs_entries; 697 uint32_t nr_hs_entries; 698 uint32_t nr_ds_entries; 699 uint32_t nr_gs_entries; 700 uint32_t nr_clip_entries; 701 uint32_t nr_sf_entries; 702 uint32_t nr_cs_entries; 703 704 uint32_t vs_start; 705 uint32_t hs_start; 706 uint32_t ds_start; 707 uint32_t gs_start; 708 uint32_t clip_start; 709 uint32_t sf_start; 710 uint32_t cs_start; 711 /** 712 * URB size in the current configuration. The units this is expressed 713 * in are somewhat inconsistent, see intel_device_info::urb::size. 714 * 715 * FINISHME: Represent the URB size consistently in KB on all platforms. 716 */ 717 uint32_t size; 718 719 /* True if the most recently sent _3DSTATE_URB message allocated 720 * URB space for the GS. 721 */ 722 bool gs_present; 723 724 /* True if the most recently sent _3DSTATE_URB message allocated 725 * URB space for the HS and DS. 726 */ 727 bool tess_present; 728 } urb; 729 730 /* GEN4/5 curbe */ 731 struct { 732 unsigned wm_start; 733 unsigned wm_size; 734 unsigned clip_start; 735 unsigned clip_size; 736 unsigned vs_start; 737 unsigned vs_size; 738 unsigned total_size; 739 740 struct crocus_resource *curbe_res; 741 unsigned curbe_offset; 742 } curbe; 743 744 /** 745 * A buffer containing a marker + description of the driver. This buffer is 746 * added to all execbufs syscalls so that we can identify the driver that 747 * generated a hang by looking at the content of the buffer in the error 748 * state. It is also used for hardware workarounds that require scratch 749 * writes or reads from some unimportant memory. To avoid overriding the 750 * debug data, use the workaround_address field for workarounds. 751 */ 752 struct crocus_bo *workaround_bo; 753 unsigned workaround_offset; 754}; 755 756#define perf_debug(dbg, ...) do { \ 757 if (INTEL_DEBUG(DEBUG_PERF)) \ 758 dbg_printf(__VA_ARGS__); \ 759 if (unlikely(dbg)) \ 760 util_debug_message(dbg, PERF_INFO, __VA_ARGS__); \ 761} while(0) 762 763 764struct pipe_context * 765crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags); 766 767void crocus_lost_context_state(struct crocus_batch *batch); 768 769void crocus_init_blit_functions(struct pipe_context *ctx); 770void crocus_init_clear_functions(struct pipe_context *ctx); 771void crocus_init_program_functions(struct pipe_context *ctx); 772void crocus_init_resource_functions(struct pipe_context *ctx); 773bool crocus_update_compiled_shaders(struct crocus_context *ice); 774void crocus_update_compiled_compute_shader(struct crocus_context *ice); 775void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, 776 unsigned threads, uint32_t *dst); 777 778 779/* crocus_blit.c */ 780enum crocus_blitter_op 781{ 782 CROCUS_SAVE_TEXTURES = 1, 783 CROCUS_SAVE_FRAMEBUFFER = 2, 784 CROCUS_SAVE_FRAGMENT_STATE = 4, 785 CROCUS_DISABLE_RENDER_COND = 8, 786}; 787void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond); 788 789void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl, 790 struct isl_device *isl_dev, 791 struct blorp_surf *surf, 792 struct pipe_resource *p_res, 793 enum isl_aux_usage aux_usage, 794 unsigned level, 795 bool is_render_target); 796void crocus_copy_region(struct blorp_context *blorp, 797 struct crocus_batch *batch, 798 struct pipe_resource *dst, 799 unsigned dst_level, 800 unsigned dstx, unsigned dsty, unsigned dstz, 801 struct pipe_resource *src, 802 unsigned src_level, 803 const struct pipe_box *src_box); 804 805/* crocus_draw.c */ 806void crocus_draw_vbo(struct pipe_context *ctx, 807 const struct pipe_draw_info *info, 808 unsigned drawid_offset, 809 const struct pipe_draw_indirect_info *indirect, 810 const struct pipe_draw_start_count_bias *draws, 811 unsigned num_draws); 812void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *); 813 814/* crocus_pipe_control.c */ 815 816void crocus_emit_pipe_control_flush(struct crocus_batch *batch, 817 const char *reason, uint32_t flags); 818void crocus_emit_pipe_control_write(struct crocus_batch *batch, 819 const char *reason, uint32_t flags, 820 struct crocus_bo *bo, uint32_t offset, 821 uint64_t imm); 822void crocus_emit_mi_flush(struct crocus_batch *batch); 823void crocus_emit_depth_stall_flushes(struct crocus_batch *batch); 824void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch); 825void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch, 826 const char *reason, uint32_t flags); 827void crocus_flush_all_caches(struct crocus_batch *batch); 828 829#define crocus_handle_always_flush_cache(batch) \ 830 if (unlikely(batch->screen->driconf.always_flush_cache)) \ 831 crocus_flush_all_caches(batch); 832 833void crocus_init_flush_functions(struct pipe_context *ctx); 834 835/* crocus_program.c */ 836const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice, 837 gl_shader_stage stage); 838struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice, 839 unsigned per_thread_scratch, 840 gl_shader_stage stage); 841/** 842 * Map a <group, index> pair to a binding table index. 843 * 844 * For example: <UBO, 5> => binding table index 12 845 */ 846static inline uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt, 847 enum crocus_surface_group group, 848 uint32_t index) 849{ 850 assert(index < bt->sizes[group]); 851 uint64_t mask = bt->used_mask[group]; 852 uint64_t bit = 1ull << index; 853 if (bit & mask) { 854 return bt->offsets[group] + util_bitcount64((bit - 1) & mask); 855 } else { 856 return CROCUS_SURFACE_NOT_USED; 857 } 858} 859 860/** 861 * Map a binding table index back to a <group, index> pair. 862 * 863 * For example: binding table index 12 => <UBO, 5> 864 */ 865static inline uint32_t 866crocus_bti_to_group_index(const struct crocus_binding_table *bt, 867 enum crocus_surface_group group, uint32_t bti) 868{ 869 uint64_t used_mask = bt->used_mask[group]; 870 assert(bti >= bt->offsets[group]); 871 872 uint32_t c = bti - bt->offsets[group]; 873 while (used_mask) { 874 int i = u_bit_scan64(&used_mask); 875 if (c == 0) 876 return i; 877 c--; 878 } 879 880 return CROCUS_SURFACE_NOT_USED; 881} 882 883 884/* crocus_disk_cache.c */ 885 886void crocus_disk_cache_store(struct disk_cache *cache, 887 const struct crocus_uncompiled_shader *ish, 888 const struct crocus_compiled_shader *shader, 889 void *map, 890 const void *prog_key, 891 uint32_t prog_key_size); 892struct crocus_compiled_shader * 893crocus_disk_cache_retrieve(struct crocus_context *ice, 894 const struct crocus_uncompiled_shader *ish, 895 const void *prog_key, 896 uint32_t prog_key_size); 897 898/* crocus_program_cache.c */ 899 900void crocus_init_program_cache(struct crocus_context *ice); 901void crocus_destroy_program_cache(struct crocus_context *ice); 902void crocus_print_program_cache(struct crocus_context *ice); 903struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice, 904 enum crocus_program_cache_id, 905 uint32_t key_size, 906 const void *key); 907struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice, 908 enum crocus_program_cache_id, 909 uint32_t key_size, 910 const void *key, 911 const void *assembly, 912 uint32_t asm_size, 913 struct brw_stage_prog_data *, 914 uint32_t prog_data_size, 915 uint32_t *streamout, 916 enum brw_param_builtin *sysv, 917 unsigned num_system_values, 918 unsigned num_cbufs, 919 const struct crocus_binding_table *bt); 920const void *crocus_find_previous_compile(const struct crocus_context *ice, 921 enum crocus_program_cache_id cache_id, 922 unsigned program_string_id); 923bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, 924 const void *key, 925 uint32_t key_size, 926 uint32_t *kernel_out, 927 void *prog_data_out); 928bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, 929 uint32_t stage, 930 const void *key, uint32_t key_size, 931 const void *kernel, uint32_t kernel_size, 932 const struct brw_stage_prog_data *prog_data, 933 uint32_t prog_data_size, 934 uint32_t *kernel_out, 935 void *prog_data_out); 936 937/* crocus_resolve.c */ 938 939void crocus_predraw_resolve_inputs(struct crocus_context *ice, 940 struct crocus_batch *batch, 941 bool *draw_aux_buffer_disabled, 942 gl_shader_stage stage, 943 bool consider_framebuffer); 944void crocus_predraw_resolve_framebuffer(struct crocus_context *ice, 945 struct crocus_batch *batch, 946 bool *draw_aux_buffer_disabled); 947void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice, 948 struct crocus_batch *batch); 949void crocus_cache_sets_clear(struct crocus_batch *batch); 950void crocus_flush_depth_and_render_caches(struct crocus_batch *batch); 951void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo); 952void crocus_cache_flush_for_render(struct crocus_batch *batch, 953 struct crocus_bo *bo, 954 enum isl_format format, 955 enum isl_aux_usage aux_usage); 956void crocus_render_cache_add_bo(struct crocus_batch *batch, 957 struct crocus_bo *bo, 958 enum isl_format format, 959 enum isl_aux_usage aux_usage); 960void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo); 961void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo); 962int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, 963 struct pipe_driver_query_info *info); 964int crocus_get_driver_query_group_info(struct pipe_screen *pscreen, 965 unsigned index, 966 struct pipe_driver_query_group_info *info); 967 968struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx); 969 970bool crocus_sw_check_cond_render(struct crocus_context *ice); 971static inline bool crocus_check_conditional_render(struct crocus_context *ice) 972{ 973 if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY) 974 return crocus_sw_check_cond_render(ice); 975 return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER; 976} 977 978#ifdef genX 979# include "crocus_genx_protos.h" 980#else 981# define genX(x) gfx4_##x 982# include "crocus_genx_protos.h" 983# undef genX 984# define genX(x) gfx45_##x 985# include "crocus_genx_protos.h" 986# undef genX 987# define genX(x) gfx5_##x 988# include "crocus_genx_protos.h" 989# undef genX 990# define genX(x) gfx6_##x 991# include "crocus_genx_protos.h" 992# undef genX 993# define genX(x) gfx7_##x 994# include "crocus_genx_protos.h" 995# undef genX 996# define genX(x) gfx75_##x 997# include "crocus_genx_protos.h" 998# undef genX 999# define genX(x) gfx8_##x 1000# include "crocus_genx_protos.h" 1001# undef genX 1002#endif 1003 1004#endif 1005