1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef ANV_PRIVATE_H 25#define ANV_PRIVATE_H 26 27#include <stdlib.h> 28#include <stdio.h> 29#include <stdbool.h> 30#include <pthread.h> 31#include <assert.h> 32#include <stdint.h> 33#include "drm-uapi/i915_drm.h" 34#include "drm-uapi/drm_fourcc.h" 35 36#ifdef HAVE_VALGRIND 37#include <valgrind.h> 38#include <memcheck.h> 39#define VG(x) x 40#ifndef NDEBUG 41#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) 42#endif 43#else 44#define VG(x) ((void)0) 45#endif 46 47#include "common/intel_clflush.h" 48#include "common/intel_decoder.h" 49#include "common/intel_gem.h" 50#include "common/intel_l3_config.h" 51#include "common/intel_measure.h" 52#include "common/intel_sample_positions.h" 53#include "dev/intel_device_info.h" 54#include "blorp/blorp.h" 55#include "compiler/brw_compiler.h" 56#include "compiler/brw_rt.h" 57#include "ds/intel_driver_ds.h" 58#include "util/bitset.h" 59#include "util/bitscan.h" 60#include "util/macros.h" 61#include "util/hash_table.h" 62#include "util/list.h" 63#include "util/perf/u_trace.h" 64#include "util/sparse_array.h" 65#include "util/u_atomic.h" 66#include "util/u_vector.h" 67#include "util/u_math.h" 68#include "util/vma.h" 69#include "util/xmlconfig.h" 70#include "vk_alloc.h" 71#include "vk_buffer.h" 72#include "vk_command_buffer.h" 73#include "vk_command_pool.h" 74#include "vk_debug_report.h" 75#include "vk_device.h" 76#include "vk_drm_syncobj.h" 77#include "vk_enum_defines.h" 78#include "vk_framebuffer.h" 79#include "vk_graphics_state.h" 80#include "vk_image.h" 81#include "vk_instance.h" 82#include "vk_pipeline_cache.h" 83#include "vk_physical_device.h" 84#include "vk_shader_module.h" 85#include "vk_sync.h" 86#include "vk_sync_timeline.h" 87#include "vk_util.h" 88#include "vk_queue.h" 89#include "vk_log.h" 90 91/* Pre-declarations needed for WSI entrypoints */ 92struct wl_surface; 93struct wl_display; 94typedef struct xcb_connection_t xcb_connection_t; 95typedef uint32_t xcb_visualid_t; 96typedef uint32_t xcb_window_t; 97 98struct anv_batch; 99struct anv_buffer; 100struct anv_buffer_view; 101struct anv_image_view; 102struct anv_acceleration_structure; 103struct anv_instance; 104 105struct intel_aux_map_context; 106struct intel_perf_config; 107struct intel_perf_counter_pass; 108struct intel_perf_query_result; 109 110#include <vulkan/vulkan.h> 111#include 
<vulkan/vk_icd.h> 112 113#include "anv_android.h" 114#include "anv_entrypoints.h" 115#include "isl/isl.h" 116 117#include "dev/intel_debug.h" 118#undef MESA_LOG_TAG 119#define MESA_LOG_TAG "MESA-INTEL" 120#include "util/log.h" 121#include "wsi_common.h" 122 123#define NSEC_PER_SEC 1000000000ull 124 125/* anv Virtual Memory Layout 126 * ========================= 127 * 128 * When the anv driver is determining the virtual graphics addresses of memory 129 * objects itself using the softpin mechanism, the following memory ranges 130 * will be used. 131 * 132 * Three special considerations to notice: 133 * 134 * (1) the dynamic state pool is located within the same 4 GiB as the low 135 * heap. This is to work around a VF cache issue described in a comment in 136 * anv_physical_device_init_heaps. 137 * 138 * (2) the binding table pool is located at lower addresses than the surface 139 * state pool, within a 4 GiB range. This allows surface state base addresses 140 * to cover both binding tables (16 bit offsets) and surface states (32 bit 141 * offsets). 142 * 143 * (3) the last 4 GiB of the address space is withheld from the high 144 * heap. Various hardware units will read past the end of an object for 145 * various reasons. This healthy margin prevents reads from wrapping around 146 * 48-bit addresses. 147 */ 148#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */ 149#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL 150#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */ 151#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL 152#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ 153#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL 154#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ 155#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL 156#define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ 157#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL 158#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ 159#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL 160#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ 161#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL 162#define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */ 163 164#define GENERAL_STATE_POOL_SIZE \ 165 (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1) 166#define LOW_HEAP_SIZE \ 167 (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) 168#define DYNAMIC_STATE_POOL_SIZE \ 169 (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) 170#define BINDING_TABLE_POOL_SIZE \ 171 (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1) 172#define BINDING_TABLE_POOL_BLOCK_SIZE (65536) 173#define SURFACE_STATE_POOL_SIZE \ 174 (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) 175#define INSTRUCTION_STATE_POOL_SIZE \ 176 (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1) 177#define CLIENT_VISIBLE_HEAP_SIZE \ 178 (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1) 179 180/* Allowing different clear colors requires us to perform a depth resolve at 181 * the end of certain render passes. This is because while slow clears store 182 * the clear color in the HiZ buffer, fast clears (without a resolve) don't. 183 * See the PRMs for examples describing when additional resolves would be 184 * necessary. 
To enable fast clears without requiring extra resolves, we set 185 * the clear value to a globally-defined one. We could allow different values 186 * if the user doesn't expect coherent data during or after a render passes 187 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS) 188 * don't seem to exist yet. In almost all Vulkan applications tested thus far, 189 * 1.0f seems to be the only value used. The only application that doesn't set 190 * this value does so through the usage of an seemingly uninitialized clear 191 * value. 192 */ 193#define ANV_HZ_FC_VAL 1.0f 194 195/* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */ 196#define MAX_VBS (33 - 2) 197 198/* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler 199 * only supports the push model of VS inputs, and we only have 128 GRFs, 200 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs. Plus, 201 * we use two of them for SGVs. 202 */ 203#define MAX_VES (31 - 2) 204 205#define MAX_XFB_BUFFERS 4 206#define MAX_XFB_STREAMS 4 207#define MAX_SETS 32 208#define MAX_RTS 8 209#define MAX_VIEWPORTS 16 210#define MAX_SCISSORS 16 211#define MAX_PUSH_CONSTANTS_SIZE 128 212#define MAX_DYNAMIC_BUFFERS 16 213#define MAX_IMAGES 64 214#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ 215#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 216#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 217/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we 218 * use 64 here to avoid cache issues. This could most likely bring it back to 219 * 32 if we had different virtual addresses for the different views on a given 220 * GEM object. 221 */ 222#define ANV_UBO_ALIGNMENT 64 223#define ANV_SSBO_ALIGNMENT 4 224#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4 225#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16 226#define MAX_SAMPLE_LOCATIONS 16 227 228/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model": 229 * 230 * "The surface state model is used when a Binding Table Index (specified 231 * in the message descriptor) of less than 240 is specified. In this model, 232 * the Binding Table Index is used to index into the binding table, and the 233 * binding table entry contains a pointer to the SURFACE_STATE." 234 * 235 * Binding table values above 240 are used for various things in the hardware 236 * such as stateless, stateless with incoherent cache, SLM, and bindless. 237 */ 238#define MAX_BINDING_TABLE_SIZE 240 239 240/* The kernel relocation API has a limitation of a 32-bit delta value 241 * applied to the address before it is written which, in spite of it being 242 * unsigned, is treated as signed . Because of the way that this maps to 243 * the Vulkan API, we cannot handle an offset into a buffer that does not 244 * fit into a signed 32 bits. The only mechanism we have for dealing with 245 * this at the moment is to limit all VkDeviceMemory objects to a maximum 246 * of 2GB each. The Vulkan spec allows us to do this: 247 * 248 * "Some platforms may have a limit on the maximum size of a single 249 * allocation. For example, certain systems may fail to create 250 * allocations with a size greater than or equal to 4GB. Such a limit is 251 * implementation-dependent, and if such a failure occurs then the error 252 * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned." 
253 */ 254#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31) 255 256#define ANV_SVGS_VB_INDEX MAX_VBS 257#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1) 258 259/* We reserve this MI ALU register for the purpose of handling predication. 260 * Other code which uses the MI ALU should leave it alone. 261 */ 262#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */ 263 264/* We reserve this MI ALU register to pass around an offset computed from 265 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query. 266 * Other code which uses the MI ALU should leave it alone. 267 */ 268#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */ 269 270#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1) 271 272/* For gfx12 we set the streamout buffers using 4 separate commands 273 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout 274 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of 275 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the 276 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode. 277 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for 278 * 3DSTATE_SO_BUFFER_INDEX_0. 279 */ 280#define SO_BUFFER_INDEX_0_CMD 0x60 281#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) 282 283static inline uint32_t 284align_down_npot_u32(uint32_t v, uint32_t a) 285{ 286 return v - (v % a); 287} 288 289static inline uint32_t 290align_down_u32(uint32_t v, uint32_t a) 291{ 292 assert(a != 0 && a == (a & -a)); 293 return v & ~(a - 1); 294} 295 296static inline uint32_t 297align_u32(uint32_t v, uint32_t a) 298{ 299 assert(a != 0 && a == (a & -a)); 300 return align_down_u32(v + a - 1, a); 301} 302 303static inline uint64_t 304align_down_u64(uint64_t v, uint64_t a) 305{ 306 assert(a != 0 && a == (a & -a)); 307 return v & ~(a - 1); 308} 309 310static inline uint64_t 311align_u64(uint64_t v, uint64_t a) 312{ 313 return align_down_u64(v + a - 1, a); 314} 315 316static inline int32_t 317align_i32(int32_t v, int32_t a) 318{ 319 assert(a != 0 && a == (a & -a)); 320 return (v + a - 1) & ~(a - 1); 321} 322 323/** Alignment must be a power of 2. 
*/ 324static inline bool 325anv_is_aligned(uintmax_t n, uintmax_t a) 326{ 327 assert(a == (a & -a)); 328 return (n & (a - 1)) == 0; 329} 330 331static inline uint32_t 332anv_minify(uint32_t n, uint32_t levels) 333{ 334 if (unlikely(n == 0)) 335 return 0; 336 else 337 return MAX2(n >> levels, 1); 338} 339 340static inline float 341anv_clamp_f(float f, float min, float max) 342{ 343 assert(min < max); 344 345 if (f > max) 346 return max; 347 else if (f < min) 348 return min; 349 else 350 return f; 351} 352 353static inline bool 354anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) 355{ 356 if (*inout_mask & clear_mask) { 357 *inout_mask &= ~clear_mask; 358 return true; 359 } else { 360 return false; 361 } 362} 363 364static inline union isl_color_value 365vk_to_isl_color(VkClearColorValue color) 366{ 367 return (union isl_color_value) { 368 .u32 = { 369 color.uint32[0], 370 color.uint32[1], 371 color.uint32[2], 372 color.uint32[3], 373 }, 374 }; 375} 376 377static inline union isl_color_value 378vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format) 379{ 380 const struct isl_format_layout *fmtl = isl_format_get_layout(format); 381 union isl_color_value isl_color = { .u32 = {0, } }; 382 383#define COPY_COLOR_CHANNEL(c, i) \ 384 if (fmtl->channels.c.bits) \ 385 isl_color.u32[i] = color.uint32[i] 386 387 COPY_COLOR_CHANNEL(r, 0); 388 COPY_COLOR_CHANNEL(g, 1); 389 COPY_COLOR_CHANNEL(b, 2); 390 COPY_COLOR_CHANNEL(a, 3); 391 392#undef COPY_COLOR_CHANNEL 393 394 return isl_color; 395} 396 397static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags) 398{ 399 uintptr_t mask = (1ull << bits) - 1; 400 *flags = ptr & mask; 401 return (void *) (ptr & ~mask); 402} 403 404static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags) 405{ 406 uintptr_t value = (uintptr_t) ptr; 407 uintptr_t mask = (1ull << bits) - 1; 408 return value | (mask & flags); 409} 410 411/** 412 * Warn on ignored extension structs. 413 * 414 * The Vulkan spec requires us to ignore unsupported or unknown structs in 415 * a pNext chain. In debug mode, emitting warnings for ignored structs may 416 * help us discover structs that we should not have ignored. 417 * 418 * 419 * From the Vulkan 1.0.38 spec: 420 * 421 * Any component of the implementation (the loader, any enabled layers, 422 * and drivers) must skip over, without processing (other than reading the 423 * sType and pNext members) any chained structures with sType values not 424 * defined by extensions supported by that component. 425 */ 426#define anv_debug_ignored_stype(sType) \ 427 mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) 428 429void __anv_perf_warn(struct anv_device *device, 430 const struct vk_object_base *object, 431 const char *file, int line, const char *format, ...) 432 anv_printflike(5, 6); 433 434/** 435 * Print a FINISHME message, including its source location. 436 */ 437#define anv_finishme(format, ...) \ 438 do { \ 439 static bool reported = false; \ 440 if (!reported) { \ 441 mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \ 442 ##__VA_ARGS__); \ 443 reported = true; \ 444 } \ 445 } while (0) 446 447/** 448 * Print a perf warning message. Set INTEL_DEBUG=perf to see these. 449 */ 450#define anv_perf_warn(objects_macro, format, ...) 
\ 451 do { \ 452 static bool reported = false; \ 453 if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \ 454 __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \ 455 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \ 456 objects_macro, __FILE__, __LINE__, \ 457 format, ## __VA_ARGS__); \ 458 reported = true; \ 459 } \ 460 } while (0) 461 462/* A non-fatal assert. Useful for debugging. */ 463#ifdef DEBUG 464#define anv_assert(x) ({ \ 465 if (unlikely(!(x))) \ 466 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \ 467}) 468#else 469#define anv_assert(x) 470#endif 471 472struct anv_bo { 473 const char *name; 474 475 uint32_t gem_handle; 476 477 uint32_t refcount; 478 479 /* Index into the current validation list. This is used by the 480 * validation list building algorithm to track which buffers are already 481 * in the validation list so that we can ensure uniqueness. 482 */ 483 uint32_t exec_obj_index; 484 485 /* Index for use with util_sparse_array_free_list */ 486 uint32_t free_index; 487 488 /* Last known offset. This value is provided by the kernel when we 489 * execbuf and is used as the presumed offset for the next bunch of 490 * relocations. 491 */ 492 uint64_t offset; 493 494 /** Size of the buffer not including implicit aux */ 495 uint64_t size; 496 497 /* Map for internally mapped BOs. 498 * 499 * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole 500 * BO. If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO. 501 */ 502 void *map; 503 504 /** Size of the implicit CCS range at the end of the buffer 505 * 506 * On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K 507 * page of main surface data maps to a 256B chunk of CCS data and that 508 * mapping is provided on TGL-LP by the AUX table which maps virtual memory 509 * addresses in the main surface to virtual memory addresses for CCS data. 510 * 511 * Because we can't change these maps around easily and because Vulkan 512 * allows two VkImages to be bound to overlapping memory regions (as long 513 * as the app is careful), it's not feasible to make this mapping part of 514 * the image. (On Gfx11 and earlier, the mapping was provided via 515 * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.) 516 * Instead, we attach the CCS data directly to the buffer object and setup 517 * the AUX table mapping at BO creation time. 518 * 519 * This field is for internal tracking use by the BO allocator only and 520 * should not be touched by other parts of the code. If something wants to 521 * know if a BO has implicit CCS data, it should instead look at the 522 * has_implicit_ccs boolean below. 523 * 524 * This data is not included in maps of this buffer. 525 */ 526 uint32_t _ccs_size; 527 528 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */ 529 uint32_t flags; 530 531 /** True if this BO may be shared with other processes */ 532 bool is_external:1; 533 534 /** True if this BO is a wrapper 535 * 536 * When set to true, none of the fields in this BO are meaningful except 537 * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO. 538 * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin 539 * is set in the physical device. 
540 */ 541 bool is_wrapper:1; 542 543 /** See also ANV_BO_ALLOC_FIXED_ADDRESS */ 544 bool has_fixed_address:1; 545 546 /** True if this BO wraps a host pointer */ 547 bool from_host_ptr:1; 548 549 /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */ 550 bool has_client_visible_address:1; 551 552 /** True if this BO has implicit CCS data attached to it */ 553 bool has_implicit_ccs:1; 554}; 555 556static inline struct anv_bo * 557anv_bo_ref(struct anv_bo *bo) 558{ 559 p_atomic_inc(&bo->refcount); 560 return bo; 561} 562 563static inline struct anv_bo * 564anv_bo_unwrap(struct anv_bo *bo) 565{ 566 while (bo->is_wrapper) 567 bo = bo->map; 568 return bo; 569} 570 571static inline bool 572anv_bo_is_pinned(struct anv_bo *bo) 573{ 574#if defined(GFX_VERx10) && GFX_VERx10 >= 90 575 /* Sky Lake and later always uses softpin */ 576 assert(bo->flags & EXEC_OBJECT_PINNED); 577 return true; 578#elif defined(GFX_VERx10) && GFX_VERx10 < 80 579 /* Haswell and earlier never use softpin */ 580 assert(!(bo->flags & EXEC_OBJECT_PINNED)); 581 assert(!bo->has_fixed_address); 582 return false; 583#else 584 /* If we don't have a GFX_VERx10 #define, we need to look at the BO. Also, 585 * for GFX version 8, we need to look at the BO because Broadwell softpins 586 * but Cherryview doesn't. 587 */ 588 assert((bo->flags & EXEC_OBJECT_PINNED) || !bo->has_fixed_address); 589 return (bo->flags & EXEC_OBJECT_PINNED) != 0; 590#endif 591} 592 593struct anv_address { 594 struct anv_bo *bo; 595 int64_t offset; 596}; 597 598#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) 599 600static inline struct anv_address 601anv_address_from_u64(uint64_t addr_u64) 602{ 603 assert(addr_u64 == intel_canonical_address(addr_u64)); 604 return (struct anv_address) { 605 .bo = NULL, 606 .offset = addr_u64, 607 }; 608} 609 610static inline bool 611anv_address_is_null(struct anv_address addr) 612{ 613 return addr.bo == NULL && addr.offset == 0; 614} 615 616static inline uint64_t 617anv_address_physical(struct anv_address addr) 618{ 619 if (addr.bo && anv_bo_is_pinned(addr.bo)) { 620 return intel_canonical_address(addr.bo->offset + addr.offset); 621 } else { 622 return intel_canonical_address(addr.offset); 623 } 624} 625 626static inline struct anv_address 627anv_address_add(struct anv_address addr, uint64_t offset) 628{ 629 addr.offset += offset; 630 return addr; 631} 632 633/* Represents a lock-free linked list of "free" things. This is used by 634 * both the block pool and the state pools. Unfortunately, in order to 635 * solve the ABA problem, we can't use a single uint32_t head. 636 */ 637union anv_free_list { 638 struct { 639 uint32_t offset; 640 641 /* A simple count that is incremented every time the head changes. */ 642 uint32_t count; 643 }; 644 /* Make sure it's aligned to 64 bits. This will make atomic operations 645 * faster on 32 bit platforms. 646 */ 647 uint64_t u64 __attribute__ ((aligned (8))); 648}; 649 650#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) 651 652struct anv_block_state { 653 union { 654 struct { 655 uint32_t next; 656 uint32_t end; 657 }; 658 /* Make sure it's aligned to 64 bits. This will make atomic operations 659 * faster on 32 bit platforms. 
660 */ 661 uint64_t u64 __attribute__ ((aligned (8))); 662 }; 663}; 664 665#define anv_block_pool_foreach_bo(bo, pool) \ 666 for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \ 667 _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \ 668 _pp_bo++) 669 670#define ANV_MAX_BLOCK_POOL_BOS 20 671 672struct anv_block_pool { 673 const char *name; 674 675 struct anv_device *device; 676 bool use_relocations; 677 678 /* Wrapper BO for use in relocation lists. This BO is simply a wrapper 679 * around the actual BO so that we grow the pool after the wrapper BO has 680 * been put in a relocation list. This is only used in the non-softpin 681 * case. 682 */ 683 struct anv_bo wrapper_bo; 684 685 struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS]; 686 struct anv_bo *bo; 687 uint32_t nbos; 688 689 uint64_t size; 690 691 /* The address where the start of the pool is pinned. The various bos that 692 * are created as the pool grows will have addresses in the range 693 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE). 694 */ 695 uint64_t start_address; 696 697 /* The offset from the start of the bo to the "center" of the block 698 * pool. Pointers to allocated blocks are given by 699 * bo.map + center_bo_offset + offsets. 700 */ 701 uint32_t center_bo_offset; 702 703 /* Current memory map of the block pool. This pointer may or may not 704 * point to the actual beginning of the block pool memory. If 705 * anv_block_pool_alloc_back has ever been called, then this pointer 706 * will point to the "center" position of the buffer and all offsets 707 * (negative or positive) given out by the block pool alloc functions 708 * will be valid relative to this pointer. 709 * 710 * In particular, map == bo.map + center_offset 711 * 712 * DO NOT access this pointer directly. Use anv_block_pool_map() instead, 713 * since it will handle the softpin case as well, where this points to NULL. 714 */ 715 void *map; 716 int fd; 717 718 /** 719 * Array of mmaps and gem handles owned by the block pool, reclaimed when 720 * the block pool is destroyed. 721 */ 722 struct u_vector mmap_cleanups; 723 724 struct anv_block_state state; 725 726 struct anv_block_state back_state; 727}; 728 729/* Block pools are backed by a fixed-size 1GB memfd */ 730#define BLOCK_POOL_MEMFD_SIZE (1ul << 30) 731 732/* The center of the block pool is also the middle of the memfd. This may 733 * change in the future if we decide differently for some reason. 
734 */ 735#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) 736 737static inline uint32_t 738anv_block_pool_size(struct anv_block_pool *pool) 739{ 740 return pool->state.end + pool->back_state.end; 741} 742 743struct anv_state { 744 int32_t offset; 745 uint32_t alloc_size; 746 void *map; 747 uint32_t idx; 748}; 749 750#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 }) 751 752struct anv_fixed_size_state_pool { 753 union anv_free_list free_list; 754 struct anv_block_state block; 755}; 756 757#define ANV_MIN_STATE_SIZE_LOG2 6 758#define ANV_MAX_STATE_SIZE_LOG2 22 759 760#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) 761 762struct anv_free_entry { 763 uint32_t next; 764 struct anv_state state; 765}; 766 767struct anv_state_table { 768 struct anv_device *device; 769 int fd; 770 struct anv_free_entry *map; 771 uint32_t size; 772 struct anv_block_state state; 773 struct u_vector cleanups; 774}; 775 776struct anv_state_pool { 777 struct anv_block_pool block_pool; 778 779 /* Offset into the relevant state base address where the state pool starts 780 * allocating memory. 781 */ 782 int32_t start_offset; 783 784 struct anv_state_table table; 785 786 /* The size of blocks which will be allocated from the block pool */ 787 uint32_t block_size; 788 789 /** Free list for "back" allocations */ 790 union anv_free_list back_alloc_free_list; 791 792 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; 793}; 794 795struct anv_state_reserved_pool { 796 struct anv_state_pool *pool; 797 union anv_free_list reserved_blocks; 798 uint32_t count; 799}; 800 801struct anv_state_stream { 802 struct anv_state_pool *state_pool; 803 804 /* The size of blocks to allocate from the state pool */ 805 uint32_t block_size; 806 807 /* Current block we're allocating from */ 808 struct anv_state block; 809 810 /* Offset into the current block at which to allocate the next state */ 811 uint32_t next; 812 813 /* List of all blocks allocated from this pool */ 814 struct util_dynarray all_blocks; 815}; 816 817/* The block_pool functions exported for testing only. The block pool should 818 * only be used via a state pool (see below). 
819 */ 820VkResult anv_block_pool_init(struct anv_block_pool *pool, 821 struct anv_device *device, 822 const char *name, 823 uint64_t start_address, 824 uint32_t initial_size); 825void anv_block_pool_finish(struct anv_block_pool *pool); 826int32_t anv_block_pool_alloc(struct anv_block_pool *pool, 827 uint32_t block_size, uint32_t *padding); 828int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool, 829 uint32_t block_size); 830void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t 831size); 832 833VkResult anv_state_pool_init(struct anv_state_pool *pool, 834 struct anv_device *device, 835 const char *name, 836 uint64_t base_address, 837 int32_t start_offset, 838 uint32_t block_size); 839void anv_state_pool_finish(struct anv_state_pool *pool); 840struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, 841 uint32_t state_size, uint32_t alignment); 842struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool); 843void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); 844void anv_state_stream_init(struct anv_state_stream *stream, 845 struct anv_state_pool *state_pool, 846 uint32_t block_size); 847void anv_state_stream_finish(struct anv_state_stream *stream); 848struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, 849 uint32_t size, uint32_t alignment); 850 851void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool, 852 struct anv_state_pool *parent, 853 uint32_t count, uint32_t size, 854 uint32_t alignment); 855void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool); 856struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool); 857void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool, 858 struct anv_state state); 859 860VkResult anv_state_table_init(struct anv_state_table *table, 861 struct anv_device *device, 862 uint32_t initial_entries); 863void anv_state_table_finish(struct anv_state_table *table); 864VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx, 865 uint32_t count); 866void anv_free_list_push(union anv_free_list *list, 867 struct anv_state_table *table, 868 uint32_t idx, uint32_t count); 869struct anv_state* anv_free_list_pop(union anv_free_list *list, 870 struct anv_state_table *table); 871 872 873static inline struct anv_state * 874anv_state_table_get(struct anv_state_table *table, uint32_t idx) 875{ 876 return &table->map[idx].state; 877} 878/** 879 * Implements a pool of re-usable BOs. The interface is identical to that 880 * of block_pool except that each block is its own BO. 
881 */ 882struct anv_bo_pool { 883 const char *name; 884 885 struct anv_device *device; 886 887 struct util_sparse_array_free_list free_list[16]; 888}; 889 890void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, 891 const char *name); 892void anv_bo_pool_finish(struct anv_bo_pool *pool); 893VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size, 894 struct anv_bo **bo_out); 895void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo); 896 897struct anv_scratch_pool { 898 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */ 899 struct anv_bo *bos[16][MESA_SHADER_STAGES]; 900 uint32_t surfs[16]; 901 struct anv_state surf_states[16]; 902}; 903 904void anv_scratch_pool_init(struct anv_device *device, 905 struct anv_scratch_pool *pool); 906void anv_scratch_pool_finish(struct anv_device *device, 907 struct anv_scratch_pool *pool); 908struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device, 909 struct anv_scratch_pool *pool, 910 gl_shader_stage stage, 911 unsigned per_thread_scratch); 912uint32_t anv_scratch_pool_get_surf(struct anv_device *device, 913 struct anv_scratch_pool *pool, 914 unsigned per_thread_scratch); 915 916/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */ 917struct anv_bo_cache { 918 struct util_sparse_array bo_map; 919 pthread_mutex_t mutex; 920}; 921 922VkResult anv_bo_cache_init(struct anv_bo_cache *cache, 923 struct anv_device *device); 924void anv_bo_cache_finish(struct anv_bo_cache *cache); 925 926struct anv_queue_family { 927 /* Standard bits passed on to the client */ 928 VkQueueFlags queueFlags; 929 uint32_t queueCount; 930 931 /* Driver internal information */ 932 enum drm_i915_gem_engine_class engine_class; 933}; 934 935#define ANV_MAX_QUEUE_FAMILIES 3 936 937struct anv_memory_type { 938 /* Standard bits passed on to the client */ 939 VkMemoryPropertyFlags propertyFlags; 940 uint32_t heapIndex; 941}; 942 943struct anv_memory_heap { 944 /* Standard bits passed on to the client */ 945 VkDeviceSize size; 946 VkMemoryHeapFlags flags; 947 948 /** Driver-internal book-keeping. 949 * 950 * Align it to 64 bits to make atomic operations faster on 32 bit platforms. 951 */ 952 VkDeviceSize used __attribute__ ((aligned (8))); 953 954 bool is_local_mem; 955}; 956 957struct anv_memregion { 958 struct drm_i915_gem_memory_class_instance region; 959 uint64_t size; 960 uint64_t available; 961}; 962 963struct anv_physical_device { 964 struct vk_physical_device vk; 965 966 /* Link in anv_instance::physical_devices */ 967 struct list_head link; 968 969 struct anv_instance * instance; 970 char path[20]; 971 struct intel_device_info info; 972 /** Amount of "GPU memory" we want to advertise 973 * 974 * Clearly, this value is bogus since Intel is a UMA architecture. On 975 * gfx7 platforms, we are limited by GTT size unless we want to implement 976 * fine-grained tracking and GTT splitting. On Broadwell and above we are 977 * practically unlimited. However, we will never report more than 3/4 of 978 * the total system ram to try and avoid running out of RAM. 979 */ 980 bool supports_48bit_addresses; 981 struct brw_compiler * compiler; 982 struct isl_device isl_dev; 983 struct intel_perf_config * perf; 984 /* True if hardware support is incomplete/alpha */ 985 bool is_alpha; 986 /* 987 * Number of commands required to implement a performance query begin + 988 * end. 
989 */ 990 uint32_t n_perf_query_commands; 991 int cmd_parser_version; 992 bool has_exec_async; 993 bool has_exec_capture; 994 int max_context_priority; 995 bool has_context_isolation; 996 bool has_mmap_offset; 997 bool has_userptr_probe; 998 uint64_t gtt_size; 999 1000 bool use_relocations; 1001 bool use_softpin; 1002 bool always_use_bindless; 1003 bool use_call_secondary; 1004 1005 /** True if we can access buffers using A64 messages */ 1006 bool has_a64_buffer_access; 1007 /** True if we can use bindless access for images */ 1008 bool has_bindless_images; 1009 /** True if we can use bindless access for samplers */ 1010 bool has_bindless_samplers; 1011 /** True if we can use timeline semaphores through execbuf */ 1012 bool has_exec_timeline; 1013 1014 /** True if we can read the GPU timestamp register 1015 * 1016 * When running in a virtual context, the timestamp register is unreadable 1017 * on Gfx12+. 1018 */ 1019 bool has_reg_timestamp; 1020 1021 /** True if this device has implicit AUX 1022 * 1023 * If true, CCS is handled as an implicit attachment to the BO rather than 1024 * as an explicitly bound surface. 1025 */ 1026 bool has_implicit_ccs; 1027 1028 bool always_flush_cache; 1029 1030 struct { 1031 uint32_t family_count; 1032 struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES]; 1033 } queue; 1034 1035 struct { 1036 uint32_t type_count; 1037 struct anv_memory_type types[VK_MAX_MEMORY_TYPES]; 1038 uint32_t heap_count; 1039 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS]; 1040 bool need_clflush; 1041 } memory; 1042 1043 /* Either we have a single vram region and it's all mappable, or we have 1044 * both mappable & non-mappable parts. System memory is always available. 1045 */ 1046 struct anv_memregion vram_mappable; 1047 struct anv_memregion vram_non_mappable; 1048 struct anv_memregion sys; 1049 uint8_t driver_build_sha1[20]; 1050 uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; 1051 uint8_t driver_uuid[VK_UUID_SIZE]; 1052 uint8_t device_uuid[VK_UUID_SIZE]; 1053 1054 struct vk_sync_type sync_syncobj_type; 1055 struct vk_sync_timeline_type sync_timeline_type; 1056 const struct vk_sync_type * sync_types[4]; 1057 1058 struct wsi_device wsi_device; 1059 int local_fd; 1060 bool has_local; 1061 int64_t local_major; 1062 int64_t local_minor; 1063 int master_fd; 1064 bool has_master; 1065 int64_t master_major; 1066 int64_t master_minor; 1067 struct drm_i915_query_engine_info * engine_info; 1068 1069 void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool); 1070 struct intel_measure_device measure_device; 1071}; 1072 1073static inline bool 1074anv_physical_device_has_vram(const struct anv_physical_device *device) 1075{ 1076 return device->vram_mappable.size > 0; 1077} 1078 1079struct anv_app_info { 1080 const char* app_name; 1081 uint32_t app_version; 1082 const char* engine_name; 1083 uint32_t engine_version; 1084 uint32_t api_version; 1085}; 1086 1087struct anv_instance { 1088 struct vk_instance vk; 1089 1090 bool physical_devices_enumerated; 1091 struct list_head physical_devices; 1092 1093 struct driOptionCache dri_options; 1094 struct driOptionCache available_dri_options; 1095 1096 /** 1097 * Workarounds for game bugs. 
1098 */ 1099 bool assume_full_subgroups; 1100 bool limit_trig_input_range; 1101 bool sample_mask_out_opengl_behaviour; 1102}; 1103 1104VkResult anv_init_wsi(struct anv_physical_device *physical_device); 1105void anv_finish_wsi(struct anv_physical_device *physical_device); 1106 1107struct anv_queue { 1108 struct vk_queue vk; 1109 1110 struct anv_device * device; 1111 1112 const struct anv_queue_family * family; 1113 1114 uint32_t index_in_family; 1115 1116 uint32_t exec_flags; 1117 1118 /** Synchronization object for debug purposes (DEBUG_SYNC) */ 1119 struct vk_sync *sync; 1120 1121 struct intel_ds_queue * ds; 1122}; 1123 1124struct nir_xfb_info; 1125struct anv_pipeline_bind_map; 1126 1127extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2]; 1128 1129struct anv_shader_bin * 1130anv_device_search_for_kernel(struct anv_device *device, 1131 struct vk_pipeline_cache *cache, 1132 const void *key_data, uint32_t key_size, 1133 bool *user_cache_bit); 1134 1135struct anv_shader_bin * 1136anv_device_upload_kernel(struct anv_device *device, 1137 struct vk_pipeline_cache *cache, 1138 gl_shader_stage stage, 1139 const void *key_data, uint32_t key_size, 1140 const void *kernel_data, uint32_t kernel_size, 1141 const struct brw_stage_prog_data *prog_data, 1142 uint32_t prog_data_size, 1143 const struct brw_compile_stats *stats, 1144 uint32_t num_stats, 1145 const struct nir_xfb_info *xfb_info, 1146 const struct anv_pipeline_bind_map *bind_map); 1147 1148struct nir_shader; 1149struct nir_shader_compiler_options; 1150 1151struct nir_shader * 1152anv_device_search_for_nir(struct anv_device *device, 1153 struct vk_pipeline_cache *cache, 1154 const struct nir_shader_compiler_options *nir_options, 1155 unsigned char sha1_key[20], 1156 void *mem_ctx); 1157 1158void 1159anv_device_upload_nir(struct anv_device *device, 1160 struct vk_pipeline_cache *cache, 1161 const struct nir_shader *nir, 1162 unsigned char sha1_key[20]); 1163 1164struct anv_device { 1165 struct vk_device vk; 1166 1167 struct anv_physical_device * physical; 1168 struct intel_device_info info; 1169 struct isl_device isl_dev; 1170 int context_id; 1171 int fd; 1172 bool can_chain_batches; 1173 bool robust_buffer_access; 1174 1175 pthread_mutex_t vma_mutex; 1176 struct util_vma_heap vma_lo; 1177 struct util_vma_heap vma_cva; 1178 struct util_vma_heap vma_hi; 1179 1180 /** List of all anv_device_memory objects */ 1181 struct list_head memory_objects; 1182 1183 struct anv_bo_pool batch_bo_pool; 1184 struct anv_bo_pool utrace_bo_pool; 1185 1186 struct anv_bo_cache bo_cache; 1187 1188 struct anv_state_pool general_state_pool; 1189 struct anv_state_pool dynamic_state_pool; 1190 struct anv_state_pool instruction_state_pool; 1191 struct anv_state_pool binding_table_pool; 1192 struct anv_state_pool surface_state_pool; 1193 1194 struct anv_state_reserved_pool custom_border_colors; 1195 1196 /** BO used for various workarounds 1197 * 1198 * There are a number of workarounds on our hardware which require writing 1199 * data somewhere and it doesn't really matter where. For that, we use 1200 * this BO and just write to the first dword or so. 1201 * 1202 * We also need to be able to handle NULL buffers bound as pushed UBOs. 1203 * For that, we use the high bytes (>= 1024) of the workaround BO. 
1204 */ 1205 struct anv_bo * workaround_bo; 1206 struct anv_address workaround_address; 1207 1208 struct anv_bo * trivial_batch_bo; 1209 struct anv_state null_surface_state; 1210 1211 struct vk_pipeline_cache * default_pipeline_cache; 1212 struct vk_pipeline_cache * internal_cache; 1213 struct blorp_context blorp; 1214 1215 struct anv_state border_colors; 1216 1217 struct anv_state slice_hash; 1218 1219 /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements 1220 * 1221 * We need to emit CPS_STATE structures for each viewport accessible by a 1222 * pipeline. So rather than write many identical CPS_STATE structures 1223 * dynamically, we can enumerate all possible combinaisons and then just 1224 * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this 1225 * array. 1226 */ 1227 struct anv_state cps_states; 1228 1229 uint32_t queue_count; 1230 struct anv_queue * queues; 1231 1232 struct anv_scratch_pool scratch_pool; 1233 struct anv_bo *rt_scratch_bos[16]; 1234 1235 /** Shadow ray query BO 1236 * 1237 * The ray_query_bo only holds the current ray being traced. When using 1238 * more than 1 ray query per thread, we cannot fit all the queries in 1239 * there, so we need a another buffer to hold query data that is not 1240 * currently being used by the HW for tracing, similar to a scratch space. 1241 * 1242 * The size of the shadow buffer depends on the number of queries per 1243 * shader. 1244 */ 1245 struct anv_bo *ray_query_shadow_bos[16]; 1246 /** Ray query buffer used to communicated with HW unit. 1247 */ 1248 struct anv_bo *ray_query_bo; 1249 1250 struct anv_shader_bin *rt_trampoline; 1251 struct anv_shader_bin *rt_trivial_return; 1252 1253 pthread_mutex_t mutex; 1254 pthread_cond_t queue_submit; 1255 1256 struct intel_batch_decode_ctx decoder_ctx; 1257 /* 1258 * When decoding a anv_cmd_buffer, we might need to search for BOs through 1259 * the cmd_buffer's list. 1260 */ 1261 struct anv_cmd_buffer *cmd_buffer_being_decoded; 1262 1263 int perf_fd; /* -1 if no opened */ 1264 uint64_t perf_metric; /* 0 if unset */ 1265 1266 struct intel_aux_map_context *aux_map_ctx; 1267 1268 const struct intel_l3_config *l3_config; 1269 1270 struct intel_debug_block_frame *debug_frame_desc; 1271 1272 struct intel_ds_device ds; 1273}; 1274 1275#if defined(GFX_VERx10) && GFX_VERx10 >= 90 1276#define ANV_ALWAYS_SOFTPIN true 1277#else 1278#define ANV_ALWAYS_SOFTPIN false 1279#endif 1280 1281static inline bool 1282anv_use_relocations(const struct anv_physical_device *pdevice) 1283{ 1284#if defined(GFX_VERx10) && GFX_VERx10 >= 90 1285 /* Sky Lake and later always uses softpin */ 1286 assert(!pdevice->use_relocations); 1287 return false; 1288#elif defined(GFX_VERx10) && GFX_VERx10 < 80 1289 /* Haswell and earlier never use softpin */ 1290 assert(pdevice->use_relocations); 1291 return true; 1292#else 1293 /* If we don't have a GFX_VERx10 #define, we need to look at the physical 1294 * device. Also, for GFX version 8, we need to look at the physical 1295 * device because Broadwell softpins but Cherryview doesn't. 
1296 */ 1297 return pdevice->use_relocations; 1298#endif 1299} 1300 1301static inline struct anv_state_pool * 1302anv_binding_table_pool(struct anv_device *device) 1303{ 1304 if (anv_use_relocations(device->physical)) 1305 return &device->surface_state_pool; 1306 else 1307 return &device->binding_table_pool; 1308} 1309 1310static inline struct anv_state 1311anv_binding_table_pool_alloc(struct anv_device *device) 1312{ 1313 if (anv_use_relocations(device->physical)) 1314 return anv_state_pool_alloc_back(&device->surface_state_pool); 1315 else 1316 return anv_state_pool_alloc(&device->binding_table_pool, 1317 device->binding_table_pool.block_size, 0); 1318} 1319 1320static inline void 1321anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) { 1322 anv_state_pool_free(anv_binding_table_pool(device), state); 1323} 1324 1325static inline uint32_t 1326anv_mocs(const struct anv_device *device, 1327 const struct anv_bo *bo, 1328 isl_surf_usage_flags_t usage) 1329{ 1330 return isl_mocs(&device->isl_dev, usage, bo && bo->is_external); 1331} 1332 1333void anv_device_init_blorp(struct anv_device *device); 1334void anv_device_finish_blorp(struct anv_device *device); 1335 1336enum anv_bo_alloc_flags { 1337 /** Specifies that the BO must have a 32-bit address 1338 * 1339 * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS. 1340 */ 1341 ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0), 1342 1343 /** Specifies that the BO may be shared externally */ 1344 ANV_BO_ALLOC_EXTERNAL = (1 << 1), 1345 1346 /** Specifies that the BO should be mapped */ 1347 ANV_BO_ALLOC_MAPPED = (1 << 2), 1348 1349 /** Specifies that the BO should be snooped so we get coherency */ 1350 ANV_BO_ALLOC_SNOOPED = (1 << 3), 1351 1352 /** Specifies that the BO should be captured in error states */ 1353 ANV_BO_ALLOC_CAPTURE = (1 << 4), 1354 1355 /** Specifies that the BO will have an address assigned by the caller 1356 * 1357 * Such BOs do not exist in any VMA heap. 1358 */ 1359 ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5), 1360 1361 /** Enables implicit synchronization on the BO 1362 * 1363 * This is the opposite of EXEC_OBJECT_ASYNC. 1364 */ 1365 ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6), 1366 1367 /** Enables implicit synchronization on the BO 1368 * 1369 * This is equivalent to EXEC_OBJECT_WRITE. 
1370 */ 1371 ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7), 1372 1373 /** Has an address which is visible to the client */ 1374 ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8), 1375 1376 /** This buffer has implicit CCS data attached to it */ 1377 ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9), 1378 1379 /** This buffer is allocated from local memory */ 1380 ANV_BO_ALLOC_LOCAL_MEM = (1 << 10), 1381 1382 /** This buffer is allocated from local memory and should be cpu visible */ 1383 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11), 1384}; 1385 1386VkResult anv_device_alloc_bo(struct anv_device *device, 1387 const char *name, uint64_t size, 1388 enum anv_bo_alloc_flags alloc_flags, 1389 uint64_t explicit_address, 1390 struct anv_bo **bo); 1391VkResult anv_device_map_bo(struct anv_device *device, 1392 struct anv_bo *bo, 1393 uint64_t offset, 1394 size_t size, 1395 uint32_t gem_flags, 1396 void **map_out); 1397void anv_device_unmap_bo(struct anv_device *device, 1398 struct anv_bo *bo, 1399 void *map, size_t map_size); 1400VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device, 1401 void *host_ptr, uint32_t size, 1402 enum anv_bo_alloc_flags alloc_flags, 1403 uint64_t client_address, 1404 struct anv_bo **bo_out); 1405VkResult anv_device_import_bo(struct anv_device *device, int fd, 1406 enum anv_bo_alloc_flags alloc_flags, 1407 uint64_t client_address, 1408 struct anv_bo **bo); 1409VkResult anv_device_export_bo(struct anv_device *device, 1410 struct anv_bo *bo, int *fd_out); 1411VkResult anv_device_get_bo_tiling(struct anv_device *device, 1412 struct anv_bo *bo, 1413 enum isl_tiling *tiling_out); 1414VkResult anv_device_set_bo_tiling(struct anv_device *device, 1415 struct anv_bo *bo, 1416 uint32_t row_pitch_B, 1417 enum isl_tiling tiling); 1418void anv_device_release_bo(struct anv_device *device, 1419 struct anv_bo *bo); 1420 1421static inline struct anv_bo * 1422anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle) 1423{ 1424 return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle); 1425} 1426 1427VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo, 1428 int64_t timeout); 1429 1430VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue, 1431 uint32_t exec_flags, 1432 const VkDeviceQueueCreateInfo *pCreateInfo, 1433 uint32_t index_in_family); 1434void anv_queue_finish(struct anv_queue *queue); 1435 1436VkResult anv_queue_submit(struct vk_queue *queue, 1437 struct vk_queue_submit *submit); 1438VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, 1439 struct anv_batch *batch); 1440 1441void* anv_gem_mmap(struct anv_device *device, 1442 uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); 1443void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size); 1444uint32_t anv_gem_create(struct anv_device *device, uint64_t size); 1445void anv_gem_close(struct anv_device *device, uint32_t gem_handle); 1446uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size, 1447 uint32_t flags, uint32_t num_regions, 1448 struct drm_i915_gem_memory_class_instance *regions); 1449uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); 1450int anv_gem_busy(struct anv_device *device, uint32_t gem_handle); 1451int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); 1452int anv_gem_execbuffer(struct anv_device *device, 1453 struct drm_i915_gem_execbuffer2 *execbuf); 1454int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, 1455 uint32_t 
stride, uint32_t tiling); 1456int anv_gem_create_context(struct anv_device *device); 1457bool anv_gem_has_context_priority(int fd, int priority); 1458int anv_gem_destroy_context(struct anv_device *device, int context); 1459int anv_gem_set_context_param(int fd, int context, uint32_t param, 1460 uint64_t value); 1461int anv_gem_get_param(int fd, uint32_t param); 1462int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle); 1463int anv_gem_context_get_reset_stats(int fd, int context, 1464 uint32_t *active, uint32_t *pending); 1465int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); 1466int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result); 1467uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); 1468int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); 1469int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, 1470 uint32_t read_domains, uint32_t write_domain); 1471int anv_i915_query(int fd, uint64_t query_id, void *buffer, 1472 int32_t *buffer_len); 1473struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd); 1474 1475uint64_t anv_vma_alloc(struct anv_device *device, 1476 uint64_t size, uint64_t align, 1477 enum anv_bo_alloc_flags alloc_flags, 1478 uint64_t client_address); 1479void anv_vma_free(struct anv_device *device, 1480 uint64_t address, uint64_t size); 1481 1482struct anv_reloc_list { 1483 uint32_t num_relocs; 1484 uint32_t array_length; 1485 struct drm_i915_gem_relocation_entry * relocs; 1486 struct anv_bo ** reloc_bos; 1487 uint32_t dep_words; 1488 BITSET_WORD * deps; 1489}; 1490 1491VkResult anv_reloc_list_init(struct anv_reloc_list *list, 1492 const VkAllocationCallbacks *alloc); 1493void anv_reloc_list_finish(struct anv_reloc_list *list, 1494 const VkAllocationCallbacks *alloc); 1495 1496VkResult anv_reloc_list_add(struct anv_reloc_list *list, 1497 const VkAllocationCallbacks *alloc, 1498 uint32_t offset, struct anv_bo *target_bo, 1499 uint32_t delta, uint64_t *address_u64_out); 1500 1501VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list, 1502 const VkAllocationCallbacks *alloc, 1503 struct anv_bo *target_bo); 1504 1505struct anv_batch_bo { 1506 /* Link in the anv_cmd_buffer.owned_batch_bos list */ 1507 struct list_head link; 1508 1509 struct anv_bo * bo; 1510 1511 /* Bytes actually consumed in this batch BO */ 1512 uint32_t length; 1513 1514 /* When this batch BO is used as part of a primary batch buffer, this 1515 * tracked whether it is chained to another primary batch buffer. 1516 * 1517 * If this is the case, the relocation list's last entry points the 1518 * location of the MI_BATCH_BUFFER_START chaining to the next batch. 1519 */ 1520 bool chained; 1521 1522 struct anv_reloc_list relocs; 1523}; 1524 1525struct anv_batch { 1526 const VkAllocationCallbacks * alloc; 1527 1528 struct anv_address start_addr; 1529 1530 void * start; 1531 void * end; 1532 void * next; 1533 1534 struct anv_reloc_list * relocs; 1535 1536 /* This callback is called (with the associated user data) in the event 1537 * that the batch runs out of space. 1538 */ 1539 VkResult (*extend_cb)(struct anv_batch *, void *); 1540 void * user_data; 1541 1542 /** 1543 * Current error status of the command buffer. Used to track inconsistent 1544 * or incomplete command buffer states that are the consequence of run-time 1545 * errors such as out of memory scenarios. 
We want to track this in the 1546 * batch because the command buffer object is not visible to some parts 1547 * of the driver. 1548 */ 1549 VkResult status; 1550}; 1551 1552void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); 1553void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); 1554struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location); 1555 1556static inline void 1557anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr, 1558 void *map, size_t size) 1559{ 1560 batch->start_addr = addr; 1561 batch->next = batch->start = map; 1562 batch->end = map + size; 1563} 1564 1565static inline VkResult 1566anv_batch_set_error(struct anv_batch *batch, VkResult error) 1567{ 1568 assert(error != VK_SUCCESS); 1569 if (batch->status == VK_SUCCESS) 1570 batch->status = error; 1571 return batch->status; 1572} 1573 1574static inline bool 1575anv_batch_has_error(struct anv_batch *batch) 1576{ 1577 return batch->status != VK_SUCCESS; 1578} 1579 1580static inline uint64_t 1581anv_batch_emit_reloc(struct anv_batch *batch, 1582 void *location, struct anv_bo *bo, uint32_t delta) 1583{ 1584 uint64_t address_u64 = 0; 1585 VkResult result; 1586 1587 if (ANV_ALWAYS_SOFTPIN) { 1588 address_u64 = bo->offset + delta; 1589 result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo); 1590 } else { 1591 result = anv_reloc_list_add(batch->relocs, batch->alloc, 1592 location - batch->start, bo, delta, 1593 &address_u64); 1594 } 1595 if (unlikely(result != VK_SUCCESS)) { 1596 anv_batch_set_error(batch, result); 1597 return 0; 1598 } 1599 1600 return address_u64; 1601} 1602 1603static inline void 1604write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush) 1605{ 1606 unsigned reloc_size = 0; 1607 if (device->info.ver >= 8) { 1608 reloc_size = sizeof(uint64_t); 1609 *(uint64_t *)p = intel_canonical_address(v); 1610 } else { 1611 reloc_size = sizeof(uint32_t); 1612 *(uint32_t *)p = v; 1613 } 1614 1615 if (flush && device->physical->memory.need_clflush) 1616 intel_flush_range(p, reloc_size); 1617} 1618 1619static inline uint64_t 1620_anv_combine_address(struct anv_batch *batch, void *location, 1621 const struct anv_address address, uint32_t delta) 1622{ 1623 if (address.bo == NULL) { 1624 return address.offset + delta; 1625 } else if (batch == NULL) { 1626 assert(anv_bo_is_pinned(address.bo)); 1627 return anv_address_physical(anv_address_add(address, delta)); 1628 } else { 1629 assert(batch->start <= location && location < batch->end); 1630 /* i915 relocations are signed. */ 1631 assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX); 1632 return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); 1633 } 1634} 1635 1636#define __gen_address_type struct anv_address 1637#define __gen_user_data struct anv_batch 1638#define __gen_combine_address _anv_combine_address 1639 1640/* Wrapper macros needed to work around preprocessor argument issues. In 1641 * particular, arguments don't get pre-evaluated if they are concatenated. 1642 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the 1643 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". 1644 * We can work around this easily enough with these helpers. 
1645 */ 1646#define __anv_cmd_length(cmd) cmd ## _length 1647#define __anv_cmd_length_bias(cmd) cmd ## _length_bias 1648#define __anv_cmd_header(cmd) cmd ## _header 1649#define __anv_cmd_pack(cmd) cmd ## _pack 1650#define __anv_reg_num(reg) reg ## _num 1651 1652#define anv_pack_struct(dst, struc, ...) do { \ 1653 struct struc __template = { \ 1654 __VA_ARGS__ \ 1655 }; \ 1656 __anv_cmd_pack(struc)(NULL, dst, &__template); \ 1657 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \ 1658 } while (0) 1659 1660#define anv_batch_emitn(batch, n, cmd, ...) ({ \ 1661 void *__dst = anv_batch_emit_dwords(batch, n); \ 1662 if (__dst) { \ 1663 struct cmd __template = { \ 1664 __anv_cmd_header(cmd), \ 1665 .DWordLength = n - __anv_cmd_length_bias(cmd), \ 1666 __VA_ARGS__ \ 1667 }; \ 1668 __anv_cmd_pack(cmd)(batch, __dst, &__template); \ 1669 } \ 1670 __dst; \ 1671 }) 1672 1673#define anv_batch_emit_merge(batch, dwords0, dwords1) \ 1674 do { \ 1675 uint32_t *dw; \ 1676 \ 1677 STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ 1678 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ 1679 if (!dw) \ 1680 break; \ 1681 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ 1682 dw[i] = (dwords0)[i] | (dwords1)[i]; \ 1683 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ 1684 } while (0) 1685 1686#define anv_batch_emit(batch, cmd, name) \ 1687 for (struct cmd name = { __anv_cmd_header(cmd) }, \ 1688 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ 1689 __builtin_expect(_dst != NULL, 1); \ 1690 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \ 1691 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ 1692 _dst = NULL; \ 1693 })) 1694 1695#define anv_batch_write_reg(batch, reg, name) \ 1696 for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \ 1697 ({ \ 1698 uint32_t _dw[__anv_cmd_length(reg)]; \ 1699 __anv_cmd_pack(reg)(NULL, _dw, &name); \ 1700 for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \ 1701 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \ 1702 lri.RegisterOffset = __anv_reg_num(reg); \ 1703 lri.DataDWord = _dw[i]; \ 1704 } \ 1705 } \ 1706 _cont = NULL; \ 1707 })) 1708 1709/* #define __gen_get_batch_dwords anv_batch_emit_dwords */ 1710/* #define __gen_get_batch_address anv_batch_address */ 1711/* #define __gen_address_value anv_address_physical */ 1712/* #define __gen_address_offset anv_address_add */ 1713 1714struct anv_device_memory { 1715 struct vk_object_base base; 1716 1717 struct list_head link; 1718 1719 struct anv_bo * bo; 1720 const struct anv_memory_type * type; 1721 1722 void * map; 1723 size_t map_size; 1724 1725 /* The map, from the user PoV is map + map_delta */ 1726 uint64_t map_delta; 1727 1728 /* The map, from the user PoV is map + map_delta */ 1729 uint32_t map_delta; 1730 1731 /* If set, we are holding reference to AHardwareBuffer 1732 * which we must release when memory is freed. 1733 */ 1734 struct AHardwareBuffer * ahw; 1735 1736 /* If set, this memory comes from a host pointer. */ 1737 void * host_ptr; 1738}; 1739 1740/** 1741 * Header for Vertex URB Entry (VUE) 1742 */ 1743struct anv_vue_header { 1744 uint32_t Reserved; 1745 uint32_t RTAIndex; /* RenderTargetArrayIndex */ 1746 uint32_t ViewportIndex; 1747 float PointWidth; 1748}; 1749 1750/** Struct representing a sampled image descriptor 1751 * 1752 * This descriptor layout is used for sampled images, bare sampler, and 1753 * combined image/sampler descriptors. 
 */
struct anv_sampled_image_descriptor {
   /** Bindless image handle
    *
    * This is expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t image;

   /** Bindless sampler handle
    *
    * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
    * to the dynamic state base address.
    */
   uint32_t sampler;
};

struct anv_texture_swizzle_descriptor {
   /** Texture swizzle
    *
    * See also nir_intrinsic_channel_select_intel
    */
   uint8_t swizzle[4];

   /** Unused padding to ensure the struct is a multiple of 64 bits */
   uint32_t _pad;
};

/** Struct representing a storage image descriptor */
struct anv_storage_image_descriptor {
   /** Bindless image handles
    *
    * These are expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t vanilla;
   uint32_t lowered;
};

/** Struct representing an address/range descriptor
 *
 * The fields of this struct correspond directly to the data layout of
 * nir_address_format_64bit_bounded_global addresses. The last field is the
 * offset in the NIR address; it must be zero so that loading the descriptor
 * yields a pointer to the start of the range.
 */
struct anv_address_range_descriptor {
   uint64_t address;
   uint32_t range;
   uint32_t zero;
};

enum anv_descriptor_data {
   /** The descriptor contains a BTI reference to a surface state */
   ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
   /** The descriptor contains a BTI reference to a sampler state */
   ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1),
   /** The descriptor contains an actual buffer view */
   ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2),
   /** The descriptor contains auxiliary image layout data */
   ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
   /** The descriptor contains inline uniform data */
   ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
   /** anv_address_range_descriptor with a buffer address and range */
   ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
   /** Bindless surface and sampler handles (anv_sampled_image_descriptor) */
   ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
   /** Storage image handles (anv_storage_image_descriptor) */
   ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
   /** Texture swizzle data (anv_texture_swizzle_descriptor) */
   ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
};

struct anv_descriptor_set_binding_layout {
   /* The type of the descriptors in this binding */
   VkDescriptorType type;

   /* Flags provided when this binding was created */
   VkDescriptorBindingFlags flags;

   /* Bitfield representing the type of data this descriptor contains */
   enum anv_descriptor_data data;

   /* Maximum number of YCbCr texture/sampler planes */
   uint8_t max_plane_count;

   /* Number of array elements in this binding (or size in bytes for inline
    * uniform data)
    */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   /* Index into the dynamic state array for a dynamic buffer */
   int16_t dynamic_offset_index;

   /* Index into the descriptor set buffer views */
   int32_t buffer_view_index;

   /* Offset into the descriptor buffer where this descriptor lives */
   uint32_t descriptor_offset;

   /* Pre-computed stride */
   unsigned descriptor_stride;

   /* Immutable
samplers (or NULL if no immutable samplers) */ 1861 struct anv_sampler **immutable_samplers; 1862}; 1863 1864bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice, 1865 const struct anv_descriptor_set_binding_layout *binding, 1866 bool sampler); 1867 1868bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice, 1869 const struct anv_descriptor_set_binding_layout *binding, 1870 bool sampler); 1871 1872struct anv_descriptor_set_layout { 1873 struct vk_object_base base; 1874 1875 /* Descriptor set layouts can be destroyed at almost any time */ 1876 uint32_t ref_cnt; 1877 1878 /* Number of bindings in this descriptor set */ 1879 uint32_t binding_count; 1880 1881 /* Total number of descriptors */ 1882 uint32_t descriptor_count; 1883 1884 /* Shader stages affected by this descriptor set */ 1885 uint16_t shader_stages; 1886 1887 /* Number of buffer views in this descriptor set */ 1888 uint32_t buffer_view_count; 1889 1890 /* Number of dynamic offsets used by this descriptor set */ 1891 uint16_t dynamic_offset_count; 1892 1893 /* For each dynamic buffer, which VkShaderStageFlagBits stages are using 1894 * this buffer 1895 */ 1896 VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS]; 1897 1898 /* Size of the descriptor buffer for this descriptor set */ 1899 uint32_t descriptor_buffer_size; 1900 1901 /* Bindings in this descriptor set */ 1902 struct anv_descriptor_set_binding_layout binding[0]; 1903}; 1904 1905void anv_descriptor_set_layout_destroy(struct anv_device *device, 1906 struct anv_descriptor_set_layout *layout); 1907 1908static inline void 1909anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout) 1910{ 1911 assert(layout && layout->ref_cnt >= 1); 1912 p_atomic_inc(&layout->ref_cnt); 1913} 1914 1915static inline void 1916anv_descriptor_set_layout_unref(struct anv_device *device, 1917 struct anv_descriptor_set_layout *layout) 1918{ 1919 assert(layout && layout->ref_cnt >= 1); 1920 if (p_atomic_dec_zero(&layout->ref_cnt)) 1921 anv_descriptor_set_layout_destroy(device, layout); 1922} 1923 1924struct anv_descriptor { 1925 VkDescriptorType type; 1926 1927 union { 1928 struct { 1929 VkImageLayout layout; 1930 struct anv_image_view *image_view; 1931 struct anv_sampler *sampler; 1932 }; 1933 1934 struct { 1935 struct anv_buffer_view *set_buffer_view; 1936 struct anv_buffer *buffer; 1937 uint64_t offset; 1938 uint64_t range; 1939 }; 1940 1941 struct anv_buffer_view *buffer_view; 1942 1943 struct anv_acceleration_structure *accel_struct; 1944 }; 1945}; 1946 1947struct anv_descriptor_set { 1948 struct vk_object_base base; 1949 1950 struct anv_descriptor_pool *pool; 1951 struct anv_descriptor_set_layout *layout; 1952 1953 /* Amount of space occupied in the the pool by this descriptor set. It can 1954 * be larger than the size of the descriptor set. 1955 */ 1956 uint32_t size; 1957 1958 /* State relative to anv_descriptor_pool::bo */ 1959 struct anv_state desc_mem; 1960 /* Surface state for the descriptor buffer */ 1961 struct anv_state desc_surface_state; 1962 1963 /* Descriptor set address. */ 1964 struct anv_address desc_addr; 1965 1966 uint32_t buffer_view_count; 1967 struct anv_buffer_view *buffer_views; 1968 1969 /* Link to descriptor pool's desc_sets list . 
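 * The pool walks this list when it is reset or destroyed so that every set
 * allocated from it can be cleaned up.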
*/ 1970 struct list_head pool_link; 1971 1972 uint32_t descriptor_count; 1973 struct anv_descriptor descriptors[0]; 1974}; 1975 1976static inline bool 1977anv_descriptor_set_is_push(struct anv_descriptor_set *set) 1978{ 1979 return set->pool == NULL; 1980} 1981 1982struct anv_buffer_view { 1983 struct vk_object_base base; 1984 1985 uint64_t range; /**< VkBufferViewCreateInfo::range */ 1986 1987 struct anv_address address; 1988 1989 struct anv_state surface_state; 1990 struct anv_state storage_surface_state; 1991 struct anv_state lowered_storage_surface_state; 1992 1993 struct brw_image_param lowered_storage_image_param; 1994}; 1995 1996struct anv_push_descriptor_set { 1997 struct anv_descriptor_set set; 1998 1999 /* Put this field right behind anv_descriptor_set so it fills up the 2000 * descriptors[0] field. */ 2001 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS]; 2002 2003 /** True if the descriptor set buffer has been referenced by a draw or 2004 * dispatch command. 2005 */ 2006 bool set_used_on_gpu; 2007 2008 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS]; 2009}; 2010 2011static inline struct anv_address 2012anv_descriptor_set_address(struct anv_descriptor_set *set) 2013{ 2014 if (anv_descriptor_set_is_push(set)) { 2015 /* We have to flag push descriptor set as used on the GPU 2016 * so that the next time we push descriptors, we grab a new memory. 2017 */ 2018 struct anv_push_descriptor_set *push_set = 2019 (struct anv_push_descriptor_set *)set; 2020 push_set->set_used_on_gpu = true; 2021 } 2022 2023 return set->desc_addr; 2024} 2025 2026struct anv_descriptor_pool { 2027 struct vk_object_base base; 2028 2029 uint32_t size; 2030 uint32_t next; 2031 uint32_t free_list; 2032 2033 struct anv_bo *bo; 2034 struct util_vma_heap bo_heap; 2035 2036 struct anv_state_stream surface_state_stream; 2037 void *surface_state_free_list; 2038 2039 struct list_head desc_sets; 2040 2041 bool host_only; 2042 2043 char data[0]; 2044}; 2045 2046struct anv_descriptor_template_entry { 2047 /* The type of descriptor in this entry */ 2048 VkDescriptorType type; 2049 2050 /* Binding in the descriptor set */ 2051 uint32_t binding; 2052 2053 /* Offset at which to write into the descriptor set binding */ 2054 uint32_t array_element; 2055 2056 /* Number of elements to write into the descriptor set binding */ 2057 uint32_t array_count; 2058 2059 /* Offset into the user provided data */ 2060 size_t offset; 2061 2062 /* Stride between elements into the user provided data */ 2063 size_t stride; 2064}; 2065 2066struct anv_descriptor_update_template { 2067 struct vk_object_base base; 2068 2069 VkPipelineBindPoint bind_point; 2070 2071 /* The descriptor set this template corresponds to. This value is only 2072 * valid if the template was created with the templateType 2073 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. 
2074 */ 2075 uint8_t set; 2076 2077 /* Number of entries in this template */ 2078 uint32_t entry_count; 2079 2080 /* Entries of the template */ 2081 struct anv_descriptor_template_entry entries[0]; 2082}; 2083 2084size_t 2085anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout, 2086 uint32_t var_desc_count); 2087 2088uint32_t 2089anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout, 2090 uint32_t var_desc_count); 2091 2092void 2093anv_descriptor_set_write_image_view(struct anv_device *device, 2094 struct anv_descriptor_set *set, 2095 const VkDescriptorImageInfo * const info, 2096 VkDescriptorType type, 2097 uint32_t binding, 2098 uint32_t element); 2099 2100void 2101anv_descriptor_set_write_buffer_view(struct anv_device *device, 2102 struct anv_descriptor_set *set, 2103 VkDescriptorType type, 2104 struct anv_buffer_view *buffer_view, 2105 uint32_t binding, 2106 uint32_t element); 2107 2108void 2109anv_descriptor_set_write_buffer(struct anv_device *device, 2110 struct anv_descriptor_set *set, 2111 struct anv_state_stream *alloc_stream, 2112 VkDescriptorType type, 2113 struct anv_buffer *buffer, 2114 uint32_t binding, 2115 uint32_t element, 2116 VkDeviceSize offset, 2117 VkDeviceSize range); 2118 2119void 2120anv_descriptor_set_write_acceleration_structure(struct anv_device *device, 2121 struct anv_descriptor_set *set, 2122 struct anv_acceleration_structure *accel, 2123 uint32_t binding, 2124 uint32_t element); 2125 2126void 2127anv_descriptor_set_write_inline_uniform_data(struct anv_device *device, 2128 struct anv_descriptor_set *set, 2129 uint32_t binding, 2130 const void *data, 2131 size_t offset, 2132 size_t size); 2133 2134void 2135anv_descriptor_set_write_template(struct anv_device *device, 2136 struct anv_descriptor_set *set, 2137 struct anv_state_stream *alloc_stream, 2138 const struct anv_descriptor_update_template *template, 2139 const void *data); 2140 2141#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5) 2142#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4) 2143#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3) 2144#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2) 2145#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1) 2146#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX 2147 2148struct anv_pipeline_binding { 2149 /** Index in the descriptor set 2150 * 2151 * This is a flattened index; the descriptor set layout is already taken 2152 * into account. 2153 */ 2154 uint32_t index; 2155 2156 /** The descriptor set this surface corresponds to. 2157 * 2158 * The special ANV_DESCRIPTOR_SET_* values above indicates that this 2159 * binding is not a normal descriptor set but something else. 2160 */ 2161 uint8_t set; 2162 2163 union { 2164 /** Plane in the binding index for images */ 2165 uint8_t plane; 2166 2167 /** Dynamic offset index (for dynamic UBOs and SSBOs) */ 2168 uint8_t dynamic_offset_index; 2169 }; 2170 2171 /** For a storage image, whether it requires a lowered surface */ 2172 uint8_t lowered_storage_surface; 2173 2174 /** Pad to 64 bits so that there are no holes and we can safely memcmp 2175 * assuming POD zero-initialization. 
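 *
 * With the pad, the struct is exactly 8 bytes: a uint32_t index followed by
 * four uint8_t fields.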
2176 */ 2177 uint8_t pad; 2178}; 2179 2180struct anv_push_range { 2181 /** Index in the descriptor set */ 2182 uint32_t index; 2183 2184 /** Descriptor set index */ 2185 uint8_t set; 2186 2187 /** Dynamic offset index (for dynamic UBOs) */ 2188 uint8_t dynamic_offset_index; 2189 2190 /** Start offset in units of 32B */ 2191 uint8_t start; 2192 2193 /** Range in units of 32B */ 2194 uint8_t length; 2195}; 2196 2197struct anv_pipeline_layout { 2198 struct vk_object_base base; 2199 2200 struct { 2201 struct anv_descriptor_set_layout *layout; 2202 uint32_t dynamic_offset_start; 2203 } set[MAX_SETS]; 2204 2205 uint32_t num_sets; 2206 2207 unsigned char sha1[20]; 2208}; 2209 2210struct anv_buffer { 2211 struct vk_buffer vk; 2212 2213 /* Set when bound */ 2214 struct anv_address address; 2215}; 2216 2217enum anv_cmd_dirty_bits { 2218 ANV_CMD_DIRTY_PIPELINE = 1 << 0, 2219 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1, 2220 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 2, 2221 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 3, 2222}; 2223typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t; 2224 2225enum anv_pipe_bits { 2226 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0), 2227 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1), 2228 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2), 2229 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3), 2230 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4), 2231 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5), 2232 ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6), 2233 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10), 2234 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11), 2235 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12), 2236 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13), 2237 2238 /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data 2239 * cache work has completed. Available on Gfx12+. For earlier Gfx we 2240 * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT. 2241 */ 2242 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14), 2243 ANV_PIPE_PSS_STALL_SYNC_BIT = (1 << 15), 2244 ANV_PIPE_CS_STALL_BIT = (1 << 20), 2245 ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21), 2246 2247 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that 2248 * a flush has happened but not a CS stall. The next time we do any sort 2249 * of invalidation we need to insert a CS stall at that time. Otherwise, 2250 * we would have to CS stall on every flush which could be bad. 2251 */ 2252 ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22), 2253 2254 /* This bit does not exist directly in PIPE_CONTROL. It means that render 2255 * target operations related to transfer commands with VkBuffer as 2256 * destination are ongoing. Some operations like copies on the command 2257 * streamer might need to be aware of this to trigger the appropriate stall 2258 * before they can proceed with the copy. 2259 */ 2260 ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23), 2261 2262 /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12 2263 * AUX-TT data has changed and we need to invalidate AUX-TT data. This is 2264 * done by writing the AUX-TT register. 2265 */ 2266 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24), 2267 2268 /* This bit does not exist directly in PIPE_CONTROL. It means that a 2269 * PIPE_CONTROL with a post-sync operation will follow. This is used to 2270 * implement a workaround for Gfx9. 
2271 */ 2272 ANV_PIPE_POST_SYNC_BIT = (1 << 25), 2273}; 2274 2275#define ANV_PIPE_FLUSH_BITS ( \ 2276 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ 2277 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ 2278 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \ 2279 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ 2280 ANV_PIPE_TILE_CACHE_FLUSH_BIT) 2281 2282#define ANV_PIPE_STALL_BITS ( \ 2283 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \ 2284 ANV_PIPE_DEPTH_STALL_BIT | \ 2285 ANV_PIPE_CS_STALL_BIT) 2286 2287#define ANV_PIPE_INVALIDATE_BITS ( \ 2288 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \ 2289 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \ 2290 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \ 2291 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \ 2292 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \ 2293 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) 2294 2295enum intel_ds_stall_flag 2296anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); 2297 2298static inline enum anv_pipe_bits 2299anv_pipe_flush_bits_for_access_flags(struct anv_device *device, 2300 VkAccessFlags2 flags) 2301{ 2302 enum anv_pipe_bits pipe_bits = 0; 2303 2304 u_foreach_bit64(b, flags) { 2305 switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { 2306 case VK_ACCESS_2_SHADER_WRITE_BIT: 2307 case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: 2308 /* We're transitioning a buffer that was previously used as write 2309 * destination through the data port. To make its content available 2310 * to future operations, flush the hdc pipeline. 2311 */ 2312 pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; 2313 break; 2314 case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: 2315 /* We're transitioning a buffer that was previously used as render 2316 * target. To make its content available to future operations, flush 2317 * the render target cache. 2318 */ 2319 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; 2320 break; 2321 case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: 2322 /* We're transitioning a buffer that was previously used as depth 2323 * buffer. To make its content available to future operations, flush 2324 * the depth cache. 2325 */ 2326 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; 2327 break; 2328 case VK_ACCESS_2_TRANSFER_WRITE_BIT: 2329 /* We're transitioning a buffer that was previously used as a 2330 * transfer write destination. Generic write operations include color 2331 * & depth operations as well as buffer operations like : 2332 * - vkCmdClearColorImage() 2333 * - vkCmdClearDepthStencilImage() 2334 * - vkCmdBlitImage() 2335 * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() 2336 * 2337 * Most of these operations are implemented using Blorp which writes 2338 * through the render target, so flush that cache to make it visible 2339 * to future operations. And for depth related operations we also 2340 * need to flush the depth cache. 2341 */ 2342 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; 2343 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; 2344 break; 2345 case VK_ACCESS_2_MEMORY_WRITE_BIT: 2346 /* We're transitioning a buffer for generic write operations. Flush 2347 * all the caches. 2348 */ 2349 pipe_bits |= ANV_PIPE_FLUSH_BITS; 2350 break; 2351 case VK_ACCESS_2_HOST_WRITE_BIT: 2352 /* We're transitioning a buffer for access by CPU. Invalidate 2353 * all the caches. Since data and tile caches don't have invalidate, 2354 * we are forced to flush those as well. 
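 * ANV_PIPE_FLUSH_BITS (defined above) includes the data and tile cache
 * flush bits, so adding it here covers those caches.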
 */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
         /* We're transitioning a buffer written either from the VS stage or
          * from the command streamer (see CmdEndTransformFeedbackEXT), so we
          * just need to stall the CS.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}

static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
                                          VkAccessFlags2 flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit64(b, flags) {
      switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
         /* Indirect draw commands take a buffer as input that we're going to
          * read from the command streamer to load some of the HW registers
          * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
          * command streamer stall so that all the cache flushes have
          * completed before the command streamer loads from memory.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
          * through a vertex buffer, so invalidate that cache.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
          * UBO from the buffer, so we need to invalidate the constant cache.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         /* A tile cache flush is needed for CmdDispatchIndirect since the
          * command streamer and vertex fetch aren't L3 coherent.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_INDEX_READ_BIT:
      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
         /* We're transitioning a buffer to be used as input for vkCmdDraw*
          * commands, so we invalidate the VF cache to make sure there is no
          * stale data when we start rendering.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_2_UNIFORM_READ_BIT:
         /* We're transitioning a buffer to be used as uniform data. Because
          * uniforms are accessed through the data port & sampler, we need to
          * invalidate the texture cache (sampler) & constant cache (data
          * port) to avoid stale data.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         if (device->physical->compiler->indirect_ubos_use_sampler)
            pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         else
            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_SHADER_READ_BIT:
      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
      case VK_ACCESS_2_TRANSFER_READ_BIT:
         /* We're transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache; we don't want any stale data.
          */
         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_2_MEMORY_READ_BIT:
         /* We're transitioning a buffer for generic read; invalidate all the
          * caches.
          */
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_2_MEMORY_WRITE_BIT:
         /* Generic write, make sure all previously written things land in
          * memory.
 */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
         /* We're transitioning a buffer for conditional rendering or
          * transform feedback. We'll load the content of this buffer into HW
          * registers using the command streamer, so we need to stall the
          * command streamer to make sure any in-flight flush operations have
          * completed.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_HOST_READ_BIT:
         /* We're transitioning a buffer that was written by the CPU. Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
         /* We're transitioning a buffer to be written by the streamout fixed
          * function. This one is apparently not L3 coherent, so we need a
          * tile cache flush to make sure any previous write is not going to
          * create WaW hazards.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}

#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
   VK_IMAGE_ASPECT_COLOR_BIT | \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)
#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)

struct anv_vertex_binding {
   struct anv_buffer * buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};

struct anv_xfb_binding {
   struct anv_buffer * buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};

struct anv_push_constants {
   /** Push constant data provided by the client through vkPushConstants */
   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];

   /** Dynamic offsets for dynamic UBOs and SSBOs */
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

   /* Robust access pushed registers. */
   uint64_t push_reg_mask[MESA_SHADER_STAGES];

   /** Ray query globals (RT_DISPATCH_GLOBALS) */
   uint64_t ray_query_globals;

   /* Base addresses for descriptor sets */
   uint64_t desc_sets[MAX_SETS];

   struct {
      /** Base workgroup ID
       *
       * Used for vkCmdDispatchBase.
       */
      uint32_t base_work_group_id[3];

      /** Subgroup ID
       *
       * This is never set by software but is implicitly filled out when
       * uploading the push constants for compute shaders.
       */
      uint32_t subgroup_id;
   } cs;
};

struct anv_surface_state {
   struct anv_state state;
   /** Address of the surface referred to by this state
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address address;
   /* Address of the aux surface, if any
    *
    * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
    *
    * With the exception of gfx8, the bottom 12 bits of this address' offset
    * include extra aux information.
    */
   struct anv_address aux_address;
   /* Address of the clear color, if any
    *
    * This address is relative to the start of the BO.
2551 */ 2552 struct anv_address clear_address; 2553}; 2554 2555struct anv_attachment { 2556 VkFormat vk_format; 2557 const struct anv_image_view *iview; 2558 VkImageLayout layout; 2559 enum isl_aux_usage aux_usage; 2560 struct anv_surface_state surface_state; 2561 2562 VkResolveModeFlagBits resolve_mode; 2563 const struct anv_image_view *resolve_iview; 2564 VkImageLayout resolve_layout; 2565}; 2566 2567/** State tracking for vertex buffer flushes 2568 * 2569 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory 2570 * addresses. If you happen to have two vertex buffers which get placed 2571 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get 2572 * collisions. In order to solve this problem, we track vertex address ranges 2573 * which are live in the cache and invalidate the cache if one ever exceeds 32 2574 * bits. 2575 */ 2576struct anv_vb_cache_range { 2577 /* Virtual address at which the live vertex buffer cache range starts for 2578 * this vertex buffer index. 2579 */ 2580 uint64_t start; 2581 2582 /* Virtual address of the byte after where vertex buffer cache range ends. 2583 * This is exclusive such that end - start is the size of the range. 2584 */ 2585 uint64_t end; 2586}; 2587 2588/* Check whether we need to apply the Gfx8-9 vertex buffer workaround*/ 2589static inline bool 2590anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound, 2591 struct anv_vb_cache_range *dirty, 2592 struct anv_address vb_address, 2593 uint32_t vb_size) 2594{ 2595 if (vb_size == 0) { 2596 bound->start = 0; 2597 bound->end = 0; 2598 return false; 2599 } 2600 2601 assert(vb_address.bo && anv_bo_is_pinned(vb_address.bo)); 2602 bound->start = intel_48b_address(anv_address_physical(vb_address)); 2603 bound->end = bound->start + vb_size; 2604 assert(bound->end > bound->start); /* No overflow */ 2605 2606 /* Align everything to a cache line */ 2607 bound->start &= ~(64ull - 1ull); 2608 bound->end = align_u64(bound->end, 64); 2609 2610 /* Compute the dirty range */ 2611 dirty->start = MIN2(dirty->start, bound->start); 2612 dirty->end = MAX2(dirty->end, bound->end); 2613 2614 /* If our range is larger than 32 bits, we have to flush */ 2615 assert(bound->end - bound->start <= (1ull << 32)); 2616 return (dirty->end - dirty->start) > (1ull << 32); 2617} 2618 2619/** State tracking for particular pipeline bind point 2620 * 2621 * This struct is the base struct for anv_cmd_graphics_state and 2622 * anv_cmd_compute_state. These are used to track state which is bound to a 2623 * particular type of pipeline. Generic state that applies per-stage such as 2624 * binding table offsets and push constants is tracked generically with a 2625 * per-stage array in anv_cmd_state. 2626 */ 2627struct anv_cmd_pipeline_state { 2628 struct anv_descriptor_set *descriptors[MAX_SETS]; 2629 struct anv_push_descriptor_set *push_descriptors[MAX_SETS]; 2630 2631 struct anv_push_constants push_constants; 2632 2633 /* Push constant state allocated when flushing push constants. */ 2634 struct anv_state push_constants_state; 2635}; 2636 2637/** State tracking for graphics pipeline 2638 * 2639 * This has anv_cmd_pipeline_state as a base struct to track things which get 2640 * bound to a graphics pipeline. Along with general pipeline bind point state 2641 * which is in the anv_cmd_pipeline_state base struct, it also contains other 2642 * state which is graphics-specific. 
2643 */ 2644struct anv_cmd_graphics_state { 2645 struct anv_cmd_pipeline_state base; 2646 2647 struct anv_graphics_pipeline *pipeline; 2648 2649 VkRenderingFlags rendering_flags; 2650 VkRect2D render_area; 2651 uint32_t layer_count; 2652 uint32_t samples; 2653 uint32_t view_mask; 2654 uint32_t color_att_count; 2655 struct anv_state att_states; 2656 struct anv_attachment color_att[MAX_RTS]; 2657 struct anv_attachment depth_att; 2658 struct anv_attachment stencil_att; 2659 struct anv_state null_surface_state; 2660 2661 anv_cmd_dirty_mask_t dirty; 2662 uint32_t vb_dirty; 2663 2664 struct anv_vb_cache_range ib_bound_range; 2665 struct anv_vb_cache_range ib_dirty_range; 2666 struct anv_vb_cache_range vb_bound_ranges[33]; 2667 struct anv_vb_cache_range vb_dirty_ranges[33]; 2668 2669 uint32_t restart_index; 2670 2671 VkShaderStageFlags push_constant_stages; 2672 2673 uint32_t primitive_topology; 2674 2675 struct anv_buffer *index_buffer; 2676 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ 2677 uint32_t index_offset; 2678 2679 struct vk_sample_locations_state sample_locations; 2680}; 2681 2682enum anv_depth_reg_mode { 2683 ANV_DEPTH_REG_MODE_UNKNOWN = 0, 2684 ANV_DEPTH_REG_MODE_HW_DEFAULT, 2685 ANV_DEPTH_REG_MODE_D16_1X_MSAA, 2686}; 2687 2688/** State tracking for compute pipeline 2689 * 2690 * This has anv_cmd_pipeline_state as a base struct to track things which get 2691 * bound to a compute pipeline. Along with general pipeline bind point state 2692 * which is in the anv_cmd_pipeline_state base struct, it also contains other 2693 * state which is compute-specific. 2694 */ 2695struct anv_cmd_compute_state { 2696 struct anv_cmd_pipeline_state base; 2697 2698 struct anv_compute_pipeline *pipeline; 2699 2700 bool pipeline_dirty; 2701 2702 struct anv_state push_data; 2703 2704 struct anv_address num_workgroups; 2705}; 2706 2707struct anv_cmd_ray_tracing_state { 2708 struct anv_cmd_pipeline_state base; 2709 2710 struct anv_ray_tracing_pipeline *pipeline; 2711 2712 bool pipeline_dirty; 2713 2714 struct { 2715 struct anv_bo *bo; 2716 struct brw_rt_scratch_layout layout; 2717 } scratch; 2718}; 2719 2720/** State required while building cmd buffer */ 2721struct anv_cmd_state { 2722 /* PIPELINE_SELECT.PipelineSelection */ 2723 uint32_t current_pipeline; 2724 const struct intel_l3_config * current_l3_config; 2725 uint32_t last_aux_map_state; 2726 2727 struct anv_cmd_graphics_state gfx; 2728 struct anv_cmd_compute_state compute; 2729 struct anv_cmd_ray_tracing_state rt; 2730 2731 enum anv_pipe_bits pending_pipe_bits; 2732 VkShaderStageFlags descriptors_dirty; 2733 VkShaderStageFlags push_constants_dirty; 2734 2735 struct anv_vertex_binding vertex_bindings[MAX_VBS]; 2736 bool xfb_enabled; 2737 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; 2738 struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES]; 2739 struct anv_state samplers[MESA_VULKAN_SHADER_STAGES]; 2740 2741 unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20]; 2742 unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20]; 2743 unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20]; 2744 2745 /** 2746 * Whether or not the gfx8 PMA fix is enabled. We ensure that, at the top 2747 * of any command buffer it is disabled by disabling it in EndCommandBuffer 2748 * and before invoking the secondary in ExecuteCommands. 2749 */ 2750 bool pma_fix_enabled; 2751 2752 /** 2753 * Whether or not we know for certain that HiZ is enabled for the current 2754 * subpass. 
If, for whatever reason, we are unsure as to whether HiZ is 2755 * enabled or not, this will be false. 2756 */ 2757 bool hiz_enabled; 2758 2759 /* We ensure the registers for the gfx12 D16 fix are initialized at the 2760 * first non-NULL depth stencil packet emission of every command buffer. 2761 * For secondary command buffer execution, we transfer the state from the 2762 * last command buffer to the primary (if known). 2763 */ 2764 enum anv_depth_reg_mode depth_reg_mode; 2765 2766 /** 2767 * Whether RHWO optimization is enabled (Wa_1508744258). 2768 */ 2769 bool rhwo_optimization_enabled; 2770 2771 /** 2772 * Pending state of the RHWO optimization, to be applied at the next 2773 * genX(cmd_buffer_apply_pipe_flushes). 2774 */ 2775 bool pending_rhwo_optimization_enabled; 2776 2777 bool conditional_render_enabled; 2778 2779 /** 2780 * Last rendering scale argument provided to 2781 * genX(cmd_buffer_emit_hashing_mode)(). 2782 */ 2783 unsigned current_hash_scale; 2784 2785 /** 2786 * A buffer used for spill/fill of ray queries. 2787 */ 2788 struct anv_bo * ray_query_shadow_bo; 2789}; 2790 2791#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192 2792#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024) 2793 2794enum anv_cmd_buffer_exec_mode { 2795 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, 2796 ANV_CMD_BUFFER_EXEC_MODE_EMIT, 2797 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, 2798 ANV_CMD_BUFFER_EXEC_MODE_CHAIN, 2799 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, 2800 ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN, 2801}; 2802 2803struct anv_measure_batch; 2804 2805struct anv_cmd_buffer { 2806 struct vk_command_buffer vk; 2807 2808 struct anv_device * device; 2809 struct anv_queue_family * queue_family; 2810 2811 struct anv_batch batch; 2812 2813 /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was 2814 * recorded upon calling vkEndCommandBuffer(). This is useful if we need to 2815 * rewrite the end to chain multiple batch together at vkQueueSubmit(). 2816 */ 2817 void * batch_end; 2818 2819 /* Fields required for the actual chain of anv_batch_bo's. 2820 * 2821 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 2822 */ 2823 struct list_head batch_bos; 2824 enum anv_cmd_buffer_exec_mode exec_mode; 2825 2826 /* A vector of anv_batch_bo pointers for every batch or surface buffer 2827 * referenced by this command buffer 2828 * 2829 * initialized by anv_cmd_buffer_init_batch_bo_chain() 2830 */ 2831 struct u_vector seen_bbos; 2832 2833 /* A vector of int32_t's for every block of binding tables. 
2834 * 2835 * initialized by anv_cmd_buffer_init_batch_bo_chain() 2836 */ 2837 struct u_vector bt_block_states; 2838 struct anv_state bt_next; 2839 2840 struct anv_reloc_list surface_relocs; 2841 /** Last seen surface state block pool center bo offset */ 2842 uint32_t last_ss_pool_center; 2843 2844 /* Serial for tracking buffer completion */ 2845 uint32_t serial; 2846 2847 /* Stream objects for storing temporary data */ 2848 struct anv_state_stream surface_state_stream; 2849 struct anv_state_stream dynamic_state_stream; 2850 struct anv_state_stream general_state_stream; 2851 2852 VkCommandBufferUsageFlags usage_flags; 2853 2854 struct anv_query_pool *perf_query_pool; 2855 2856 struct anv_cmd_state state; 2857 2858 struct anv_address return_addr; 2859 2860 /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */ 2861 uint64_t intel_perf_marker; 2862 2863 struct anv_measure_batch *measure; 2864 2865 /** 2866 * KHR_performance_query requires self modifying command buffers and this 2867 * array has the location of modifying commands to the query begin and end 2868 * instructions storing performance counters. The array length is 2869 * anv_physical_device::n_perf_query_commands. 2870 */ 2871 struct mi_address_token *self_mod_locations; 2872 2873 /** 2874 * Index tracking which of the self_mod_locations items have already been 2875 * used. 2876 */ 2877 uint32_t perf_reloc_idx; 2878 2879 /** 2880 * Sum of all the anv_batch_bo sizes allocated for this command buffer. 2881 * Used to increase allocation size for long command buffers. 2882 */ 2883 uint32_t total_batch_size; 2884 2885 /** 2886 * 2887 */ 2888 struct u_trace trace; 2889}; 2890 2891/* Determine whether we can chain a given cmd_buffer to another one. We need 2892 * softpin and we also need to make sure that we can edit the end of the batch 2893 * to point to next one, which requires the command buffer to not be used 2894 * simultaneously. 
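 * Chaining works by rewriting the MI_BATCH_BUFFER_END recorded at batch_end
 * into an MI_BATCH_BUFFER_START that jumps to the next command buffer's
 * batch, which is why a simultaneous-use command buffer cannot be chained.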
2895 */ 2896static inline bool 2897anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer) 2898{ 2899 return !anv_use_relocations(cmd_buffer->device->physical) && 2900 !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT); 2901} 2902 2903VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); 2904void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); 2905void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); 2906void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); 2907void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, 2908 struct anv_cmd_buffer *secondary); 2909void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); 2910VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue, 2911 struct anv_cmd_buffer *cmd_buffer, 2912 const VkSemaphore *in_semaphores, 2913 const uint64_t *in_wait_values, 2914 uint32_t num_in_semaphores, 2915 const VkSemaphore *out_semaphores, 2916 const uint64_t *out_signal_values, 2917 uint32_t num_out_semaphores, 2918 VkFence fence, 2919 int perf_query_pass); 2920 2921VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); 2922 2923struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, 2924 const void *data, uint32_t size, uint32_t alignment); 2925struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, 2926 uint32_t *a, uint32_t *b, 2927 uint32_t dwords, uint32_t alignment); 2928 2929struct anv_address 2930anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); 2931struct anv_state 2932anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, 2933 uint32_t entries, uint32_t *state_offset); 2934struct anv_state 2935anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); 2936struct anv_state 2937anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, 2938 uint32_t size, uint32_t alignment); 2939 2940VkResult 2941anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); 2942 2943void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); 2944 2945struct anv_state 2946anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer); 2947struct anv_state 2948anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); 2949 2950VkResult 2951anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer, 2952 uint32_t num_entries, 2953 uint32_t *state_offset, 2954 struct anv_state *bt_state); 2955 2956void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); 2957 2958void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer); 2959 2960enum anv_bo_sync_state { 2961 /** Indicates that this is a new (or newly reset fence) */ 2962 ANV_BO_SYNC_STATE_RESET, 2963 2964 /** Indicates that this fence has been submitted to the GPU but is still 2965 * (as far as we know) in use by the GPU. 
2966 */ 2967 ANV_BO_SYNC_STATE_SUBMITTED, 2968 2969 ANV_BO_SYNC_STATE_SIGNALED, 2970}; 2971 2972struct anv_bo_sync { 2973 struct vk_sync sync; 2974 2975 enum anv_bo_sync_state state; 2976 struct anv_bo *bo; 2977}; 2978 2979extern const struct vk_sync_type anv_bo_sync_type; 2980 2981static inline bool 2982vk_sync_is_anv_bo_sync(const struct vk_sync *sync) 2983{ 2984 return sync->type == &anv_bo_sync_type; 2985} 2986 2987VkResult anv_create_sync_for_memory(struct vk_device *device, 2988 VkDeviceMemory memory, 2989 bool signal_memory, 2990 struct vk_sync **sync_out); 2991 2992struct anv_event { 2993 struct vk_object_base base; 2994 uint64_t semaphore; 2995 struct anv_state state; 2996}; 2997 2998#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) 2999 3000#define anv_foreach_stage(stage, stage_bits) \ 3001 for (gl_shader_stage stage, \ 3002 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ 3003 stage = __builtin_ffs(__tmp) - 1, __tmp; \ 3004 __tmp &= ~(1 << (stage))) 3005 3006struct anv_pipeline_bind_map { 3007 unsigned char surface_sha1[20]; 3008 unsigned char sampler_sha1[20]; 3009 unsigned char push_sha1[20]; 3010 3011 uint32_t surface_count; 3012 uint32_t sampler_count; 3013 3014 struct anv_pipeline_binding * surface_to_descriptor; 3015 struct anv_pipeline_binding * sampler_to_descriptor; 3016 3017 struct anv_push_range push_ranges[4]; 3018}; 3019 3020struct anv_shader_bin { 3021 struct vk_pipeline_cache_object base; 3022 3023 gl_shader_stage stage; 3024 3025 struct anv_state kernel; 3026 uint32_t kernel_size; 3027 3028 const struct brw_stage_prog_data *prog_data; 3029 uint32_t prog_data_size; 3030 3031 struct brw_compile_stats stats[3]; 3032 uint32_t num_stats; 3033 3034 struct nir_xfb_info *xfb_info; 3035 3036 struct anv_pipeline_bind_map bind_map; 3037}; 3038 3039struct anv_shader_bin * 3040anv_shader_bin_create(struct anv_device *device, 3041 gl_shader_stage stage, 3042 const void *key, uint32_t key_size, 3043 const void *kernel, uint32_t kernel_size, 3044 const struct brw_stage_prog_data *prog_data, 3045 uint32_t prog_data_size, 3046 const struct brw_compile_stats *stats, uint32_t num_stats, 3047 const struct nir_xfb_info *xfb_info, 3048 const struct anv_pipeline_bind_map *bind_map); 3049 3050static inline void 3051anv_shader_bin_ref(struct anv_shader_bin *shader) 3052{ 3053 vk_pipeline_cache_object_ref(&shader->base); 3054} 3055 3056static inline void 3057anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) 3058{ 3059 vk_pipeline_cache_object_unref(&shader->base); 3060} 3061 3062#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ 3063 assert((local_arg_offset) % 8 == 0); \ 3064 const struct brw_bs_prog_data *prog_data = \ 3065 brw_bs_prog_data_const(bin->prog_data); \ 3066 assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \ 3067 \ 3068 (struct GFX_BINDLESS_SHADER_RECORD) { \ 3069 .OffsetToLocalArguments = (local_arg_offset) / 8, \ 3070 .BindlessShaderDispatchMode = \ 3071 prog_data->simd_size == 16 ? 
RT_SIMD16 : RT_SIMD8, \ 3072 .KernelStartPointer = bin->kernel.offset, \ 3073 }; \ 3074}) 3075 3076struct anv_pipeline_executable { 3077 gl_shader_stage stage; 3078 3079 struct brw_compile_stats stats; 3080 3081 char *nir; 3082 char *disasm; 3083}; 3084 3085enum anv_pipeline_type { 3086 ANV_PIPELINE_GRAPHICS, 3087 ANV_PIPELINE_COMPUTE, 3088 ANV_PIPELINE_RAY_TRACING, 3089}; 3090 3091struct anv_pipeline { 3092 struct vk_object_base base; 3093 3094 struct anv_device * device; 3095 3096 struct anv_batch batch; 3097 struct anv_reloc_list batch_relocs; 3098 3099 void * mem_ctx; 3100 3101 enum anv_pipeline_type type; 3102 VkPipelineCreateFlags flags; 3103 3104 uint32_t ray_queries; 3105 3106 struct util_dynarray executables; 3107 3108 const struct intel_l3_config * l3_config; 3109}; 3110 3111struct anv_graphics_pipeline { 3112 struct anv_pipeline base; 3113 3114 /* Shaders */ 3115 struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT]; 3116 3117 VkShaderStageFlags active_stages; 3118 3119 struct vk_sample_locations_state sample_locations; 3120 struct vk_dynamic_graphics_state dynamic_state; 3121 3122 /* These fields are required with dynamic primitive topology, 3123 * rasterization_samples used only with gen < 8. 3124 */ 3125 VkLineRasterizationModeEXT line_mode; 3126 VkPolygonMode polygon_mode; 3127 uint32_t patch_control_points; 3128 uint32_t rasterization_samples; 3129 3130 VkColorComponentFlags color_comp_writes[MAX_RTS]; 3131 3132 uint32_t view_mask; 3133 uint32_t instance_multiplier; 3134 3135 bool depth_clamp_enable; 3136 bool depth_clip_enable; 3137 bool kill_pixel; 3138 bool force_fragment_thread_dispatch; 3139 bool negative_one_to_one; 3140 3141 /* When primitive replication is used, subpass->view_mask will describe what 3142 * views to replicate. 3143 */ 3144 bool use_primitive_replication; 3145 3146 uint32_t vb_used; 3147 struct anv_pipeline_vertex_binding { 3148 uint32_t stride; 3149 bool instanced; 3150 uint32_t instance_divisor; 3151 } vb[MAX_VBS]; 3152 3153 /* Pre computed CS instructions that can directly be copied into 3154 * anv_cmd_buffer. 3155 */ 3156 uint32_t batch_data[512]; 3157 3158 /* Pre packed CS instructions & structures that need to be merged later 3159 * with dynamic state. 3160 */ 3161 struct { 3162 uint32_t sf[7]; 3163 uint32_t clip[4]; 3164 uint32_t xfb_bo_pitch[4]; 3165 uint32_t wm[3]; 3166 uint32_t blend_state[MAX_RTS * 2]; 3167 uint32_t streamout_state[3]; 3168 } gfx7; 3169 3170 struct { 3171 uint32_t sf[4]; 3172 uint32_t raster[5]; 3173 uint32_t wm[2]; 3174 uint32_t ps_blend[2]; 3175 uint32_t blend_state[1 + MAX_RTS * 2]; 3176 uint32_t streamout_state[5]; 3177 } gfx8; 3178}; 3179 3180struct anv_compute_pipeline { 3181 struct anv_pipeline base; 3182 3183 struct anv_shader_bin * cs; 3184 uint32_t batch_data[9]; 3185 uint32_t interface_descriptor_data[8]; 3186}; 3187 3188struct anv_rt_shader_group { 3189 VkRayTracingShaderGroupTypeKHR type; 3190 3191 struct anv_shader_bin *general; 3192 struct anv_shader_bin *closest_hit; 3193 struct anv_shader_bin *any_hit; 3194 struct anv_shader_bin *intersection; 3195 3196 /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */ 3197 uint32_t handle[8]; 3198}; 3199 3200struct anv_ray_tracing_pipeline { 3201 struct anv_pipeline base; 3202 3203 /* All shaders in the pipeline */ 3204 struct util_dynarray shaders; 3205 3206 uint32_t group_count; 3207 struct anv_rt_shader_group * groups; 3208 3209 /* If non-zero, this is the default computed stack size as per the stack 3210 * size computation in the Vulkan spec. 
If zero, that indicates that the 3211 * client has requested a dynamic stack size. 3212 */ 3213 uint32_t stack_size; 3214}; 3215 3216#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ 3217 static inline struct anv_##pipe_type##_pipeline * \ 3218 anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \ 3219 { \ 3220 assert(pipeline->type == pipe_enum); \ 3221 return (struct anv_##pipe_type##_pipeline *) pipeline; \ 3222 } 3223 3224ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS) 3225ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE) 3226ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING) 3227 3228static inline bool 3229anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline, 3230 gl_shader_stage stage) 3231{ 3232 return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0; 3233} 3234 3235static inline bool 3236anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline) 3237{ 3238 return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX); 3239} 3240 3241static inline bool 3242anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline) 3243{ 3244 return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH); 3245} 3246 3247static inline bool 3248anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer) 3249{ 3250 const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx; 3251 const struct vk_dynamic_graphics_state *dyn = 3252 &cmd_buffer->vk.dynamic_graphics_state; 3253 uint8_t color_writes = dyn->cb.color_write_enables; 3254 3255 /* All writes disabled through vkCmdSetColorWriteEnableEXT */ 3256 if ((color_writes & ((1u << state->color_att_count) - 1)) == 0) 3257 return true; 3258 3259 /* Or all write masks are empty */ 3260 for (uint32_t i = 0; i < state->color_att_count; i++) { 3261 if (state->pipeline->color_comp_writes[i] != 0) 3262 return false; 3263 } 3264 3265 return true; 3266} 3267 3268#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \ 3269static inline const struct brw_##prefix##_prog_data * \ 3270get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \ 3271{ \ 3272 if (anv_pipeline_has_stage(pipeline, stage)) { \ 3273 return (const struct brw_##prefix##_prog_data *) \ 3274 pipeline->shaders[stage]->prog_data; \ 3275 } else { \ 3276 return NULL; \ 3277 } \ 3278} 3279 3280ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) 3281ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL) 3282ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL) 3283ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) 3284ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) 3285ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH) 3286ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK) 3287 3288static inline const struct brw_cs_prog_data * 3289get_cs_prog_data(const struct anv_compute_pipeline *pipeline) 3290{ 3291 assert(pipeline->cs); 3292 return (const struct brw_cs_prog_data *) pipeline->cs->prog_data; 3293} 3294 3295static inline const struct brw_vue_prog_data * 3296anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline) 3297{ 3298 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 3299 return &get_gs_prog_data(pipeline)->base; 3300 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) 3301 return &get_tes_prog_data(pipeline)->base; 3302 else 3303 return &get_vs_prog_data(pipeline)->base; 3304} 3305 3306VkResult 3307anv_device_init_rt_shaders(struct 
anv_device *device); 3308 3309void 3310anv_device_finish_rt_shaders(struct anv_device *device); 3311 3312VkResult 3313anv_pipeline_init(struct anv_pipeline *pipeline, 3314 struct anv_device *device, 3315 enum anv_pipeline_type type, 3316 VkPipelineCreateFlags flags, 3317 const VkAllocationCallbacks *pAllocator); 3318 3319void 3320anv_pipeline_finish(struct anv_pipeline *pipeline, 3321 struct anv_device *device, 3322 const VkAllocationCallbacks *pAllocator); 3323 3324struct anv_format_plane { 3325 enum isl_format isl_format:16; 3326 struct isl_swizzle swizzle; 3327 3328 /* Whether this plane contains chroma channels */ 3329 bool has_chroma; 3330 3331 /* For downscaling of YUV planes */ 3332 uint8_t denominator_scales[2]; 3333 3334 /* How to map sampled ycbcr planes to a single 4 component element. */ 3335 struct isl_swizzle ycbcr_swizzle; 3336 3337 /* What aspect is associated to this plane */ 3338 VkImageAspectFlags aspect; 3339}; 3340 3341 3342struct anv_format { 3343 struct anv_format_plane planes[3]; 3344 VkFormat vk_format; 3345 uint8_t n_planes; 3346 bool can_ycbcr; 3347}; 3348 3349static inline void 3350anv_assert_valid_aspect_set(VkImageAspectFlags aspects) 3351{ 3352 if (util_bitcount(aspects) == 1) { 3353 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | 3354 VK_IMAGE_ASPECT_DEPTH_BIT | 3355 VK_IMAGE_ASPECT_STENCIL_BIT | 3356 VK_IMAGE_ASPECT_PLANE_0_BIT | 3357 VK_IMAGE_ASPECT_PLANE_1_BIT | 3358 VK_IMAGE_ASPECT_PLANE_2_BIT)); 3359 } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) { 3360 assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT || 3361 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT | 3362 VK_IMAGE_ASPECT_PLANE_1_BIT) || 3363 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT | 3364 VK_IMAGE_ASPECT_PLANE_1_BIT | 3365 VK_IMAGE_ASPECT_PLANE_2_BIT)); 3366 } else { 3367 assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | 3368 VK_IMAGE_ASPECT_STENCIL_BIT)); 3369 } 3370} 3371 3372/** 3373 * Return the aspect's plane relative to all_aspects. For an image, for 3374 * instance, all_aspects would be the set of aspects in the image. For 3375 * an image view, all_aspects would be the subset of aspects represented 3376 * by that particular view. 3377 */ 3378static inline uint32_t 3379anv_aspect_to_plane(VkImageAspectFlags all_aspects, 3380 VkImageAspectFlagBits aspect) 3381{ 3382 anv_assert_valid_aspect_set(all_aspects); 3383 assert(util_bitcount(aspect) == 1); 3384 assert(!(aspect & ~all_aspects)); 3385 3386 /* Because we always put image and view planes in aspect-bit-order, the 3387 * plane index is the number of bits in all_aspects before aspect. 3388 */ 3389 return util_bitcount(all_aspects & (aspect - 1)); 3390} 3391 3392#define anv_foreach_image_aspect_bit(b, image, aspects) \ 3393 u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects)) 3394 3395const struct anv_format * 3396anv_get_format(VkFormat format); 3397 3398static inline uint32_t 3399anv_get_format_planes(VkFormat vk_format) 3400{ 3401 const struct anv_format *format = anv_get_format(vk_format); 3402 3403 return format != NULL ? 
format->n_planes : 0; 3404} 3405 3406struct anv_format_plane 3407anv_get_format_plane(const struct intel_device_info *devinfo, 3408 VkFormat vk_format, uint32_t plane, 3409 VkImageTiling tiling); 3410 3411struct anv_format_plane 3412anv_get_format_aspect(const struct intel_device_info *devinfo, 3413 VkFormat vk_format, 3414 VkImageAspectFlagBits aspect, VkImageTiling tiling); 3415 3416static inline enum isl_format 3417anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format, 3418 VkImageAspectFlags aspect, VkImageTiling tiling) 3419{ 3420 return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format; 3421} 3422 3423bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo, 3424 VkImageCreateFlags create_flags, 3425 VkFormat vk_format, VkImageTiling vk_tiling, 3426 VkImageUsageFlags vk_usage, 3427 const VkImageFormatListCreateInfo *fmt_list); 3428 3429extern VkFormat 3430vk_format_from_android(unsigned android_format, unsigned android_usage); 3431 3432static inline struct isl_swizzle 3433anv_swizzle_for_render(struct isl_swizzle swizzle) 3434{ 3435 /* Sometimes the swizzle will have alpha map to one. We do this to fake 3436 * RGB as RGBA for texturing 3437 */ 3438 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE || 3439 swizzle.a == ISL_CHANNEL_SELECT_ALPHA); 3440 3441 /* But it doesn't matter what we render to that channel */ 3442 swizzle.a = ISL_CHANNEL_SELECT_ALPHA; 3443 3444 return swizzle; 3445} 3446 3447void 3448anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm); 3449 3450/** 3451 * Describes how each part of anv_image will be bound to memory. 3452 */ 3453struct anv_image_memory_range { 3454 /** 3455 * Disjoint bindings into which each portion of the image will be bound. 3456 * 3457 * Binding images to memory can be complicated and invold binding different 3458 * portions of the image to different memory objects or regions. For most 3459 * images, everything lives in the MAIN binding and gets bound by 3460 * vkBindImageMemory. For disjoint multi-planar images, each plane has 3461 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with 3462 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are 3463 * implicit or driver-managed and live in special-case bindings. 3464 */ 3465 enum anv_image_memory_binding { 3466 /** 3467 * Used if and only if image is not multi-planar disjoint. Bound by 3468 * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo. 3469 */ 3470 ANV_IMAGE_MEMORY_BINDING_MAIN, 3471 3472 /** 3473 * Used if and only if image is multi-planar disjoint. Bound by 3474 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo. 3475 */ 3476 ANV_IMAGE_MEMORY_BINDING_PLANE_0, 3477 ANV_IMAGE_MEMORY_BINDING_PLANE_1, 3478 ANV_IMAGE_MEMORY_BINDING_PLANE_2, 3479 3480 /** 3481 * Driver-private bo. In special cases we may store the aux surface and/or 3482 * aux state in this binding. 3483 */ 3484 ANV_IMAGE_MEMORY_BINDING_PRIVATE, 3485 3486 /** Sentinel */ 3487 ANV_IMAGE_MEMORY_BINDING_END, 3488 } binding; 3489 3490 /** 3491 * Offset is relative to the start of the binding created by 3492 * vkBindImageMemory, not to the start of the bo. 3493 */ 3494 uint64_t offset; 3495 3496 uint64_t size; 3497 uint32_t alignment; 3498}; 3499 3500/** 3501 * Subsurface of an anv_image. 
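 *
 * A surface is considered valid only when both its ISL surface size and its
 * memory range size are non-zero; see anv_surface_is_valid() below.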

/**
 * Subsurface of an anv_image.
 */
struct anv_surface {
   struct isl_surf isl;
   struct anv_image_memory_range memory_range;
};

static inline bool MUST_CHECK
anv_surface_is_valid(const struct anv_surface *surface)
{
   return surface->isl.size_B > 0 && surface->memory_range.size > 0;
}

struct anv_image {
   struct vk_image vk;

   uint32_t n_planes;

   /**
    * Image has multi-planar format and was created with
    * VK_IMAGE_CREATE_DISJOINT_BIT.
    */
   bool disjoint;

   /**
    * Image was imported from a struct AHardwareBuffer. We have to delay
    * final image creation until bind time.
    */
   bool from_ahb;

   /**
    * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc
    * bo must be released when the image is destroyed.
    */
   bool from_gralloc;

   /**
    * The memory bindings created by vkCreateImage and vkBindImageMemory.
    *
    * For details on the image's memory layout, see check_memory_bindings().
    *
    * vkCreateImage constructs the `memory_range` for each
    * anv_image_memory_binding. After vkCreateImage, each binding is valid if
    * and only if `memory_range::size > 0`.
    *
    * vkBindImageMemory binds each valid `memory_range` to an `address`.
    * Usually, the app will provide the address via the parameters of
    * vkBindImageMemory. However, special-case bindings may be bound to
    * driver-private memory.
    */
   struct anv_image_binding {
      struct anv_image_memory_range memory_range;
      struct anv_address address;
   } bindings[ANV_IMAGE_MEMORY_BINDING_END];

   /**
    * Image subsurfaces
    *
    * For each aspect, the corresponding entry in anv_image::planes[] is
    * valid if and only if anv_image::aspects contains that aspect. Refer to
    * anv_image_aspect_to_plane() to determine the plane index associated
    * with a given aspect.
    *
    * The hardware requires that the depth buffer and stencil buffer be
    * separate surfaces. From Vulkan's perspective, though, depth and stencil
    * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
    * allocate the depth and stencil buffers as separate surfaces in the same
    * bo.
    */
   struct anv_image_plane {
      struct anv_surface primary_surface;

      /**
       * A surface which shadows the main surface and may have different
       * tiling. This is used for sampling using a tiling that isn't
       * supported for other operations.
       */
      struct anv_surface shadow_surface;

      /**
       * The base aux usage for this image. For color images, this can be
       * either CCS_E or CCS_D depending on whether or not we can reliably
       * leave CCS on all the time.
       */
      enum isl_aux_usage aux_usage;

      struct anv_surface aux_surface;

      /** Location of the fast clear state. */
      struct anv_image_memory_range fast_clear_memory_range;

      /**
       * Whether this image can be fast cleared with non-zero clear colors.
       * This can happen with mutable images when formats with different bit
       * sizes per component are used.
       *
       * On Gfx9+, because the clear colors are stored as four 32-bit
       * components, we can clear in R16G16_UNORM (storing two 16-bit values
       * in components 0 & 1 of the clear color) and then draw in R32_UINT,
       * which would interpret the clear color as a single-component value,
       * using only the first 16-bit component of the previously written
       * clear color.
3601 * 3602 * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported, this 3603 * boolean will prevent the usage of CC_ONE. 3604 */ 3605 bool can_non_zero_fast_clear; 3606 } planes[3]; 3607}; 3608 3609static inline bool 3610anv_image_is_externally_shared(const struct anv_image *image) 3611{ 3612 return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID || 3613 image->vk.external_handle_types != 0; 3614} 3615 3616static inline bool 3617anv_image_has_private_binding(const struct anv_image *image) 3618{ 3619 const struct anv_image_binding private_binding = 3620 image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE]; 3621 return private_binding.memory_range.size != 0; 3622} 3623 3624/* The ordering of this enum is important */ 3625enum anv_fast_clear_type { 3626 /** Image does not have/support any fast-clear blocks */ 3627 ANV_FAST_CLEAR_NONE = 0, 3628 /** Image has/supports fast-clear but only to the default value */ 3629 ANV_FAST_CLEAR_DEFAULT_VALUE = 1, 3630 /** Image has/supports fast-clear with an arbitrary fast-clear value */ 3631 ANV_FAST_CLEAR_ANY = 2, 3632}; 3633 3634/** 3635 * Return the aspect's _format_ plane, not its _memory_ plane (using the 3636 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a 3637 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain 3638 * VK_IMAGE_ASPECT_MEMORY_PLANE_* . 3639 */ 3640static inline uint32_t 3641anv_image_aspect_to_plane(const struct anv_image *image, 3642 VkImageAspectFlagBits aspect) 3643{ 3644 return anv_aspect_to_plane(image->vk.aspects, aspect); 3645} 3646 3647/* Returns the number of auxiliary buffer levels attached to an image. */ 3648static inline uint8_t 3649anv_image_aux_levels(const struct anv_image * const image, 3650 VkImageAspectFlagBits aspect) 3651{ 3652 uint32_t plane = anv_image_aspect_to_plane(image, aspect); 3653 if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) 3654 return 0; 3655 3656 return image->vk.mip_levels; 3657} 3658 3659/* Returns the number of auxiliary buffer layers attached to an image. */ 3660static inline uint32_t 3661anv_image_aux_layers(const struct anv_image * const image, 3662 VkImageAspectFlagBits aspect, 3663 const uint8_t miplevel) 3664{ 3665 assert(image); 3666 3667 /* The miplevel must exist in the main buffer. */ 3668 assert(miplevel < image->vk.mip_levels); 3669 3670 if (miplevel >= anv_image_aux_levels(image, aspect)) { 3671 /* There are no layers with auxiliary data because the miplevel has no 3672 * auxiliary data. 
3673 */ 3674 return 0; 3675 } 3676 3677 return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel); 3678} 3679 3680static inline struct anv_address MUST_CHECK 3681anv_image_address(const struct anv_image *image, 3682 const struct anv_image_memory_range *mem_range) 3683{ 3684 const struct anv_image_binding *binding = &image->bindings[mem_range->binding]; 3685 assert(binding->memory_range.offset == 0); 3686 3687 if (mem_range->size == 0) 3688 return ANV_NULL_ADDRESS; 3689 3690 return anv_address_add(binding->address, mem_range->offset); 3691} 3692 3693static inline struct anv_address 3694anv_image_get_clear_color_addr(UNUSED const struct anv_device *device, 3695 const struct anv_image *image, 3696 VkImageAspectFlagBits aspect) 3697{ 3698 assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV | 3699 VK_IMAGE_ASPECT_DEPTH_BIT)); 3700 3701 uint32_t plane = anv_image_aspect_to_plane(image, aspect); 3702 const struct anv_image_memory_range *mem_range = 3703 &image->planes[plane].fast_clear_memory_range; 3704 3705 return anv_image_address(image, mem_range); 3706} 3707 3708static inline struct anv_address 3709anv_image_get_fast_clear_type_addr(const struct anv_device *device, 3710 const struct anv_image *image, 3711 VkImageAspectFlagBits aspect) 3712{ 3713 struct anv_address addr = 3714 anv_image_get_clear_color_addr(device, image, aspect); 3715 3716 const unsigned clear_color_state_size = device->info.ver >= 10 ? 3717 device->isl_dev.ss.clear_color_state_size : 3718 device->isl_dev.ss.clear_value_size; 3719 return anv_address_add(addr, clear_color_state_size); 3720} 3721 3722static inline struct anv_address 3723anv_image_get_compression_state_addr(const struct anv_device *device, 3724 const struct anv_image *image, 3725 VkImageAspectFlagBits aspect, 3726 uint32_t level, uint32_t array_layer) 3727{ 3728 assert(level < anv_image_aux_levels(image, aspect)); 3729 assert(array_layer < anv_image_aux_layers(image, aspect, level)); 3730 UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect); 3731 assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E); 3732 3733 /* Relative to start of the plane's fast clear memory range */ 3734 uint32_t offset; 3735 3736 offset = 4; /* Go past the fast clear type */ 3737 3738 if (image->vk.image_type == VK_IMAGE_TYPE_3D) { 3739 for (uint32_t l = 0; l < level; l++) 3740 offset += anv_minify(image->vk.extent.depth, l) * 4; 3741 } else { 3742 offset += level * image->vk.array_layers * 4; 3743 } 3744 3745 offset += array_layer * 4; 3746 3747 assert(offset < image->planes[plane].fast_clear_memory_range.size); 3748 3749 return anv_address_add( 3750 anv_image_get_fast_clear_type_addr(device, image, aspect), 3751 offset); 3752} 3753 3754/* Returns true if a HiZ-enabled depth buffer can be sampled from. */ 3755static inline bool 3756anv_can_sample_with_hiz(const struct intel_device_info * const devinfo, 3757 const struct anv_image *image) 3758{ 3759 if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) 3760 return false; 3761 3762 /* For Gfx8-11, there are some restrictions around sampling from HiZ. 3763 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode 3764 * say: 3765 * 3766 * "If this field is set to AUX_HIZ, Number of Multisamples must 3767 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D." 3768 */ 3769 if (image->vk.image_type == VK_IMAGE_TYPE_3D) 3770 return false; 3771 3772 /* Allow this feature on BDW even though it is disabled in the BDW devinfo 3773 * struct. 
There's documentation which suggests that this feature actually 3774 * reduces performance on BDW, but it has only been observed to help so 3775 * far. Sampling fast-cleared blocks on BDW must also be handled with care 3776 * (see depth_stencil_attachment_compute_aux_usage() for more info). 3777 */ 3778 if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz) 3779 return false; 3780 3781 return image->vk.samples == 1; 3782} 3783 3784/* Returns true if an MCS-enabled buffer can be sampled from. */ 3785static inline bool 3786anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo, 3787 const struct anv_image *image) 3788{ 3789 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT); 3790 const uint32_t plane = 3791 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT); 3792 3793 assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage)); 3794 3795 const struct anv_surface *anv_surf = &image->planes[plane].primary_surface; 3796 3797 /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears. 3798 * See HSD 1707282275, wa_14013111325. Due to the use of 3799 * format-reinterpretation, a simplified workaround is implemented. 3800 */ 3801 if (devinfo->ver >= 12 && 3802 isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) { 3803 return false; 3804 } 3805 3806 return true; 3807} 3808 3809static inline bool 3810anv_image_plane_uses_aux_map(const struct anv_device *device, 3811 const struct anv_image *image, 3812 uint32_t plane) 3813{ 3814 return device->info.has_aux_map && 3815 isl_aux_usage_has_ccs(image->planes[plane].aux_usage); 3816} 3817 3818void 3819anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer, 3820 const struct anv_image *image, 3821 VkImageAspectFlagBits aspect, 3822 enum isl_aux_usage aux_usage, 3823 uint32_t level, 3824 uint32_t base_layer, 3825 uint32_t layer_count); 3826 3827void 3828anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer, 3829 const struct anv_image *image, 3830 VkImageAspectFlagBits aspect, 3831 enum isl_aux_usage aux_usage, 3832 enum isl_format format, struct isl_swizzle swizzle, 3833 uint32_t level, uint32_t base_layer, uint32_t layer_count, 3834 VkRect2D area, union isl_color_value clear_color); 3835void 3836anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, 3837 const struct anv_image *image, 3838 VkImageAspectFlags aspects, 3839 enum isl_aux_usage depth_aux_usage, 3840 uint32_t level, 3841 uint32_t base_layer, uint32_t layer_count, 3842 VkRect2D area, 3843 float depth_value, uint8_t stencil_value); 3844void 3845anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, 3846 const struct anv_image *src_image, 3847 enum isl_aux_usage src_aux_usage, 3848 uint32_t src_level, uint32_t src_base_layer, 3849 const struct anv_image *dst_image, 3850 enum isl_aux_usage dst_aux_usage, 3851 uint32_t dst_level, uint32_t dst_base_layer, 3852 VkImageAspectFlagBits aspect, 3853 uint32_t src_x, uint32_t src_y, 3854 uint32_t dst_x, uint32_t dst_y, 3855 uint32_t width, uint32_t height, 3856 uint32_t layer_count, 3857 enum blorp_filter filter); 3858void 3859anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, 3860 const struct anv_image *image, 3861 VkImageAspectFlagBits aspect, uint32_t level, 3862 uint32_t base_layer, uint32_t layer_count, 3863 enum isl_aux_op hiz_op); 3864void 3865anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, 3866 const struct anv_image *image, 3867 VkImageAspectFlags aspects, 3868 uint32_t level, 3869 uint32_t base_layer, uint32_t layer_count, 3870 VkRect2D area, uint8_t 
stencil_value); 3871void 3872anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, 3873 const struct anv_image *image, 3874 enum isl_format format, struct isl_swizzle swizzle, 3875 VkImageAspectFlagBits aspect, 3876 uint32_t base_layer, uint32_t layer_count, 3877 enum isl_aux_op mcs_op, union isl_color_value *clear_value, 3878 bool predicate); 3879void 3880anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, 3881 const struct anv_image *image, 3882 enum isl_format format, struct isl_swizzle swizzle, 3883 VkImageAspectFlagBits aspect, uint32_t level, 3884 uint32_t base_layer, uint32_t layer_count, 3885 enum isl_aux_op ccs_op, union isl_color_value *clear_value, 3886 bool predicate); 3887 3888void 3889anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, 3890 const struct anv_image *image, 3891 VkImageAspectFlagBits aspect, 3892 uint32_t base_level, uint32_t level_count, 3893 uint32_t base_layer, uint32_t layer_count); 3894 3895enum isl_aux_state ATTRIBUTE_PURE 3896anv_layout_to_aux_state(const struct intel_device_info * const devinfo, 3897 const struct anv_image *image, 3898 const VkImageAspectFlagBits aspect, 3899 const VkImageLayout layout); 3900 3901enum isl_aux_usage ATTRIBUTE_PURE 3902anv_layout_to_aux_usage(const struct intel_device_info * const devinfo, 3903 const struct anv_image *image, 3904 const VkImageAspectFlagBits aspect, 3905 const VkImageUsageFlagBits usage, 3906 const VkImageLayout layout); 3907 3908enum anv_fast_clear_type ATTRIBUTE_PURE 3909anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo, 3910 const struct anv_image * const image, 3911 const VkImageAspectFlagBits aspect, 3912 const VkImageLayout layout); 3913 3914static inline bool 3915anv_image_aspects_compatible(VkImageAspectFlags aspects1, 3916 VkImageAspectFlags aspects2) 3917{ 3918 if (aspects1 == aspects2) 3919 return true; 3920 3921 /* Only 1 color aspects are compatibles. */ 3922 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 && 3923 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 && 3924 util_bitcount(aspects1) == util_bitcount(aspects2)) 3925 return true; 3926 3927 return false; 3928} 3929 3930struct anv_image_view { 3931 struct vk_image_view vk; 3932 3933 const struct anv_image *image; /**< VkImageViewCreateInfo::image */ 3934 3935 unsigned n_planes; 3936 struct { 3937 uint32_t image_plane; 3938 3939 struct isl_view isl; 3940 3941 /** 3942 * RENDER_SURFACE_STATE when using image as a sampler surface with an 3943 * image layout of SHADER_READ_ONLY_OPTIMAL or 3944 * DEPTH_STENCIL_READ_ONLY_OPTIMAL. 3945 */ 3946 struct anv_surface_state optimal_sampler_surface_state; 3947 3948 /** 3949 * RENDER_SURFACE_STATE when using image as a sampler surface with an 3950 * image layout of GENERAL. 3951 */ 3952 struct anv_surface_state general_sampler_surface_state; 3953 3954 /** 3955 * RENDER_SURFACE_STATE when using image as a storage image. Separate 3956 * states for vanilla (with the original format) and one which has been 3957 * lowered to a format suitable for reading. This may be a raw surface 3958 * in extreme cases or simply a surface with a different format where we 3959 * expect some conversion to be done in the shader. 
3960 */ 3961 struct anv_surface_state storage_surface_state; 3962 struct anv_surface_state lowered_storage_surface_state; 3963 3964 struct brw_image_param lowered_storage_image_param; 3965 } planes[3]; 3966}; 3967 3968enum anv_image_view_state_flags { 3969 ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED = (1 << 0), 3970 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 1), 3971}; 3972 3973void anv_image_fill_surface_state(struct anv_device *device, 3974 const struct anv_image *image, 3975 VkImageAspectFlagBits aspect, 3976 const struct isl_view *view, 3977 isl_surf_usage_flags_t view_usage, 3978 enum isl_aux_usage aux_usage, 3979 const union isl_color_value *clear_color, 3980 enum anv_image_view_state_flags flags, 3981 struct anv_surface_state *state_inout, 3982 struct brw_image_param *image_param_out); 3983 3984struct anv_image_create_info { 3985 const VkImageCreateInfo *vk_info; 3986 3987 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */ 3988 isl_tiling_flags_t isl_tiling_flags; 3989 3990 /** These flags will be added to any derived from VkImageCreateInfo. */ 3991 isl_surf_usage_flags_t isl_extra_usage_flags; 3992}; 3993 3994VkResult anv_image_init(struct anv_device *device, struct anv_image *image, 3995 const struct anv_image_create_info *create_info); 3996 3997void anv_image_finish(struct anv_image *image); 3998 3999void anv_image_get_memory_requirements(struct anv_device *device, 4000 struct anv_image *image, 4001 VkImageAspectFlags aspects, 4002 VkMemoryRequirements2 *pMemoryRequirements); 4003 4004enum isl_format 4005anv_isl_format_for_descriptor_type(const struct anv_device *device, 4006 VkDescriptorType type); 4007 4008static inline uint32_t 4009anv_rasterization_aa_mode(VkPolygonMode raster_mode, 4010 VkLineRasterizationModeEXT line_mode) 4011{ 4012 if (raster_mode == VK_POLYGON_MODE_LINE && 4013 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT) 4014 return true; 4015 return false; 4016} 4017 4018VkFormatFeatureFlags2 4019anv_get_image_format_features2(const struct intel_device_info *devinfo, 4020 VkFormat vk_format, 4021 const struct anv_format *anv_format, 4022 VkImageTiling vk_tiling, 4023 const struct isl_drm_modifier_info *isl_mod_info); 4024 4025void anv_fill_buffer_surface_state(struct anv_device *device, 4026 struct anv_state state, 4027 enum isl_format format, 4028 struct isl_swizzle swizzle, 4029 isl_surf_usage_flags_t usage, 4030 struct anv_address address, 4031 uint32_t range, uint32_t stride); 4032 4033 4034/* Haswell border color is a bit of a disaster. Float and unorm formats use a 4035 * straightforward 32-bit float color in the first 64 bytes. Instead of using 4036 * a nice float/integer union like Gfx8+, Haswell specifies the integer border 4037 * color as a separate entry /after/ the float color. The layout of this entry 4038 * also depends on the format's bpp (with extra hacks for RG32), and overlaps. 4039 * 4040 * Since we don't know the format/bpp, we can't make any of the border colors 4041 * containing '1' work for all formats, as it would be in the wrong place for 4042 * some of them. We opt to make 32-bit integers work as this seems like the 4043 * most common option. Fortunately, transparent black works regardless, as 4044 * all zeroes is the same in every bit-size. 
4045 */ 4046struct hsw_border_color { 4047 float float32[4]; 4048 uint32_t _pad0[12]; 4049 uint32_t uint32[4]; 4050 uint32_t _pad1[108]; 4051}; 4052 4053struct gfx8_border_color { 4054 union { 4055 float float32[4]; 4056 uint32_t uint32[4]; 4057 }; 4058 /* Pad out to 64 bytes */ 4059 uint32_t _pad[12]; 4060}; 4061 4062struct anv_ycbcr_conversion { 4063 struct vk_object_base base; 4064 4065 const struct anv_format * format; 4066 VkSamplerYcbcrModelConversion ycbcr_model; 4067 VkSamplerYcbcrRange ycbcr_range; 4068 VkComponentSwizzle mapping[4]; 4069 VkChromaLocation chroma_offsets[2]; 4070 VkFilter chroma_filter; 4071 bool chroma_reconstruction; 4072}; 4073 4074struct anv_sampler { 4075 struct vk_object_base base; 4076 4077 uint32_t state[3][4]; 4078 uint32_t n_planes; 4079 struct anv_ycbcr_conversion *conversion; 4080 4081 /* Blob of sampler state data which is guaranteed to be 32-byte aligned 4082 * and with a 32-byte stride for use as bindless samplers. 4083 */ 4084 struct anv_state bindless_state; 4085 4086 struct anv_state custom_border_color; 4087}; 4088 4089#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff 4090 4091struct anv_query_pool { 4092 struct vk_object_base base; 4093 4094 VkQueryType type; 4095 VkQueryPipelineStatisticFlags pipeline_statistics; 4096 /** Stride between slots, in bytes */ 4097 uint32_t stride; 4098 /** Number of slots in this query pool */ 4099 uint32_t slots; 4100 struct anv_bo * bo; 4101 4102 /* KHR perf queries : */ 4103 uint32_t pass_size; 4104 uint32_t data_offset; 4105 uint32_t snapshot_size; 4106 uint32_t n_counters; 4107 struct intel_perf_counter_pass *counter_pass; 4108 uint32_t n_passes; 4109 struct intel_perf_query_info **pass_query; 4110}; 4111 4112static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool, 4113 uint32_t pass) 4114{ 4115 return pool->pass_size * pass + 8; 4116} 4117 4118struct anv_acceleration_structure { 4119 struct vk_object_base base; 4120 4121 VkDeviceSize size; 4122 struct anv_address address; 4123}; 4124 4125int anv_get_instance_entrypoint_index(const char *name); 4126int anv_get_device_entrypoint_index(const char *name); 4127int anv_get_physical_device_entrypoint_index(const char *name); 4128 4129const char *anv_get_instance_entry_name(int index); 4130const char *anv_get_physical_device_entry_name(int index); 4131const char *anv_get_device_entry_name(int index); 4132 4133bool 4134anv_instance_entrypoint_is_enabled(int index, uint32_t core_version, 4135 const struct vk_instance_extension_table *instance); 4136bool 4137anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version, 4138 const struct vk_instance_extension_table *instance); 4139bool 4140anv_device_entrypoint_is_enabled(int index, uint32_t core_version, 4141 const struct vk_instance_extension_table *instance, 4142 const struct vk_device_extension_table *device); 4143 4144const struct vk_device_dispatch_table * 4145anv_get_device_dispatch_table(const struct intel_device_info *devinfo); 4146 4147void 4148anv_dump_pipe_bits(enum anv_pipe_bits bits); 4149 4150static inline void 4151anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer, 4152 enum anv_pipe_bits bits, 4153 const char* reason) 4154{ 4155 cmd_buffer->state.pending_pipe_bits |= bits; 4156 if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) 4157 { 4158 fputs("pc: add ", stderr); 4159 anv_dump_pipe_bits(bits); 4160 fprintf(stderr, "reason: %s\n", reason); 4161 } 4162} 4163 4164struct anv_performance_configuration_intel { 4165 struct vk_object_base base; 4166 4167 struct 
intel_perf_registers *register_config;

   uint64_t config_id;
};

void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
void anv_device_perf_init(struct anv_device *device);
void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                 struct anv_query_pool *pool, uint32_t pass,
                                 const struct intel_perf_query_result *accumulated_results,
                                 union VkPerformanceCounterResultKHR *results);

/* Used to emit a series of memcpy operations */
struct anv_memcpy_state {
   struct anv_device *device;
   struct anv_batch *batch;

   struct anv_vb_cache_range vb_bound;
   struct anv_vb_cache_range vb_dirty;
};

struct anv_utrace_flush_copy {
   /* Needs to be the first field */
   struct intel_ds_flush_data ds;

   /* Batch data used to implement a copy of the timestamps recorded in
    * another buffer.
    */
   struct anv_reloc_list relocs;
   struct anv_batch batch;
   struct anv_bo *batch_bo;

   /* Buffer of 64-bit timestamps */
   struct anv_bo *trace_bo;

   /* Syncobj to be signaled when the batch completes */
   struct vk_sync *sync;

   /* Queue on which all the recorded traces are submitted */
   struct anv_queue *queue;

   struct anv_memcpy_state memcpy_state;
};

void anv_device_utrace_init(struct anv_device *device);
void anv_device_utrace_finish(struct anv_device *device);
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data);

#ifdef HAVE_PERFETTO
void anv_perfetto_init(void);
uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
                             uint64_t start_ts);
#else
static inline void anv_perfetto_init(void)
{
}
static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
{
   return 0;
}
static inline void anv_perfetto_end_submit(struct anv_queue *queue,
                                           uint32_t submission_id,
                                           uint64_t start_ts)
{}
#endif


#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
   VK_FROM_HANDLE(__anv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
                               VkAccelerationStructureKHR,
                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
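
/* The ANV_FROM_HANDLE() wrapper and the handle-cast macros defined above and
 * below are how entrypoints recover driver objects from dispatchable and
 * non-dispatchable Vulkan handles. A minimal usage sketch (the entrypoint
 * below is hypothetical and shown only to illustrate the pattern):
 *
 *    void example_entrypoint(VkDevice _device, VkBuffer _buffer)
 *    {
 *       ANV_FROM_HANDLE(anv_device, device, _device);
 *       ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 *
 *       // `device` is now a struct anv_device * and `buffer` a
 *       // struct anv_buffer *, obtained via the generated
 *       // anv_*_from_handle() helpers.
 *    }
 */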
4264VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base, 4265 VkDescriptorUpdateTemplate, 4266 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE) 4267VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory, 4268 VK_OBJECT_TYPE_DEVICE_MEMORY) 4269VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) 4270VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) 4271VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView, 4272 VK_OBJECT_TYPE_IMAGE_VIEW); 4273VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline, 4274 VK_OBJECT_TYPE_PIPELINE) 4275VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout, 4276 VK_OBJECT_TYPE_PIPELINE_LAYOUT) 4277VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool, 4278 VK_OBJECT_TYPE_QUERY_POOL) 4279VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler, 4280 VK_OBJECT_TYPE_SAMPLER) 4281VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base, 4282 VkSamplerYcbcrConversion, 4283 VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION) 4284VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base, 4285 VkPerformanceConfigurationINTEL, 4286 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL) 4287 4288#define anv_genX(devinfo, thing) ({ \ 4289 __typeof(&gfx9_##thing) genX_thing; \ 4290 switch ((devinfo)->verx10) { \ 4291 case 70: \ 4292 genX_thing = &gfx7_##thing; \ 4293 break; \ 4294 case 75: \ 4295 genX_thing = &gfx75_##thing; \ 4296 break; \ 4297 case 80: \ 4298 genX_thing = &gfx8_##thing; \ 4299 break; \ 4300 case 90: \ 4301 genX_thing = &gfx9_##thing; \ 4302 break; \ 4303 case 110: \ 4304 genX_thing = &gfx11_##thing; \ 4305 break; \ 4306 case 120: \ 4307 genX_thing = &gfx12_##thing; \ 4308 break; \ 4309 case 125: \ 4310 genX_thing = &gfx125_##thing; \ 4311 break; \ 4312 default: \ 4313 unreachable("Unknown hardware generation"); \ 4314 } \ 4315 genX_thing; \ 4316}) 4317 4318/* Gen-specific function declarations */ 4319#ifdef genX 4320# include "anv_genX.h" 4321#else 4322# define genX(x) gfx7_##x 4323# include "anv_genX.h" 4324# undef genX 4325# define genX(x) gfx75_##x 4326# include "anv_genX.h" 4327# undef genX 4328# define genX(x) gfx8_##x 4329# include "anv_genX.h" 4330# undef genX 4331# define genX(x) gfx9_##x 4332# include "anv_genX.h" 4333# undef genX 4334# define genX(x) gfx11_##x 4335# include "anv_genX.h" 4336# undef genX 4337# define genX(x) gfx12_##x 4338# include "anv_genX.h" 4339# undef genX 4340# define genX(x) gfx125_##x 4341# include "anv_genX.h" 4342# undef genX 4343#endif 4344 4345#endif /* ANV_PRIVATE_H */ 4346