1/* 2 * Copyright © 2019 Raspberry Pi Ltd 3 * 4 * based in part on anv driver which is: 5 * Copyright © 2015 Intel Corporation 6 * 7 * based in part on radv driver which is: 8 * Copyright © 2016 Red Hat. 9 * Copyright © 2016 Bas Nieuwenhuizen 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a 12 * copy of this software and associated documentation files (the "Software"), 13 * to deal in the Software without restriction, including without limitation 14 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 15 * and/or sell copies of the Software, and to permit persons to whom the 16 * Software is furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice (including the next 19 * paragraph) shall be included in all copies or substantial portions of the 20 * Software. 21 * 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 28 * IN THE SOFTWARE. 29 */ 30#ifndef V3DV_PRIVATE_H 31#define V3DV_PRIVATE_H 32 33#include <stdio.h> 34#include <string.h> 35#include <vulkan/vulkan.h> 36#include <vulkan/vk_icd.h> 37#include <vk_enum_to_str.h> 38 39#include "vk_device.h" 40#include "vk_format.h" 41#include "vk_instance.h" 42#include "vk_image.h" 43#include "vk_log.h" 44#include "vk_physical_device.h" 45#include "vk_shader_module.h" 46#include "vk_sync.h" 47#include "vk_sync_timeline.h" 48#include "vk_util.h" 49 50#include "vk_command_buffer.h" 51#include "vk_command_pool.h" 52#include "vk_queue.h" 53 54#include <xf86drm.h> 55 56#ifdef HAVE_VALGRIND 57#include <valgrind.h> 58#include <memcheck.h> 59#define VG(x) x 60#else 61#define VG(x) ((void)0) 62#endif 63 64#include "v3dv_limits.h" 65 66#include "common/v3d_device_info.h" 67#include "common/v3d_limits.h" 68#include "common/v3d_tiling.h" 69#include "common/v3d_util.h" 70 71#include "compiler/shader_enums.h" 72#include "compiler/spirv/nir_spirv.h" 73 74#include "compiler/v3d_compiler.h" 75 76#include "vk_debug_report.h" 77#include "util/set.h" 78#include "util/hash_table.h" 79#include "util/sparse_array.h" 80#include "util/xmlconfig.h" 81#include "u_atomic.h" 82 83#include "v3dv_entrypoints.h" 84#include "v3dv_bo.h" 85 86#include "drm-uapi/v3d_drm.h" 87 88#include "vk_alloc.h" 89#include "simulator/v3d_simulator.h" 90 91#include "v3dv_cl.h" 92 93#include "wsi_common.h" 94 95/* A non-fatal assert. Useful for debugging. */ 96#ifdef DEBUG 97#define v3dv_assert(x) ({ \ 98 if (unlikely(!(x))) \ 99 fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \ 100}) 101#else 102#define v3dv_assert(x) 103#endif 104 105#define perf_debug(...) 
do { \ 106 if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ 107 fprintf(stderr, __VA_ARGS__); \ 108} while (0) 109 110struct v3dv_instance; 111 112#ifdef USE_V3D_SIMULATOR 113#define using_v3d_simulator true 114#else 115#define using_v3d_simulator false 116#endif 117 118struct v3d_simulator_file; 119 120/* Minimum required by the Vulkan 1.1 spec */ 121#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30) 122 123struct v3dv_physical_device { 124 struct vk_physical_device vk; 125 126 char *name; 127 int32_t render_fd; 128 int32_t display_fd; 129 int32_t master_fd; 130 131 /* We need these because it is not clear how to detect 132 * valid devids in a portable way 133 */ 134 bool has_primary; 135 bool has_render; 136 137 dev_t primary_devid; 138 dev_t render_devid; 139 140#if using_v3d_simulator 141 uint32_t device_id; 142#endif 143 144 uint8_t driver_build_sha1[20]; 145 uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; 146 uint8_t device_uuid[VK_UUID_SIZE]; 147 uint8_t driver_uuid[VK_UUID_SIZE]; 148 149 struct vk_sync_type drm_syncobj_type; 150 struct vk_sync_timeline_type sync_timeline_type; 151 const struct vk_sync_type *sync_types[3]; 152 153 struct disk_cache *disk_cache; 154 155 mtx_t mutex; 156 157 struct wsi_device wsi_device; 158 159 VkPhysicalDeviceMemoryProperties memory; 160 161 struct v3d_device_info devinfo; 162 163 struct v3d_simulator_file *sim_file; 164 165 const struct v3d_compiler *compiler; 166 uint32_t next_program_id; 167 168 /* This array holds all our 'struct v3dv_bo' allocations. We use this 169 * so we can add a refcount to our BOs and check if a particular BO 170 * was already allocated in this device using its GEM handle. This is 171 * necessary to properly manage BO imports, because the kernel doesn't 172 * refcount the underlying BO memory. 173 * 174 * Specifically, when self-importing (i.e. importing a BO into the same 175 * device that created it), the kernel will give us the same BO handle 176 * for both BOs and we must only free it once when both references are 177 * freed. Otherwise, if we are not self-importing, we get two different BO 178 * handles, and we want to free each one individually. 179 * 180 * The BOs in this map all keep a reference count in refcnt, and 181 * only self-imported BOs will ever have a refcnt > 1.
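 *
 * As a hedged illustration of the pattern described above (the helper name
 * init_imported_bo is hypothetical, not the driver's actual import path;
 * only v3dv_device_lookup_bo below is real):
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt == 0)
 *       init_imported_bo(bo, gem_handle, size);   /+ first use of this handle +/
 *    else
 *       p_atomic_inc(&bo->refcnt);                /+ self-import: reuse entry +/
 *
 * and the corresponding free would only release the GEM handle once the
 * refcount drops back to zero.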
182 */ 183 struct util_sparse_array bo_map; 184 185 struct { 186 bool merge_jobs; 187 } options; 188 189 struct { 190 bool multisync; 191 bool perfmon; 192 } caps; 193}; 194 195VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance, 196 struct v3dv_physical_device *pdevice, 197 VkIcdSurfaceBase *surface); 198 199static inline struct v3dv_bo * 200v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle) 201{ 202 return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle); 203} 204 205VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device); 206void v3dv_wsi_finish(struct v3dv_physical_device *physical_device); 207struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, 208 uint32_t index); 209 210void v3dv_meta_clear_init(struct v3dv_device *device); 211void v3dv_meta_clear_finish(struct v3dv_device *device); 212 213void v3dv_meta_blit_init(struct v3dv_device *device); 214void v3dv_meta_blit_finish(struct v3dv_device *device); 215 216void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device); 217void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device); 218 219bool v3dv_meta_can_use_tlb(struct v3dv_image *image, 220 const VkOffset3D *offset, 221 VkFormat *compat_format); 222 223struct v3dv_instance { 224 struct vk_instance vk; 225 226 int physicalDeviceCount; 227 struct v3dv_physical_device physicalDevice; 228 229 bool pipeline_cache_enabled; 230 bool default_pipeline_cache_enabled; 231}; 232 233/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd, 234 * tfu), we still need a syncobj to track the last overall job submitted 235 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can 236 * start expecting multisync to be present and drop the legacy implementation 237 * together with this V3DV_QUEUE_ANY tracker. 238 */ 239enum v3dv_queue_type { 240 V3DV_QUEUE_CL = 0, 241 V3DV_QUEUE_CSD, 242 V3DV_QUEUE_TFU, 243 V3DV_QUEUE_ANY, 244 V3DV_QUEUE_COUNT, 245}; 246 247/* For each GPU queue, we use a syncobj to track the last job submitted. We 248 * set the flag `first` to determine when we are starting a new cmd buffer 249 * batch and therefore a job submitted to a given queue will be the first in a 250 * cmd buf batch. 251 */ 252struct v3dv_last_job_sync { 253 /* If the job is the first submitted to a GPU queue in a cmd buffer batch. 254 * 255 * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync. 256 */ 257 bool first[V3DV_QUEUE_COUNT]; 258 /* Array of syncobj to track the last job submitted to a GPU queue. 259 * 260 * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each 261 * queue, but without multisync we only track the last job submitted to any 262 * queue in V3DV_QUEUE_ANY. 263 */ 264 uint32_t syncs[V3DV_QUEUE_COUNT]; 265}; 266 267struct v3dv_queue { 268 struct vk_queue vk; 269 270 struct v3dv_device *device; 271 272 struct v3dv_last_job_sync last_job_syncs; 273 274 struct v3dv_job *noop_job; 275 276 /* The last active perfmon ID to prevent mixing of counter results when a 277 * job is submitted with a different perfmon id. 
278 */ 279 uint32_t last_perfmon_id; 280}; 281 282VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue, 283 struct vk_queue_submit *submit); 284 285#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t)) 286#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \ 287 sizeof(VkComponentMapping)) 288 289struct v3dv_meta_color_clear_pipeline { 290 VkPipeline pipeline; 291 VkRenderPass pass; 292 bool cached; 293 uint64_t key; 294}; 295 296struct v3dv_meta_depth_clear_pipeline { 297 VkPipeline pipeline; 298 uint64_t key; 299}; 300 301struct v3dv_meta_blit_pipeline { 302 VkPipeline pipeline; 303 VkRenderPass pass; 304 VkRenderPass pass_no_load; 305 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE]; 306}; 307 308struct v3dv_meta_texel_buffer_copy_pipeline { 309 VkPipeline pipeline; 310 VkRenderPass pass; 311 VkRenderPass pass_no_load; 312 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE]; 313}; 314 315struct v3dv_pipeline_key { 316 bool robust_buffer_access; 317 uint8_t topology; 318 uint8_t logicop_func; 319 bool msaa; 320 bool sample_coverage; 321 bool sample_alpha_to_coverage; 322 bool sample_alpha_to_one; 323 uint8_t cbufs; 324 struct { 325 enum pipe_format format; 326 uint8_t swizzle[4]; 327 } color_fmt[V3D_MAX_DRAW_BUFFERS]; 328 uint8_t f32_color_rb; 329 uint32_t va_swap_rb_mask; 330 bool has_multiview; 331}; 332 333struct v3dv_pipeline_cache_stats { 334 uint32_t miss; 335 uint32_t hit; 336 uint32_t count; 337 uint32_t on_disk_hit; 338}; 339 340/* Equivalent to gl_shader_stage, but including the coordinate shaders 341 * 342 * FIXME: perhaps move to common 343 */ 344enum broadcom_shader_stage { 345 BROADCOM_SHADER_VERTEX, 346 BROADCOM_SHADER_VERTEX_BIN, 347 BROADCOM_SHADER_GEOMETRY, 348 BROADCOM_SHADER_GEOMETRY_BIN, 349 BROADCOM_SHADER_FRAGMENT, 350 BROADCOM_SHADER_COMPUTE, 351}; 352 353#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1) 354 355/* Assumes that coordinate shaders will be custom-handled by the caller */ 356static inline enum broadcom_shader_stage 357gl_shader_stage_to_broadcom(gl_shader_stage stage) 358{ 359 switch (stage) { 360 case MESA_SHADER_VERTEX: 361 return BROADCOM_SHADER_VERTEX; 362 case MESA_SHADER_GEOMETRY: 363 return BROADCOM_SHADER_GEOMETRY; 364 case MESA_SHADER_FRAGMENT: 365 return BROADCOM_SHADER_FRAGMENT; 366 case MESA_SHADER_COMPUTE: 367 return BROADCOM_SHADER_COMPUTE; 368 default: 369 unreachable("Unknown gl shader stage"); 370 } 371} 372 373static inline gl_shader_stage 374broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage) 375{ 376 switch (stage) { 377 case BROADCOM_SHADER_VERTEX: 378 case BROADCOM_SHADER_VERTEX_BIN: 379 return MESA_SHADER_VERTEX; 380 case BROADCOM_SHADER_GEOMETRY: 381 case BROADCOM_SHADER_GEOMETRY_BIN: 382 return MESA_SHADER_GEOMETRY; 383 case BROADCOM_SHADER_FRAGMENT: 384 return MESA_SHADER_FRAGMENT; 385 case BROADCOM_SHADER_COMPUTE: 386 return MESA_SHADER_COMPUTE; 387 default: 388 unreachable("Unknown broadcom shader stage"); 389 } 390} 391 392static inline bool 393broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage) 394{ 395 switch (stage) { 396 case BROADCOM_SHADER_VERTEX_BIN: 397 case BROADCOM_SHADER_GEOMETRY_BIN: 398 return true; 399 default: 400 return false; 401 } 402} 403 404static inline bool 405broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage) 406{ 407 switch (stage) { 408 case BROADCOM_SHADER_VERTEX: 409 case BROADCOM_SHADER_GEOMETRY: 410 return true; 411 default: 412 return false; 413 } 414} 415 416static inline enum 
broadcom_shader_stage 417broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage) 418{ 419 switch (stage) { 420 case BROADCOM_SHADER_VERTEX: 421 return BROADCOM_SHADER_VERTEX_BIN; 422 case BROADCOM_SHADER_GEOMETRY: 423 return BROADCOM_SHADER_GEOMETRY_BIN; 424 default: 425 unreachable("Invalid shader stage"); 426 } 427} 428 429static inline const char * 430broadcom_shader_stage_name(enum broadcom_shader_stage stage) 431{ 432 switch(stage) { 433 case BROADCOM_SHADER_VERTEX_BIN: 434 return "MESA_SHADER_VERTEX_BIN"; 435 case BROADCOM_SHADER_GEOMETRY_BIN: 436 return "MESA_SHADER_GEOMETRY_BIN"; 437 default: 438 return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage)); 439 } 440} 441 442struct v3dv_pipeline_cache { 443 struct vk_object_base base; 444 445 struct v3dv_device *device; 446 mtx_t mutex; 447 448 struct hash_table *nir_cache; 449 struct v3dv_pipeline_cache_stats nir_stats; 450 451 struct hash_table *cache; 452 struct v3dv_pipeline_cache_stats stats; 453 454 /* For VK_EXT_pipeline_creation_cache_control. */ 455 bool externally_synchronized; 456}; 457 458struct v3dv_device { 459 struct vk_device vk; 460 461 struct v3dv_instance *instance; 462 struct v3dv_physical_device *pdevice; 463 464 struct v3d_device_info devinfo; 465 struct v3dv_queue queue; 466 467 /* Guards query->maybe_available and value for timestamps */ 468 mtx_t query_mutex; 469 470 /* Signaled whenever a query is ended */ 471 cnd_t query_ended; 472 473 /* Resources used for meta operations */ 474 struct { 475 mtx_t mtx; 476 struct { 477 VkPipelineLayout p_layout; 478 struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */ 479 } color_clear; 480 struct { 481 VkPipelineLayout p_layout; 482 struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */ 483 } depth_clear; 484 struct { 485 VkDescriptorSetLayout ds_layout; 486 VkPipelineLayout p_layout; 487 struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */ 488 } blit; 489 struct { 490 VkDescriptorSetLayout ds_layout; 491 VkPipelineLayout p_layout; 492 struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */ 493 } texel_buffer_copy; 494 } meta; 495 496 struct v3dv_bo_cache { 497 /** List of struct v3d_bo freed, by age. */ 498 struct list_head time_list; 499 /** List of struct v3d_bo freed, per size, by age. */ 500 struct list_head *size_list; 501 uint32_t size_list_size; 502 503 mtx_t lock; 504 505 uint32_t cache_size; 506 uint32_t cache_count; 507 uint32_t max_cache_size; 508 } bo_cache; 509 510 uint32_t bo_size; 511 uint32_t bo_count; 512 513 struct v3dv_pipeline_cache default_pipeline_cache; 514 515 /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The 516 * following covers the most common case, that is, all attribute formats 517 * being float, allowing us to reuse the same BO for all 518 * pipelines matching this requirement. Pipelines that need integer 519 * attributes will create their own BO.
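 *
 * (As background, and stated here as an assumption rather than a
 * description of the exact BO layout: the defaults used are the
 * conventional (0, 0, 0, 1) per attribute, stored as 32-bit floats.)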
520 */ 521 struct v3dv_bo *default_attribute_float; 522 VkPhysicalDeviceFeatures features; 523 524 void *device_address_mem_ctx; 525 struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */ 526 527#ifdef ANDROID 528 const void *gralloc; 529 enum { 530 V3DV_GRALLOC_UNKNOWN, 531 V3DV_GRALLOC_CROS, 532 V3DV_GRALLOC_OTHER, 533 } gralloc_type; 534#endif 535}; 536 537struct v3dv_device_memory { 538 struct vk_object_base base; 539 540 struct v3dv_bo *bo; 541 const VkMemoryType *type; 542 bool is_for_wsi; 543 bool is_for_device_address; 544}; 545 546#define V3D_OUTPUT_IMAGE_FORMAT_NO 255 547#define TEXTURE_DATA_FORMAT_NO 255 548 549struct v3dv_format { 550 bool supported; 551 552 /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ 553 uint8_t rt_type; 554 555 /* One of V3D33_TEXTURE_DATA_FORMAT_*. */ 556 uint8_t tex_type; 557 558 /* Swizzle to apply to the RGBA shader output for storing to the tile 559 * buffer, to the RGBA tile buffer to produce shader input (for 560 * blending), and for turning the rgba8888 texture sampler return 561 * value into shader rgba values. 562 */ 563 uint8_t swizzle[4]; 564 565 /* Whether the return value is 16F/I/UI or 32F/I/UI. */ 566 uint8_t return_size; 567 568 /* If the format supports (linear) filtering when texturing. */ 569 bool supports_filtering; 570}; 571 572struct v3d_resource_slice { 573 uint32_t offset; 574 uint32_t stride; 575 uint32_t padded_height; 576 /* Size of a single pane of the slice. For 3D textures, there will be 577 * a number of panes equal to the minified, power-of-two-aligned 578 * depth. 579 */ 580 uint32_t size; 581 uint8_t ub_pad; 582 enum v3d_tiling_mode tiling; 583 uint32_t padded_height_of_output_image_in_uif_blocks; 584}; 585 586bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle); 587bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle); 588 589struct v3dv_image { 590 struct vk_image vk; 591 592 const struct v3dv_format *format; 593 uint32_t cpp; 594 bool tiled; 595 596 struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; 597 uint64_t size; /* Total size in bytes */ 598 uint32_t cube_map_stride; 599 600 struct v3dv_device_memory *mem; 601 VkDeviceSize mem_offset; 602 uint32_t alignment; 603 604#ifdef ANDROID 605 /* Image is backed by VK_ANDROID_native_buffer. */ 606 bool is_native_buffer_memory; 607#endif 608}; 609 610VkImageViewType v3dv_image_type_to_view_type(VkImageType type); 611 612/* Pre-generating packets needs to consider changes in packet sizes across hw 613 * versions. Keep things simple and allocate enough space for any supported 614 * version. We ensure the size is large enough through static asserts. 615 */ 616#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32 617#define V3DV_SAMPLER_STATE_LENGTH 24 618#define V3DV_BLEND_CFG_LENGTH 5 619#define V3DV_CFG_BITS_LENGTH 4 620#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36 621#define V3DV_VCM_CACHE_SIZE_LENGTH 2 622#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16 623#define V3DV_STENCIL_CFG_LENGTH 6 624 625struct v3dv_image_view { 626 struct vk_image_view vk; 627 628 const struct v3dv_format *format; 629 bool swap_rb; 630 bool channel_reverse; 631 uint32_t internal_bpp; 632 uint32_t internal_type; 633 uint32_t offset; 634 635 /* Precomputed (composed from createinfo->components and format swizzle) 636 * swizzles to pass in to the shader key.
637 * 638 * This could also be included in the descriptor bo, but the shader state 639 * packet doesn't need it on a bo, so we can just avoid a memory copy 640 */ 641 uint8_t swizzle[4]; 642 643 /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info 644 * during UpdateDescriptorSets. 645 * 646 * Empirical tests show that cube arrays need a different shader state 647 * depending on whether they are used with a sampler or not, so for these 648 * we generate two states and select the one to use based on the descriptor 649 * type. 650 */ 651 uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH]; 652}; 653 654VkResult v3dv_create_image_view(struct v3dv_device *device, 655 const VkImageViewCreateInfo *pCreateInfo, 656 VkImageView *pView); 657 658uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer); 659 660struct v3dv_buffer { 661 struct vk_object_base base; 662 663 VkDeviceSize size; 664 VkBufferUsageFlags usage; 665 uint32_t alignment; 666 667 struct v3dv_device_memory *mem; 668 VkDeviceSize mem_offset; 669}; 670 671struct v3dv_buffer_view { 672 struct vk_object_base base; 673 674 struct v3dv_buffer *buffer; 675 676 VkFormat vk_format; 677 const struct v3dv_format *format; 678 uint32_t internal_bpp; 679 uint32_t internal_type; 680 681 uint32_t offset; 682 uint32_t size; 683 uint32_t num_elements; 684 685 /* Prepacked TEXTURE_SHADER_STATE. */ 686 uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH]; 687}; 688 689struct v3dv_subpass_attachment { 690 uint32_t attachment; 691 VkImageLayout layout; 692}; 693 694struct v3dv_subpass { 695 uint32_t input_count; 696 struct v3dv_subpass_attachment *input_attachments; 697 698 uint32_t color_count; 699 struct v3dv_subpass_attachment *color_attachments; 700 struct v3dv_subpass_attachment *resolve_attachments; 701 702 struct v3dv_subpass_attachment ds_attachment; 703 struct v3dv_subpass_attachment ds_resolve_attachment; 704 bool resolve_depth, resolve_stencil; 705 706 /* If we need to emit the clear of the depth/stencil attachment using 707 * a draw call instead of using the TLB (GFXH-1461). 708 */ 709 bool do_depth_clear_with_draw; 710 bool do_stencil_clear_with_draw; 711 712 /* Multiview */ 713 uint32_t view_mask; 714}; 715 716struct v3dv_render_pass_attachment { 717 VkAttachmentDescription2 desc; 718 719 uint32_t first_subpass; 720 uint32_t last_subpass; 721 722 /* When multiview is enabled, we no longer care about when a particular 723 * attachment is first or last used in a render pass, since not all views 724 * in the attachment will meet that criterion. Instead, we need to track 725 * each individual view (layer) in each attachment and emit our stores, 726 * loads and clears accordingly. 727 */ 728 struct { 729 uint32_t first_subpass; 730 uint32_t last_subpass; 731 } views[MAX_MULTIVIEW_VIEW_COUNT]; 732 733 /* If this is a multisampled attachment that is going to be resolved, 734 * whether we may be able to use the TLB hardware resolve based on the 735 * attachment format.
736 */ 737 bool try_tlb_resolve; 738}; 739 740struct v3dv_render_pass { 741 struct vk_object_base base; 742 743 bool multiview_enabled; 744 745 uint32_t attachment_count; 746 struct v3dv_render_pass_attachment *attachments; 747 748 uint32_t subpass_count; 749 struct v3dv_subpass *subpasses; 750 751 struct v3dv_subpass_attachment *subpass_attachments; 752}; 753 754struct v3dv_framebuffer { 755 struct vk_object_base base; 756 757 uint32_t width; 758 uint32_t height; 759 uint32_t layers; 760 761 /* Typically, edge tiles in the framebuffer have padding depending on the 762 * underlying tiling layout. One consequence of this is that when the 763 * framebuffer dimensions are not aligned to tile boundaries, tile stores 764 * would still write full tiles on the edges and write to the padded area. 765 * If the framebuffer is aliasing a smaller region of a larger image, then 766 * we need to be careful with this though, as we won't have padding on the 767 * edge tiles (which typically means that we need to load the tile buffer 768 * before we store). 769 */ 770 bool has_edge_padding; 771 772 uint32_t attachment_count; 773 uint32_t color_attachment_count; 774 775 /* Notice that elements in 'attachments' will be NULL if the framebuffer 776 * was created imageless. The driver is expected to access attachment info 777 * from the command buffer state instead. 778 */ 779 struct v3dv_image_view *attachments[0]; 780}; 781 782struct v3dv_frame_tiling { 783 uint32_t width; 784 uint32_t height; 785 uint32_t layers; 786 uint32_t render_target_count; 787 uint32_t internal_bpp; 788 bool msaa; 789 bool double_buffer; 790 uint32_t tile_width; 791 uint32_t tile_height; 792 uint32_t draw_tiles_x; 793 uint32_t draw_tiles_y; 794 uint32_t supertile_width; 795 uint32_t supertile_height; 796 uint32_t frame_width_in_supertiles; 797 uint32_t frame_height_in_supertiles; 798}; 799 800bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device, 801 const VkRect2D *area, 802 struct v3dv_framebuffer *fb, 803 struct v3dv_render_pass *pass, 804 uint32_t subpass_idx); 805 806/* Checks if we need to emit 2 initial tile clears for double buffer mode. 807 * This happens when we render at least 2 tiles, because in this mode each 808 * tile uses a different half of the tile buffer memory so we can have 2 tiles 809 * in flight (one being stored to memory and the next being rendered). In this 810 * scenario, if we emit a single initial tile clear we would only clear the 811 * first half of the tile buffer. 812 */ 813static inline bool 814v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling) 815{ 816 return tiling->double_buffer && 817 (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 || 818 tiling->layers > 1); 819} 820 821enum v3dv_cmd_buffer_status { 822 V3DV_CMD_BUFFER_STATUS_NEW = 0, 823 V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1, 824 V3DV_CMD_BUFFER_STATUS_RECORDING = 2, 825 V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3 826}; 827 828union v3dv_clear_value { 829 uint32_t color[4]; 830 struct { 831 float z; 832 uint8_t s; 833 }; 834}; 835 836struct v3dv_cmd_buffer_attachment_state { 837 /* The original clear value as provided by the Vulkan API */ 838 VkClearValue vk_clear_value; 839 840 /* The hardware clear value */ 841 union v3dv_clear_value clear_value; 842 843 /* The underlying image view (from the framebuffer or, if imageless 844 * framebuffer is used, from VkRenderPassAttachmentBeginInfo). 845 */ 846 struct v3dv_image_view *image_view; 847 848 /* If this is a multisampled attachment with a resolve operation.
*/ 849 bool has_resolve; 850 851 /* If this is a multisampled attachment with a resolve operation, 852 * whether we can use the TLB for the resolve. 853 */ 854 bool use_tlb_resolve; 855}; 856 857struct v3dv_viewport_state { 858 uint32_t count; 859 VkViewport viewports[MAX_VIEWPORTS]; 860 float translate[MAX_VIEWPORTS][3]; 861 float scale[MAX_VIEWPORTS][3]; 862}; 863 864struct v3dv_scissor_state { 865 uint32_t count; 866 VkRect2D scissors[MAX_SCISSORS]; 867}; 868 869/* Mostly a v3dv mapping of VkDynamicState, used to track which state is 870 * defined as dynamic 871 */ 872enum v3dv_dynamic_state_bits { 873 V3DV_DYNAMIC_VIEWPORT = 1 << 0, 874 V3DV_DYNAMIC_SCISSOR = 1 << 1, 875 V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2, 876 V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3, 877 V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4, 878 V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5, 879 V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6, 880 V3DV_DYNAMIC_LINE_WIDTH = 1 << 7, 881 V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8, 882 V3DV_DYNAMIC_ALL = (1 << 9) - 1, 883}; 884 885/* Flags for dirty pipeline state. 886 */ 887enum v3dv_cmd_dirty_bits { 888 V3DV_CMD_DIRTY_VIEWPORT = 1 << 0, 889 V3DV_CMD_DIRTY_SCISSOR = 1 << 1, 890 V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2, 891 V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3, 892 V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4, 893 V3DV_CMD_DIRTY_PIPELINE = 1 << 5, 894 V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6, 895 V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7, 896 V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8, 897 V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9, 898 V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10, 899 V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11, 900 V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO = 1 << 12, 901 V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 13, 902 V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 14, 903 V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 15, 904 V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 16, 905 V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 17, 906 V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 18, 907}; 908 909struct v3dv_dynamic_state { 910 /** 911 * Bitmask of v3dv_dynamic_state_bits. 912 * Defines the set of saved dynamic state.
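 *
 * A hedged sketch of how the mask is typically consumed (illustrative only,
 * not the exact bind-time code): state a pipeline does not declare as
 * dynamic is copied from the pipeline into the command buffer when the
 * pipeline is bound, e.g.
 *
 *    if (!(pipeline->dynamic_state.mask & V3DV_DYNAMIC_LINE_WIDTH))
 *       cmd_buffer->state.dynamic.line_width =
 *          pipeline->dynamic_state.line_width;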
913 */ 914 uint32_t mask; 915 916 struct v3dv_viewport_state viewport; 917 918 struct v3dv_scissor_state scissor; 919 920 struct { 921 uint32_t front; 922 uint32_t back; 923 } stencil_compare_mask; 924 925 struct { 926 uint32_t front; 927 uint32_t back; 928 } stencil_write_mask; 929 930 struct { 931 uint32_t front; 932 uint32_t back; 933 } stencil_reference; 934 935 float blend_constants[4]; 936 937 struct { 938 float constant_factor; 939 float depth_bias_clamp; 940 float slope_factor; 941 } depth_bias; 942 943 float line_width; 944 945 uint32_t color_write_enable; 946}; 947 948void v3dv_viewport_compute_xform(const VkViewport *viewport, 949 float scale[3], 950 float translate[3]); 951 952enum v3dv_ez_state { 953 V3D_EZ_UNDECIDED = 0, 954 V3D_EZ_GT_GE, 955 V3D_EZ_LT_LE, 956 V3D_EZ_DISABLED, 957}; 958 959enum v3dv_job_type { 960 V3DV_JOB_TYPE_GPU_CL = 0, 961 V3DV_JOB_TYPE_GPU_CL_SECONDARY, 962 V3DV_JOB_TYPE_GPU_TFU, 963 V3DV_JOB_TYPE_GPU_CSD, 964 V3DV_JOB_TYPE_CPU_RESET_QUERIES, 965 V3DV_JOB_TYPE_CPU_END_QUERY, 966 V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS, 967 V3DV_JOB_TYPE_CPU_SET_EVENT, 968 V3DV_JOB_TYPE_CPU_WAIT_EVENTS, 969 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE, 970 V3DV_JOB_TYPE_CPU_CSD_INDIRECT, 971 V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY, 972}; 973 974struct v3dv_reset_query_cpu_job_info { 975 struct v3dv_query_pool *pool; 976 uint32_t first; 977 uint32_t count; 978}; 979 980struct v3dv_end_query_cpu_job_info { 981 struct v3dv_query_pool *pool; 982 uint32_t query; 983 984 /* This is one unless multiview is used */ 985 uint32_t count; 986}; 987 988struct v3dv_copy_query_results_cpu_job_info { 989 struct v3dv_query_pool *pool; 990 uint32_t first; 991 uint32_t count; 992 struct v3dv_buffer *dst; 993 uint32_t offset; 994 uint32_t stride; 995 VkQueryResultFlags flags; 996}; 997 998struct v3dv_submit_sync_info { 999 /* List of syncs to wait before running a job */ 1000 uint32_t wait_count; 1001 struct vk_sync_wait *waits; 1002 1003 /* List of syncs to signal when all jobs complete */ 1004 uint32_t signal_count; 1005 struct vk_sync_signal *signals; 1006}; 1007 1008struct v3dv_event_set_cpu_job_info { 1009 struct v3dv_event *event; 1010 int state; 1011}; 1012 1013struct v3dv_event_wait_cpu_job_info { 1014 /* List of events to wait on */ 1015 uint32_t event_count; 1016 struct v3dv_event **events; 1017}; 1018 1019struct v3dv_copy_buffer_to_image_cpu_job_info { 1020 struct v3dv_image *image; 1021 struct v3dv_buffer *buffer; 1022 uint32_t buffer_offset; 1023 uint32_t buffer_stride; 1024 uint32_t buffer_layer_stride; 1025 VkOffset3D image_offset; 1026 VkExtent3D image_extent; 1027 uint32_t mip_level; 1028 uint32_t base_layer; 1029 uint32_t layer_count; 1030}; 1031 1032struct v3dv_csd_indirect_cpu_job_info { 1033 struct v3dv_buffer *buffer; 1034 uint32_t offset; 1035 struct v3dv_job *csd_job; 1036 uint32_t wg_size; 1037 uint32_t *wg_uniform_offsets[3]; 1038 bool needs_wg_uniform_rewrite; 1039}; 1040 1041struct v3dv_timestamp_query_cpu_job_info { 1042 struct v3dv_query_pool *pool; 1043 uint32_t query; 1044 1045 /* This is one unless multiview is used */ 1046 uint32_t count; 1047}; 1048 1049/* Number of perfmons required to handle all supported performance counters */ 1050#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \ 1051 DRM_V3D_MAX_PERF_COUNTERS) 1052 1053struct v3dv_perf_query { 1054 uint32_t kperfmon_ids[V3DV_MAX_PERFMONS]; 1055 1056 /* A DRM syncobj to wait on the GPU jobs for which we are collecting 1057 * performance data. 
1058 */ 1059 struct vk_sync *last_job_sync; 1060}; 1061 1062struct v3dv_job { 1063 struct list_head list_link; 1064 1065 /* We only create job clones when executing secondary command buffers into 1066 * primaries. These clones don't make deep copies of the original object 1067 * so we want to flag them to avoid freeing resources they don't own. 1068 */ 1069 bool is_clone; 1070 1071 /* If the job executes on the transfer stage of the pipeline */ 1072 bool is_transfer; 1073 1074 /* VK_KHR_buffer_device_address allows shaders to use pointers that can 1075 * dereference memory in any buffer that has been flagged with 1076 * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR. These buffers may not 1077 * be bound via descriptor sets, so we need to make sure that a job that 1078 * uses this functionality includes all these buffers in its kernel 1079 * submission. 1080 */ 1081 bool uses_buffer_device_address; 1082 1083 enum v3dv_job_type type; 1084 1085 struct v3dv_device *device; 1086 1087 struct v3dv_cmd_buffer *cmd_buffer; 1088 1089 struct v3dv_cl bcl; 1090 struct v3dv_cl rcl; 1091 struct v3dv_cl indirect; 1092 1093 /* Set of all BOs referenced by the job. This will be used for making 1094 * the list of BOs that the kernel will need to have paged in to 1095 * execute our job. 1096 */ 1097 struct set *bos; 1098 uint32_t bo_count; 1099 uint64_t bo_handle_mask; 1100 1101 struct v3dv_bo *tile_alloc; 1102 struct v3dv_bo *tile_state; 1103 1104 bool tmu_dirty_rcl; 1105 1106 uint32_t first_subpass; 1107 1108 /* When the current subpass is split into multiple jobs, this flag is set 1109 * to true for any jobs after the first in the same subpass. 1110 */ 1111 bool is_subpass_continue; 1112 1113 /* If this job is the last job emitted for a subpass. */ 1114 bool is_subpass_finish; 1115 1116 struct v3dv_frame_tiling frame_tiling; 1117 1118 enum v3dv_ez_state ez_state; 1119 enum v3dv_ez_state first_ez_state; 1120 1121 /* If we have already decided if we need to disable Early Z/S completely 1122 * for this job. 1123 */ 1124 bool decided_global_ez_enable; 1125 1126 /* If the job emitted any draw calls with Early Z/S enabled */ 1127 bool has_ez_draws; 1128 1129 /* If this job has been configured to use early Z/S clear */ 1130 bool early_zs_clear; 1131 1132 /* Number of draw calls recorded into the job */ 1133 uint32_t draw_count; 1134 1135 /* A flag indicating whether we want to flush every draw separately. This 1136 * can be used for debugging, or for cases where special circumstances 1137 * require this behavior. 1138 */ 1139 bool always_flush; 1140 1141 /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We 1142 * can use this to select the hw queues where we need to serialize the job. 
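 *
 * As a hedged illustration (not the actual submit code, and add_wait is a
 * hypothetical helper): with multisync enabled, a job whose serialize mask
 * includes V3DV_BARRIER_COMPUTE_BIT would be made to wait on the syncobj
 * tracking the last CSD job, roughly
 *
 *    if (job->serialize & V3DV_BARRIER_COMPUTE_BIT)
 *       add_wait(queue->last_job_syncs.syncs[V3DV_QUEUE_CSD]);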
1143 */ 1144 uint8_t serialize; 1145 1146 /* If this is a CL job, whether we should sync before binning */ 1147 bool needs_bcl_sync; 1148 1149 /* Job specs for CPU jobs */ 1150 union { 1151 struct v3dv_reset_query_cpu_job_info query_reset; 1152 struct v3dv_end_query_cpu_job_info query_end; 1153 struct v3dv_copy_query_results_cpu_job_info query_copy_results; 1154 struct v3dv_event_set_cpu_job_info event_set; 1155 struct v3dv_event_wait_cpu_job_info event_wait; 1156 struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image; 1157 struct v3dv_csd_indirect_cpu_job_info csd_indirect; 1158 struct v3dv_timestamp_query_cpu_job_info query_timestamp; 1159 } cpu; 1160 1161 /* Job specs for TFU jobs */ 1162 struct drm_v3d_submit_tfu tfu; 1163 1164 /* Job specs for CSD jobs */ 1165 struct { 1166 struct v3dv_bo *shared_memory; 1167 uint32_t wg_count[3]; 1168 uint32_t wg_base[3]; 1169 struct drm_v3d_submit_csd submit; 1170 } csd; 1171 1172 /* Perfmons with last job sync for CSD and CL jobs */ 1173 struct v3dv_perf_query *perf; 1174}; 1175 1176void v3dv_job_init(struct v3dv_job *job, 1177 enum v3dv_job_type type, 1178 struct v3dv_device *device, 1179 struct v3dv_cmd_buffer *cmd_buffer, 1180 int32_t subpass_idx); 1181void v3dv_job_destroy(struct v3dv_job *job); 1182 1183void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo); 1184void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo); 1185 1186void v3dv_job_start_frame(struct v3dv_job *job, 1187 uint32_t width, 1188 uint32_t height, 1189 uint32_t layers, 1190 bool allocate_tile_state_for_all_layers, 1191 uint32_t render_target_count, 1192 uint8_t max_internal_bpp, 1193 bool msaa); 1194 1195bool v3dv_job_type_is_gpu(struct v3dv_job *job); 1196 1197struct v3dv_job * 1198v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job, 1199 struct v3dv_cmd_buffer *cmd_buffer); 1200 1201struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device, 1202 enum v3dv_job_type type, 1203 struct v3dv_cmd_buffer *cmd_buffer, 1204 uint32_t subpass_idx); 1205 1206void 1207v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, 1208 uint32_t slot_size, 1209 uint32_t used_count, 1210 uint32_t *alloc_count, 1211 void **ptr); 1212 1213void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer, 1214 bool indexed, bool indirect); 1215 1216/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a 1217 * cmd_buffer specific header? 1218 */ 1219struct v3dv_draw_info { 1220 uint32_t vertex_count; 1221 uint32_t instance_count; 1222 uint32_t first_vertex; 1223 uint32_t first_instance; 1224}; 1225 1226struct v3dv_vertex_binding { 1227 struct v3dv_buffer *buffer; 1228 VkDeviceSize offset; 1229}; 1230 1231struct v3dv_descriptor_state { 1232 struct v3dv_descriptor_set *descriptor_sets[MAX_SETS]; 1233 uint32_t valid; 1234 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; 1235}; 1236 1237struct v3dv_cmd_pipeline_state { 1238 struct v3dv_pipeline *pipeline; 1239 1240 struct v3dv_descriptor_state descriptor_state; 1241}; 1242 1243enum { 1244 V3DV_BARRIER_GRAPHICS_BIT = (1 << 0), 1245 V3DV_BARRIER_COMPUTE_BIT = (1 << 1), 1246 V3DV_BARRIER_TRANSFER_BIT = (1 << 2), 1247}; 1248#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \ 1249 V3DV_BARRIER_TRANSFER_BIT | \ 1250 V3DV_BARRIER_COMPUTE_BIT); 1251 1252struct v3dv_barrier_state { 1253 /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. 
*/ 1254 uint8_t dst_mask; 1255 1256 /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_* 1257 * indicating the sources of the dependency. 1258 */ 1259 uint8_t src_mask_graphics; 1260 uint8_t src_mask_transfer; 1261 uint8_t src_mask_compute; 1262 1263 /* For graphics barriers, access masks involved. Used to decide if we need 1264 * to execute a binning or render barrier. 1265 */ 1266 VkAccessFlags bcl_buffer_access; 1267 VkAccessFlags bcl_image_access; 1268}; 1269 1270struct v3dv_cmd_buffer_state { 1271 struct v3dv_render_pass *pass; 1272 struct v3dv_framebuffer *framebuffer; 1273 VkRect2D render_area; 1274 1275 /* Current job being recorded */ 1276 struct v3dv_job *job; 1277 1278 uint32_t subpass_idx; 1279 1280 struct v3dv_cmd_pipeline_state gfx; 1281 struct v3dv_cmd_pipeline_state compute; 1282 1283 struct v3dv_dynamic_state dynamic; 1284 1285 uint32_t dirty; 1286 VkShaderStageFlagBits dirty_descriptor_stages; 1287 VkShaderStageFlagBits dirty_push_constants_stages; 1288 1289 /* Current clip window. We use this to check whether we have an active 1290 * scissor, since in that case we can't use TLB clears and need to fall back 1291 * to drawing rects. 1292 */ 1293 VkRect2D clip_window; 1294 1295 /* Whether our render area is aligned to tile boundaries. If this is false 1296 * then we have tiles that are only partially covered by the render area, 1297 * and therefore, we need to be careful with our loads and stores so we don't 1298 * modify pixels for the tile area that is not covered by the render area. 1299 * This means, for example, that we can't use the TLB to clear, since that 1300 * always clears full tiles. 1301 */ 1302 bool tile_aligned_render_area; 1303 1304 /* FIXME: we have just one client-side BO for the push constants, 1305 * independently of the stageFlags in vkCmdPushConstants, and the 1306 * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage 1307 * tuning in the future if it makes sense.
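 *
 * A hedged sketch of the recording side (not the exact driver code):
 * vkCmdPushConstants simply copies the client data into the array below,
 * roughly
 *
 *    memcpy((uint8_t *) state->push_constants_data + offset, pValues, size);
 *    state->push_constants_size =
 *       MAX2(state->push_constants_size, offset + size);
 *    cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS;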
1308 */ 1309 uint32_t push_constants_size; 1310 uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4]; 1311 1312 uint32_t attachment_alloc_count; 1313 struct v3dv_cmd_buffer_attachment_state *attachments; 1314 1315 struct v3dv_vertex_binding vertex_bindings[MAX_VBS]; 1316 1317 struct { 1318 VkBuffer buffer; 1319 VkDeviceSize offset; 1320 uint8_t index_size; 1321 } index_buffer; 1322 1323 /* Current uniforms */ 1324 struct { 1325 struct v3dv_cl_reloc vs_bin; 1326 struct v3dv_cl_reloc vs; 1327 struct v3dv_cl_reloc gs_bin; 1328 struct v3dv_cl_reloc gs; 1329 struct v3dv_cl_reloc fs; 1330 } uniforms; 1331 1332 /* Current view index for multiview rendering */ 1333 uint32_t view_index; 1334 1335 /* Used to flag OOM conditions during command buffer recording */ 1336 bool oom; 1337 1338 /* If we are currently recording job(s) for a transfer operation */ 1339 bool is_transfer; 1340 1341 /* Barrier state tracking */ 1342 struct v3dv_barrier_state barrier; 1343 1344 /* Secondary command buffer state */ 1345 struct { 1346 bool occlusion_query_enable; 1347 } inheritance; 1348 1349 /* Command buffer state saved during a meta operation */ 1350 struct { 1351 uint32_t subpass_idx; 1352 VkRenderPass pass; 1353 VkFramebuffer framebuffer; 1354 1355 uint32_t attachment_alloc_count; 1356 uint32_t attachment_count; 1357 struct v3dv_cmd_buffer_attachment_state *attachments; 1358 1359 bool tile_aligned_render_area; 1360 VkRect2D render_area; 1361 1362 struct v3dv_dynamic_state dynamic; 1363 1364 struct v3dv_cmd_pipeline_state gfx; 1365 bool has_descriptor_state; 1366 1367 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4]; 1368 uint32_t push_constants_size; 1369 } meta; 1370 1371 /* Command buffer state for queries */ 1372 struct { 1373 /* A list of vkCmdEndQuery commands recorded in the command buffer during 1374 * a render pass. We queue these here and then schedule the corresponding 1375 * CPU jobs for them at the time we finish the GPU job in which they have 1376 * been recorded. 1377 */ 1378 struct { 1379 uint32_t used_count; 1380 uint32_t alloc_count; 1381 struct v3dv_end_query_cpu_job_info *states; 1382 } end; 1383 1384 struct { 1385 /* This BO is not NULL if we have an active occlusion query, that is, 1386 * we have called vkCmdBeginQuery but not vkCmdEndQuery. 1387 */ 1388 struct v3dv_bo *bo; 1389 uint32_t offset; 1390 1391 /* This pointer is not NULL if we have an active performance query */ 1392 struct v3dv_perf_query *perf; 1393 } active_query; 1394 } query; 1395}; 1396 1397/* The following struct represents the info from a descriptor that we store in 1398 * host memory. These are mostly links to other existing Vulkan objects, 1399 * like the image_view, in order to access swizzle info, or the buffer used 1400 * for a UBO/SSBO, for example. 1401 * 1402 * FIXME: revisit whether it makes sense to just move everything that would be 1403 * needed from a descriptor to the bo.
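 *
 * (For example, a VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER write fills the
 * buffer/offset/range members below from the VkDescriptorBufferInfo passed
 * to vkUpdateDescriptorSets; this is just an illustration of what the
 * struct holds.)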
1404 */ 1405struct v3dv_descriptor { 1406 VkDescriptorType type; 1407 1408 union { 1409 struct { 1410 struct v3dv_image_view *image_view; 1411 struct v3dv_sampler *sampler; 1412 }; 1413 1414 struct { 1415 struct v3dv_buffer *buffer; 1416 size_t offset; 1417 size_t range; 1418 }; 1419 1420 struct v3dv_buffer_view *buffer_view; 1421 }; 1422}; 1423 1424struct v3dv_query { 1425 bool maybe_available; 1426 union { 1427 /* Used by GPU queries (occlusion) */ 1428 struct { 1429 struct v3dv_bo *bo; 1430 uint32_t offset; 1431 }; 1432 /* Used by CPU queries (timestamp) */ 1433 uint64_t value; 1434 1435 /* Used by performance queries */ 1436 struct v3dv_perf_query perf; 1437 }; 1438}; 1439 1440struct v3dv_query_pool { 1441 struct vk_object_base base; 1442 1443 struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */ 1444 1445 /* Only used with performance queries */ 1446 struct { 1447 uint32_t ncounters; 1448 uint8_t counters[V3D_PERFCNT_NUM]; 1449 1450 /* V3D has a limit on the number of counters we can track in a 1451 * single performance monitor, so if too many counters are requested 1452 * we need to create multiple monitors to record all of them. This 1453 * field represents the number of monitors required for the number 1454 * of counters requested. 1455 */ 1456 uint8_t nperfmons; 1457 } perfmon; 1458 1459 VkQueryType query_type; 1460 uint32_t query_count; 1461 struct v3dv_query *queries; 1462}; 1463 1464VkResult v3dv_get_query_pool_results(struct v3dv_device *device, 1465 struct v3dv_query_pool *pool, 1466 uint32_t first, 1467 uint32_t count, 1468 void *data, 1469 VkDeviceSize stride, 1470 VkQueryResultFlags flags); 1471 1472void v3dv_reset_query_pools(struct v3dv_device *device, 1473 struct v3dv_query_pool *query_pool, 1474 uint32_t first, 1475 uint32_t last); 1476 1477typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device, 1478 uint64_t pobj, 1479 VkAllocationCallbacks *alloc); 1480struct v3dv_cmd_buffer_private_obj { 1481 struct list_head list_link; 1482 uint64_t obj; 1483 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb; 1484}; 1485 1486struct v3dv_cmd_buffer { 1487 struct vk_command_buffer vk; 1488 1489 struct v3dv_device *device; 1490 1491 /* Used at submit time to link command buffers in the submission that have 1492 * spawned wait threads, so we can then wait on all of them to complete 1493 * before we process any signal semaphores or fences. 1494 */ 1495 struct list_head list_link; 1496 1497 VkCommandBufferUsageFlags usage_flags; 1498 1499 enum v3dv_cmd_buffer_status status; 1500 1501 struct v3dv_cmd_buffer_state state; 1502 1503 /* Buffer where we upload push constant data to resolve indirect indexing */ 1504 struct v3dv_cl_reloc push_constants_resource; 1505 1506 /* Collection of Vulkan objects created internally by the driver (typically 1507 * during recording of meta operations) that are part of the command buffer 1508 * and should be destroyed with it. 1509 */ 1510 struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */ 1511 1512 /* Per-command buffer resources for meta operations. */ 1513 struct { 1514 struct { 1515 /* The current descriptor pool for blit sources */ 1516 VkDescriptorPool dspool; 1517 } blit; 1518 struct { 1519 /* The current descriptor pool for texel buffer copy sources */ 1520 VkDescriptorPool dspool; 1521 } texel_buffer_copy; 1522 } meta; 1523 1524 /* List of jobs in the command buffer. For primary command buffers it 1525 * represents the jobs we want to submit to the GPU.
For secondary command 1526 * buffers it represents jobs that will be merged into a primary command 1527 * buffer via vkCmdExecuteCommands. 1528 */ 1529 struct list_head jobs; 1530}; 1531 1532struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer, 1533 int32_t subpass_idx, 1534 enum v3dv_job_type type); 1535void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer); 1536 1537struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer, 1538 uint32_t subpass_idx); 1539struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer, 1540 uint32_t subpass_idx); 1541 1542void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer); 1543 1544void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer, 1545 bool push_descriptor_state); 1546void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer, 1547 uint32_t dirty_dynamic_state, 1548 bool needs_subpass_resume); 1549 1550void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, 1551 struct v3dv_query_pool *pool, 1552 uint32_t first, 1553 uint32_t count); 1554 1555void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer, 1556 struct v3dv_query_pool *pool, 1557 uint32_t query, 1558 VkQueryControlFlags flags); 1559 1560void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, 1561 struct v3dv_query_pool *pool, 1562 uint32_t query); 1563 1564void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer, 1565 struct v3dv_query_pool *pool, 1566 uint32_t first, 1567 uint32_t count, 1568 struct v3dv_buffer *dst, 1569 uint32_t offset, 1570 uint32_t stride, 1571 VkQueryResultFlags flags); 1572 1573void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, 1574 struct drm_v3d_submit_tfu *tfu); 1575 1576void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info, 1577 const uint32_t *wg_counts); 1578 1579void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer, 1580 uint64_t obj, 1581 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb); 1582 1583void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst, 1584 struct v3dv_barrier_state *src); 1585 1586struct v3dv_event { 1587 struct vk_object_base base; 1588 int state; 1589}; 1590 1591struct v3dv_shader_variant { 1592 enum broadcom_shader_stage stage; 1593 1594 union { 1595 struct v3d_prog_data *base; 1596 struct v3d_vs_prog_data *vs; 1597 struct v3d_gs_prog_data *gs; 1598 struct v3d_fs_prog_data *fs; 1599 struct v3d_compute_prog_data *cs; 1600 } prog_data; 1601 1602 /* We explicitly save the prog_data_size as it makes serialization 1603 * easier 1604 */ 1605 uint32_t prog_data_size; 1606 1607 /* The assembly for this variant will be uploaded to a BO shared with all 1608 * other shader stages in that pipeline. This is the offset in that BO. 1609 */ 1610 uint32_t assembly_offset; 1611 1612 /* Note: it is very likely that qpu_insts will be NULL, as it is only 1613 * used temporarily, to upload it to the shared bo, as we compile the 1614 * different stages individually. 1615 */ 1616 uint64_t *qpu_insts; 1617 uint32_t qpu_insts_size; 1618}; 1619 1620/* 1621 * Per-stage info, useful so shader_module_compile_to_nir and 1622 * other methods don't need so many parameters. 1623 * 1624 * FIXME: for the case of the coordinate shader and the vertex shader, module, 1625 * entrypoint, spec_info and nir are the same. There is also info that is only 1626 * relevant to some stages.
But it seemed like too much of a hassle to create a new 1627 * struct just to handle that. Revisit if this kind of info starts to grow. 1628 */ 1629struct v3dv_pipeline_stage { 1630 struct v3dv_pipeline *pipeline; 1631 1632 enum broadcom_shader_stage stage; 1633 1634 const struct vk_shader_module *module; 1635 const char *entrypoint; 1636 const VkSpecializationInfo *spec_info; 1637 1638 nir_shader *nir; 1639 1640 /* The following is the combined hash of module+entrypoint+spec_info+nir */ 1641 unsigned char shader_sha1[20]; 1642 1643 /** A name for this program, so you can track it in shader-db output. */ 1644 uint32_t program_id; 1645 1646 VkPipelineCreationFeedback feedback; 1647}; 1648 1649/* We are using the descriptor pool entry for two things: 1650 * * Track the allocated sets, so we can properly free them if needed 1651 * * Track the suballocated pool bo regions, so if some descriptor set is 1652 * freed, the gap can be reallocated later. 1653 * 1654 * These only make sense if the pool was not created with the flag 1655 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT 1656 */ 1657struct v3dv_descriptor_pool_entry 1658{ 1659 struct v3dv_descriptor_set *set; 1660 /* Offset and size of the subregion allocated for this entry from the 1661 * pool->bo 1662 */ 1663 uint32_t offset; 1664 uint32_t size; 1665}; 1666 1667struct v3dv_descriptor_pool { 1668 struct vk_object_base base; 1669 1670 /* A list with all descriptor sets allocated from the pool. */ 1671 struct list_head set_list; 1672 1673 /* If this descriptor pool has been allocated by the driver for internal 1674 * use, typically to implement meta operations. 1675 */ 1676 bool is_driver_internal; 1677 1678 struct v3dv_bo *bo; 1679 /* Current offset at the descriptor bo. 0 means that we didn't use it for 1680 * any descriptor. If the descriptor bo is NULL, current offset is 1681 * meaningless 1682 */ 1683 uint32_t current_offset; 1684 1685 /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, descriptor 1686 * set memory is managed as a whole through the 1687 * following pointers. If it is set, they are not used and 1688 * descriptor sets are allocated/freed individually.
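 *
 * A hedged sketch of the linear (non-freeable) path described above (local
 * names are illustrative, not the driver's actual allocation code):
 *
 *    struct v3dv_descriptor_set *set =
 *       (struct v3dv_descriptor_set *) pool->host_memory_ptr;
 *    pool->host_memory_ptr += set_size_in_bytes;
 *    assert(pool->host_memory_ptr <= pool->host_memory_end);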
1689 */ 1690 uint8_t *host_memory_base; 1691 uint8_t *host_memory_ptr; 1692 uint8_t *host_memory_end; 1693 1694 uint32_t entry_count; 1695 uint32_t max_entry_count; 1696 struct v3dv_descriptor_pool_entry entries[0]; 1697}; 1698 1699struct v3dv_descriptor_set { 1700 struct vk_object_base base; 1701 1702 /* List link into the list of all sets allocated from the pool */ 1703 struct list_head pool_link; 1704 1705 struct v3dv_descriptor_pool *pool; 1706 1707 struct v3dv_descriptor_set_layout *layout; 1708 1709 /* Offset relative to the descriptor pool bo for this set */ 1710 uint32_t base_offset; 1711 1712 /* The descriptors below can be indexed (set/binding) using the set_layout 1713 */ 1714 struct v3dv_descriptor descriptors[0]; 1715}; 1716 1717struct v3dv_descriptor_set_binding_layout { 1718 VkDescriptorType type; 1719 1720 /* Number of array elements in this binding */ 1721 uint32_t array_size; 1722 1723 /* Index into the flattened descriptor set */ 1724 uint32_t descriptor_index; 1725 1726 uint32_t dynamic_offset_count; 1727 uint32_t dynamic_offset_index; 1728 1729 /* Offset into the descriptor set where this descriptor lives (the final offset 1730 * on the descriptor bo needs to take set->base_offset into account) 1731 */ 1732 uint32_t descriptor_offset; 1733 1734 /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0 1735 * if there are no immutable samplers. 1736 */ 1737 uint32_t immutable_samplers_offset; 1738}; 1739 1740struct v3dv_descriptor_set_layout { 1741 struct vk_object_base base; 1742 1743 VkDescriptorSetLayoutCreateFlags flags; 1744 1745 /* Number of bindings in this descriptor set */ 1746 uint32_t binding_count; 1747 1748 /* Total bo size needed for this descriptor set 1749 */ 1750 uint32_t bo_size; 1751 1752 /* Shader stages affected by this descriptor set */ 1753 uint16_t shader_stages; 1754 1755 /* Number of descriptors in this descriptor set */ 1756 uint32_t descriptor_count; 1757 1758 /* Number of dynamic offsets used by this descriptor set */ 1759 uint16_t dynamic_offset_count; 1760 1761 /* Descriptor set layouts can be destroyed even if they are still being 1762 * used.
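 *
 * (In other words, users that keep pointing at the layout, such as pipeline
 * layouts or already-allocated descriptor sets, are expected to take a
 * reference with v3dv_descriptor_set_layout_ref() and drop it with
 * v3dv_descriptor_set_layout_unref() below, so the memory is only released
 * when the last reference goes away. This is a hedged summary of the
 * refcounting pattern, inferred from those helpers.)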
1763 */ 1764 uint32_t ref_cnt; 1765 1766 /* Bindings in this descriptor set */ 1767 struct v3dv_descriptor_set_binding_layout binding[0]; 1768}; 1769 1770void 1771v3dv_descriptor_set_layout_destroy(struct v3dv_device *device, 1772 struct v3dv_descriptor_set_layout *set_layout); 1773 1774static inline void 1775v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout) 1776{ 1777 assert(set_layout && set_layout->ref_cnt >= 1); 1778 p_atomic_inc(&set_layout->ref_cnt); 1779} 1780 1781static inline void 1782v3dv_descriptor_set_layout_unref(struct v3dv_device *device, 1783 struct v3dv_descriptor_set_layout *set_layout) 1784{ 1785 assert(set_layout && set_layout->ref_cnt >= 1); 1786 if (p_atomic_dec_zero(&set_layout->ref_cnt)) 1787 v3dv_descriptor_set_layout_destroy(device, set_layout); 1788} 1789 1790struct v3dv_pipeline_layout { 1791 struct vk_object_base base; 1792 1793 struct { 1794 struct v3dv_descriptor_set_layout *layout; 1795 uint32_t dynamic_offset_start; 1796 } set[MAX_SETS]; 1797 1798 uint32_t num_sets; 1799 1800 /* Shader stages that are declared to use descriptors from this layout */ 1801 uint32_t shader_stages; 1802 1803 uint32_t dynamic_offset_count; 1804 uint32_t push_constant_size; 1805 1806 unsigned char sha1[20]; 1807}; 1808 1809/* 1810 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need 1811 * it to be big enough to include the max value for all of them. 1812 * 1813 * FIXME: one alternative would be to allocate the map as big as you need for 1814 * each descriptor type. That would mean more individual allocations. 1815 */ 1816#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \ 1817 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \ 1818 MAX_STORAGE_BUFFERS) 1819 1820 1821struct v3dv_descriptor_map { 1822 /* TODO: avoid fixed size array/justify the size */ 1823 unsigned num_desc; /* Number of descriptors */ 1824 int set[DESCRIPTOR_MAP_SIZE]; 1825 int binding[DESCRIPTOR_MAP_SIZE]; 1826 int array_index[DESCRIPTOR_MAP_SIZE]; 1827 int array_size[DESCRIPTOR_MAP_SIZE]; 1828 bool used[DESCRIPTOR_MAP_SIZE]; 1829 1830 /* NOTE: the following is only for samplers, but this is the easiest place to 1831 * put it. 1832 */ 1833 uint8_t return_size[DESCRIPTOR_MAP_SIZE]; 1834}; 1835 1836struct v3dv_sampler { 1837 struct vk_object_base base; 1838 1839 bool compare_enable; 1840 bool unnormalized_coordinates; 1841 bool clamp_to_transparent_black_border; 1842 1843 /* Prepacked SAMPLER_STATE, which is referenced as part of the tmu 1844 * configuration. If needed it will be copied to the descriptor info during 1845 * UpdateDescriptorSets 1846 */ 1847 uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH]; 1848}; 1849 1850struct v3dv_descriptor_template_entry { 1851 /* The type of descriptor in this entry */ 1852 VkDescriptorType type; 1853 1854 /* Binding in the descriptor set */ 1855 uint32_t binding; 1856 1857 /* Offset at which to write into the descriptor set binding */ 1858 uint32_t array_element; 1859 1860 /* Number of elements to write into the descriptor set binding */ 1861 uint32_t array_count; 1862 1863 /* Offset into the user provided data */ 1864 size_t offset; 1865 1866 /* Stride between elements into the user provided data */ 1867 size_t stride; 1868}; 1869 1870struct v3dv_descriptor_update_template { 1871 struct vk_object_base base; 1872 1873 VkPipelineBindPoint bind_point; 1874 1875 /* The descriptor set this template corresponds to.
This value is only 1876 * valid if the template was created with the templateType 1877 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. 1878 */ 1879 uint8_t set; 1880 1881 /* Number of entries in this template */ 1882 uint32_t entry_count; 1883 1884 /* Entries of the template */ 1885 struct v3dv_descriptor_template_entry entries[0]; 1886}; 1887 1888 1889/* We keep two special values for the sampler idx that represent the cases where a 1890 * sampler is not needed/provided. The main use is that even if we don't have a 1891 * sampler, we still need to do the output unpacking (through 1892 * nir_lower_tex). The easiest way to do this is to add these special "no 1893 * sampler" entries to the sampler_map, and then use the proper unpacking for that 1894 * case. 1895 * 1896 * We have one for when we want a 16-bit output size, and another for when we want a 1897 * 32-bit output size. We use the info coming from the RelaxedPrecision 1898 * decoration to decide between one and the other. 1899 */ 1900#define V3DV_NO_SAMPLER_16BIT_IDX 0 1901#define V3DV_NO_SAMPLER_32BIT_IDX 1 1902 1903/* 1904 * The following two methods are used for the combined to/from texture/sampler 1905 * index maps at v3dv_pipeline. 1906 */ 1907static inline uint32_t 1908v3dv_pipeline_combined_index_key_create(uint32_t texture_index, 1909 uint32_t sampler_index) 1910{ 1911 return texture_index << 24 | sampler_index; 1912} 1913 1914static inline void 1915v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key, 1916 uint32_t *texture_index, 1917 uint32_t *sampler_index) 1918{ 1919 uint32_t texture = combined_index_key >> 24; 1920 uint32_t sampler = combined_index_key & 0xffffff; 1921 1922 if (texture_index) 1923 *texture_index = texture; 1924 1925 if (sampler_index) 1926 *sampler_index = sampler; 1927} 1928 1929struct v3dv_descriptor_maps { 1930 struct v3dv_descriptor_map ubo_map; 1931 struct v3dv_descriptor_map ssbo_map; 1932 struct v3dv_descriptor_map sampler_map; 1933 struct v3dv_descriptor_map texture_map; 1934}; 1935 1936/* This structure represents data shared between different objects, like the 1937 * pipeline and the pipeline cache, so we ref count it to know when it should 1938 * be freed. 1939 */ 1940struct v3dv_pipeline_shared_data { 1941 uint32_t ref_cnt; 1942 1943 unsigned char sha1_key[20]; 1944 1945 struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES]; 1946 struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES]; 1947 1948 struct v3dv_bo *assembly_bo; 1949}; 1950 1951struct v3dv_pipeline_executable_data { 1952 enum broadcom_shader_stage stage; 1953 char *nir_str; 1954 char *qpu_str; 1955}; 1956 1957struct v3dv_pipeline { 1958 struct vk_object_base base; 1959 1960 struct v3dv_device *device; 1961 1962 VkShaderStageFlags active_stages; 1963 1964 struct v3dv_render_pass *pass; 1965 struct v3dv_subpass *subpass; 1966 1967 /* Note: We can't use just a MESA_SHADER_STAGES array because we also need 1968 * to track binning shaders. Note these will be freed once the pipeline 1969 * has been compiled.
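 *
 * (For example, a graphics pipeline with a geometry shader will typically
 * have vs, vs_bin, gs, gs_bin and fs populated, while cs is only used by
 * compute pipelines.)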
struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* The structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};
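
/* A minimal sketch of the intended ownership model (using the ref/unref
 * helpers declared later in this header): any object that stores a pointer
 * to the shared data takes a reference and drops it when done, so the data
 * is destroyed only when the last user goes away:
 *
 *    pipeline->shared_data = shared_data;
 *    v3dv_pipeline_shared_data_ref(shared_data);
 *    ...
 *    v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
 */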
struct v3dv_pipeline_executable_data {
   enum broadcom_shader_stage stage;
   char *nir_str;
   char *qpu_str;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note that these will be freed once the
    * pipeline has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
   bool uses_buffer_device_address;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   /* If ez_state is V3D_EZ_DISABLED, whether the reason for disabling is
    * that the pipeline selects an incompatible depth test function.
    */
   bool incompatible_ez_test;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding, so vb[binding].stride is the stride of the vertex
    * array with such binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need to be
    * rechecked later. The array must be indexed by driver location, since
    * that is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* The combined sha1 of the shader stages, the layout and the pipeline key. */
   unsigned char sha1[20];

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But as in most cases it will be NULL, it is
    * not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   struct {
      void *mem_ctx;
      bool has_data;
      struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
   } executables;

   /* Packets prepacked during pipeline creation */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}
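
/* Usage sketch (hypothetical, for illustration): command buffer code that
 * needs the descriptor state matching a pipeline's bind point would go
 * through the helpers above, e.g.:
 *
 *    struct v3dv_descriptor_state *desc_state =
 *       v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
 *    // desc_state now refers to either the compute or the graphics
 *    // descriptor state of the command buffer, depending on whether the
 *    // pipeline's only active stage is VK_SHADER_STAGE_COMPUTE_BIT.
 */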
const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#define v3dv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags2 features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index,
                                      VkDescriptorType *out_type);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

struct v3dv_bo*
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
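
/* Illustrative only: the handle casts above are what makes the usual
 * entrypoint pattern work, where a Vulkan handle is converted to the driver
 * struct at the top of the function. A hypothetical entrypoint would look
 * like:
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    v3dv_SomeEntrypoint(VkDevice _device, VkSampler _sampler)
 *    {
 *       V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *       V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
 *       // 'device' and 'sampler' are now struct v3dv_device * and
 *       // struct v3dv_sampler * respectively.
 *    }
 */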
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}

/* Helper to call hardware version specific functions */
#define v3dv_X(device, thing) ({                           \
   __typeof(&v3d42_##thing) v3d_X_thing;                   \
   switch (device->devinfo.ver) {                          \
   case 42:                                                \
      v3d_X_thing = &v3d42_##thing;                        \
      break;                                               \
   default:                                                \
      unreachable("Unsupported hardware generation");      \
   }                                                       \
   v3d_X_thing;                                            \
})
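
/* Usage sketch: v3dv_X() resolves a per-version symbol at run time based on
 * the device's hardware version, so a call site for a (hypothetical)
 * version-specific function 'foo' defined through v3dX(foo) would look like:
 *
 *    v3dv_X(device, foo)(arg1, arg2);
 *
 * which expands to a call through &v3d42_foo on a version 4.2 device.
 */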

/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
 * define v3dX for each version supported, because when we compile code that
 * is not version-specific, all version-specific macros need to be already
 * defined.
 */
#ifdef v3dX
#  include "v3dvx_private.h"
#else
#  define v3dX(x) v3d42_##x
#  include "v3dvx_private.h"
#  undef v3dX
#endif

#ifdef ANDROID
VkResult
v3dv_gralloc_info(struct v3dv_device *device,
                  const VkNativeBufferANDROID *gralloc_info,
                  int *out_dmabuf,
                  int *out_stride,
                  int *out_size,
                  uint64_t *out_modifier);

VkResult
v3dv_import_native_buffer_fd(VkDevice device_h,
                             int dma_buf,
                             const VkAllocationCallbacks *alloc,
                             VkImage image_h);
#endif /* ANDROID */

#endif /* V3DV_PRIVATE_H */