/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
26 */ 27 28#ifndef RADV_PRIVATE_H 29#define RADV_PRIVATE_H 30 31#include <assert.h> 32#include <stdbool.h> 33#include <stdint.h> 34#include <stdio.h> 35#include <stdlib.h> 36#include <string.h> 37#ifdef HAVE_VALGRIND 38#include <memcheck.h> 39#include <valgrind.h> 40#define VG(x) x 41#else 42#define VG(x) ((void)0) 43#endif 44 45#include "c11/threads.h" 46#ifndef _WIN32 47#include <amdgpu.h> 48#include <xf86drm.h> 49#endif 50#include "compiler/shader_enums.h" 51#include "util/bitscan.h" 52#include "util/list.h" 53#include "util/macros.h" 54#include "util/rwlock.h" 55#include "util/xmlconfig.h" 56#include "vk_alloc.h" 57#include "vk_buffer.h" 58#include "vk_command_buffer.h" 59#include "vk_command_pool.h" 60#include "vk_debug_report.h" 61#include "vk_device.h" 62#include "vk_format.h" 63#include "vk_instance.h" 64#include "vk_log.h" 65#include "vk_physical_device.h" 66#include "vk_shader_module.h" 67#include "vk_queue.h" 68#include "vk_util.h" 69#include "vk_image.h" 70#include "vk_framebuffer.h" 71 72#include "ac_binary.h" 73#include "ac_gpu_info.h" 74#include "ac_shader_util.h" 75#include "ac_spm.h" 76#include "ac_sqtt.h" 77#include "ac_surface.h" 78#include "radv_constants.h" 79#include "radv_descriptor_set.h" 80#include "radv_radeon_winsys.h" 81#include "radv_shader.h" 82#include "radv_shader_args.h" 83#include "sid.h" 84 85#include "radix_sort/radix_sort_vk_devaddr.h" 86 87/* Pre-declarations needed for WSI entrypoints */ 88struct wl_surface; 89struct wl_display; 90typedef struct xcb_connection_t xcb_connection_t; 91typedef uint32_t xcb_visualid_t; 92typedef uint32_t xcb_window_t; 93 94#include <vulkan/vk_android_native_buffer.h> 95#include <vulkan/vk_icd.h> 96#include <vulkan/vulkan.h> 97#include <vulkan/vulkan_android.h> 98 99#include "radv_entrypoints.h" 100 101#include "wsi_common.h" 102 103#ifdef __cplusplus 104extern "C" 105{ 106#endif 107 108/* Helper to determine if we should compile 109 * any of the Android AHB support. 
110 * 111 * To actually enable the ext we also need 112 * the necessary kernel support. 113 */ 114#if defined(ANDROID) && ANDROID_API_LEVEL >= 26 115#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1 116#include <vndk/hardware_buffer.h> 117#else 118#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0 119#endif 120 121#ifdef _WIN32 122#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0 123#else 124#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1 125#endif 126 127#ifdef _WIN32 128#define radv_printflike(a, b) 129#else 130#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) 131#endif 132 133static inline uint32_t 134align_u32(uint32_t v, uint32_t a) 135{ 136 assert(a != 0 && a == (a & -a)); 137 return (v + a - 1) & ~(a - 1); 138} 139 140static inline uint32_t 141align_u32_npot(uint32_t v, uint32_t a) 142{ 143 return (v + a - 1) / a * a; 144} 145 146static inline uint64_t 147align_u64(uint64_t v, uint64_t a) 148{ 149 assert(a != 0 && a == (a & -a)); 150 return (v + a - 1) & ~(a - 1); 151} 152 153static inline int32_t 154align_i32(int32_t v, int32_t a) 155{ 156 assert(a != 0 && a == (a & -a)); 157 return (v + a - 1) & ~(a - 1); 158} 159 160/** Alignment must be a power of 2. 
*/ 161static inline bool 162radv_is_aligned(uintmax_t n, uintmax_t a) 163{ 164 assert(a == (a & -a)); 165 return (n & (a - 1)) == 0; 166} 167 168static inline uint32_t 169round_up_u32(uint32_t v, uint32_t a) 170{ 171 return (v + a - 1) / a; 172} 173 174static inline uint64_t 175round_up_u64(uint64_t v, uint64_t a) 176{ 177 return (v + a - 1) / a; 178} 179 180static inline uint32_t 181radv_minify(uint32_t n, uint32_t levels) 182{ 183 if (unlikely(n == 0)) 184 return 0; 185 else 186 return MAX2(n >> levels, 1); 187} 188static inline float 189radv_clamp_f(float f, float min, float max) 190{ 191 assert(min < max); 192 193 if (f > max) 194 return max; 195 else if (f < min) 196 return min; 197 else 198 return f; 199} 200 201static inline bool 202radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) 203{ 204 if (*inout_mask & clear_mask) { 205 *inout_mask &= ~clear_mask; 206 return true; 207 } else { 208 return false; 209 } 210} 211 212static inline int 213radv_float_to_sfixed(float value, unsigned frac_bits) 214{ 215 return value * (1 << frac_bits); 216} 217 218static inline unsigned int 219radv_float_to_ufixed(float value, unsigned frac_bits) 220{ 221 return value * (1 << frac_bits); 222} 223 224/* Whenever we generate an error, pass it through this function. Useful for 225 * debugging, where we can break on it. Only call at error site, not when 226 * propagating errors. Might be useful to plug in a stack trace here. 227 */ 228 229struct radv_image_view; 230struct radv_instance; 231 232/* A non-fatal assert. Useful for debugging. 
*/ 233#ifdef NDEBUG 234#define radv_assert(x) \ 235 do { \ 236 } while (0) 237#else 238#define radv_assert(x) \ 239 do { \ 240 if (unlikely(!(x))) \ 241 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ 242 } while (0) 243#endif 244 245int radv_get_instance_entrypoint_index(const char *name); 246int radv_get_device_entrypoint_index(const char *name); 247int radv_get_physical_device_entrypoint_index(const char *name); 248 249const char *radv_get_instance_entry_name(int index); 250const char *radv_get_physical_device_entry_name(int index); 251const char *radv_get_device_entry_name(int index); 252 253/* queue types */ 254enum radv_queue_family { 255 RADV_QUEUE_GENERAL, 256 RADV_QUEUE_COMPUTE, 257 RADV_QUEUE_TRANSFER, 258 RADV_MAX_QUEUE_FAMILIES, 259 RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES, 260 RADV_QUEUE_IGNORED, 261}; 262 263struct radv_perfcounter_desc; 264 265struct radv_physical_device { 266 struct vk_physical_device vk; 267 268 /* Link in radv_instance::physical_devices */ 269 struct list_head link; 270 271 struct radv_instance *instance; 272 273 struct radeon_winsys *ws; 274 struct radeon_info rad_info; 275 char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; 276 char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; 277 uint8_t driver_uuid[VK_UUID_SIZE]; 278 uint8_t device_uuid[VK_UUID_SIZE]; 279 uint8_t cache_uuid[VK_UUID_SIZE]; 280 281 int local_fd; 282 int master_fd; 283 struct wsi_device wsi_device; 284 285 bool out_of_order_rast_allowed; 286 287 /* Whether DCC should be enabled for MSAA textures. */ 288 bool dcc_msaa_allowed; 289 290 /* Whether to enable NGG. */ 291 bool use_ngg; 292 293 /* Whether to enable NGG culling. */ 294 bool use_ngg_culling; 295 296 /* Whether to enable NGG streamout. */ 297 bool use_ngg_streamout; 298 299 /* Number of threads per wave. 
*/ 300 uint8_t ps_wave_size; 301 uint8_t cs_wave_size; 302 uint8_t ge_wave_size; 303 uint8_t rt_wave_size; 304 305 /* Whether to use the LLVM compiler backend */ 306 bool use_llvm; 307 308 /* Whether to emulate ETC2 image support on HW without support. */ 309 bool emulate_etc2; 310 311 /* This is the drivers on-disk cache used as a fallback as opposed to 312 * the pipeline cache defined by apps. 313 */ 314 struct disk_cache *disk_cache; 315 316 VkPhysicalDeviceMemoryProperties memory_properties; 317 enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES]; 318 enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES]; 319 unsigned heaps; 320 321 /* Bitmask of memory types that use the 32-bit address space. */ 322 uint32_t memory_types_32bit; 323 324#ifndef _WIN32 325 int available_nodes; 326 drmPciBusInfo bus_info; 327 328 dev_t primary_devid; 329 dev_t render_devid; 330#endif 331 332 nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES]; 333 334 enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES]; 335 uint32_t num_queues; 336 337 uint32_t gs_table_depth; 338 339 struct ac_hs_info hs; 340 struct ac_task_info task_info; 341 342 /* Performance counters. */ 343 struct ac_perfcounters ac_perfcounters; 344 345 uint32_t num_perfcounters; 346 struct radv_perfcounter_desc *perfcounters; 347}; 348 349struct radv_instance { 350 struct vk_instance vk; 351 352 VkAllocationCallbacks alloc; 353 354 uint64_t debug_flags; 355 uint64_t perftest_flags; 356 357 bool physical_devices_enumerated; 358 struct list_head physical_devices; 359 360 struct driOptionCache dri_options; 361 struct driOptionCache available_dri_options; 362 363 /** 364 * Workarounds for game bugs. 
365 */ 366 bool enable_mrt_output_nan_fixup; 367 bool disable_tc_compat_htile_in_general; 368 bool disable_shrink_image_store; 369 bool absolute_depth_bias; 370 bool disable_aniso_single_level; 371 bool zero_vram; 372 bool disable_sinking_load_input_fs; 373 bool flush_before_query_copy; 374}; 375 376VkResult radv_init_wsi(struct radv_physical_device *physical_device); 377void radv_finish_wsi(struct radv_physical_device *physical_device); 378 379struct cache_entry; 380 381struct radv_pipeline_cache { 382 struct vk_object_base base; 383 struct radv_device *device; 384 mtx_t mutex; 385 VkPipelineCacheCreateFlags flags; 386 387 uint32_t total_size; 388 uint32_t table_size; 389 uint32_t kernel_count; 390 struct cache_entry **hash_table; 391 bool modified; 392 393 VkAllocationCallbacks alloc; 394}; 395 396struct radv_shader_binary; 397struct radv_shader; 398struct radv_pipeline_shader_stack_size; 399 400void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device); 401void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache); 402bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size); 403 404bool radv_create_shaders_from_pipeline_cache( 405 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, 406 struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes, 407 uint32_t *num_stack_sizes, bool *found_in_application_cache); 408 409void radv_pipeline_cache_insert_shaders( 410 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1, 411 struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries, 412 const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes); 413 414VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline, 415 struct radv_shader_binary **binaries, 416 struct radv_shader_binary *gs_copy_binary); 417 418enum 
radv_blit_ds_layout { 419 RADV_BLIT_DS_LAYOUT_TILE_ENABLE, 420 RADV_BLIT_DS_LAYOUT_TILE_DISABLE, 421 RADV_BLIT_DS_LAYOUT_COUNT, 422}; 423 424static inline enum radv_blit_ds_layout 425radv_meta_blit_ds_to_type(VkImageLayout layout) 426{ 427 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE 428 : RADV_BLIT_DS_LAYOUT_TILE_ENABLE; 429} 430 431static inline VkImageLayout 432radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout) 433{ 434 return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL 435 : VK_IMAGE_LAYOUT_GENERAL; 436} 437 438enum radv_meta_dst_layout { 439 RADV_META_DST_LAYOUT_GENERAL, 440 RADV_META_DST_LAYOUT_OPTIMAL, 441 RADV_META_DST_LAYOUT_COUNT, 442}; 443 444static inline enum radv_meta_dst_layout 445radv_meta_dst_layout_from_layout(VkImageLayout layout) 446{ 447 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL 448 : RADV_META_DST_LAYOUT_OPTIMAL; 449} 450 451static inline VkImageLayout 452radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout) 453{ 454 return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL 455 : VK_IMAGE_LAYOUT_GENERAL; 456} 457 458struct radv_meta_state { 459 VkAllocationCallbacks alloc; 460 461 struct radv_pipeline_cache cache; 462 463 /* 464 * For on-demand pipeline creation, makes sure that 465 * only one thread tries to build a pipeline at the same time. 466 */ 467 mtx_t mtx; 468 469 /** 470 * Use array element `i` for images with `2^i` samples. 
471 */ 472 struct { 473 VkPipeline color_pipelines[NUM_META_FS_KEYS]; 474 } color_clear[MAX_SAMPLES_LOG2][MAX_RTS]; 475 476 struct { 477 VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 478 VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 479 VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 480 481 VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 482 VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 483 VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES]; 484 } ds_clear[MAX_SAMPLES_LOG2]; 485 486 VkPipelineLayout clear_color_p_layout; 487 VkPipelineLayout clear_depth_p_layout; 488 VkPipelineLayout clear_depth_unrestricted_p_layout; 489 490 /* Optimized compute fast HTILE clear for stencil or depth only. */ 491 VkPipeline clear_htile_mask_pipeline; 492 VkPipelineLayout clear_htile_mask_p_layout; 493 VkDescriptorSetLayout clear_htile_mask_ds_layout; 494 495 /* Copy VRS into HTILE. */ 496 VkPipeline copy_vrs_htile_pipeline; 497 VkPipelineLayout copy_vrs_htile_p_layout; 498 VkDescriptorSetLayout copy_vrs_htile_ds_layout; 499 500 /* Clear DCC with comp-to-single. */ 501 VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */ 502 VkPipelineLayout clear_dcc_comp_to_single_p_layout; 503 VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout; 504 505 struct { 506 /** Pipeline that blits from a 1D image. */ 507 VkPipeline pipeline_1d_src[NUM_META_FS_KEYS]; 508 509 /** Pipeline that blits from a 2D image. */ 510 VkPipeline pipeline_2d_src[NUM_META_FS_KEYS]; 511 512 /** Pipeline that blits from a 3D image. 
*/ 513 VkPipeline pipeline_3d_src[NUM_META_FS_KEYS]; 514 515 VkPipeline depth_only_1d_pipeline; 516 VkPipeline depth_only_2d_pipeline; 517 VkPipeline depth_only_3d_pipeline; 518 519 VkPipeline stencil_only_1d_pipeline; 520 VkPipeline stencil_only_2d_pipeline; 521 VkPipeline stencil_only_3d_pipeline; 522 VkPipelineLayout pipeline_layout; 523 VkDescriptorSetLayout ds_layout; 524 } blit; 525 526 struct { 527 VkPipelineLayout p_layouts[5]; 528 VkDescriptorSetLayout ds_layouts[5]; 529 VkPipeline pipelines[5][NUM_META_FS_KEYS]; 530 531 VkPipeline depth_only_pipeline[5]; 532 533 VkPipeline stencil_only_pipeline[5]; 534 } blit2d[MAX_SAMPLES_LOG2]; 535 536 struct { 537 VkPipelineLayout img_p_layout; 538 VkDescriptorSetLayout img_ds_layout; 539 VkPipeline pipeline; 540 VkPipeline pipeline_3d; 541 } itob; 542 struct { 543 VkPipelineLayout img_p_layout; 544 VkDescriptorSetLayout img_ds_layout; 545 VkPipeline pipeline; 546 VkPipeline pipeline_3d; 547 } btoi; 548 struct { 549 VkPipelineLayout img_p_layout; 550 VkDescriptorSetLayout img_ds_layout; 551 VkPipeline pipeline; 552 } btoi_r32g32b32; 553 struct { 554 VkPipelineLayout img_p_layout; 555 VkDescriptorSetLayout img_ds_layout; 556 VkPipeline pipeline[MAX_SAMPLES_LOG2]; 557 VkPipeline pipeline_3d; 558 } itoi; 559 struct { 560 VkPipelineLayout img_p_layout; 561 VkDescriptorSetLayout img_ds_layout; 562 VkPipeline pipeline; 563 } itoi_r32g32b32; 564 struct { 565 VkPipelineLayout img_p_layout; 566 VkDescriptorSetLayout img_ds_layout; 567 VkPipeline pipeline[MAX_SAMPLES_LOG2]; 568 VkPipeline pipeline_3d; 569 } cleari; 570 struct { 571 VkPipelineLayout img_p_layout; 572 VkDescriptorSetLayout img_ds_layout; 573 VkPipeline pipeline; 574 } cleari_r32g32b32; 575 struct { 576 VkPipelineLayout p_layout; 577 VkDescriptorSetLayout ds_layout; 578 VkPipeline pipeline[MAX_SAMPLES_LOG2]; 579 } fmask_copy; 580 581 struct { 582 VkPipelineLayout p_layout; 583 VkPipeline pipeline[NUM_META_FS_KEYS]; 584 } resolve; 585 586 struct { 587 
VkDescriptorSetLayout ds_layout; 588 VkPipelineLayout p_layout; 589 struct { 590 VkPipeline pipeline; 591 VkPipeline i_pipeline; 592 VkPipeline srgb_pipeline; 593 } rc[MAX_SAMPLES_LOG2]; 594 595 VkPipeline depth_zero_pipeline; 596 struct { 597 VkPipeline average_pipeline; 598 VkPipeline max_pipeline; 599 VkPipeline min_pipeline; 600 } depth[MAX_SAMPLES_LOG2]; 601 602 VkPipeline stencil_zero_pipeline; 603 struct { 604 VkPipeline max_pipeline; 605 VkPipeline min_pipeline; 606 } stencil[MAX_SAMPLES_LOG2]; 607 } resolve_compute; 608 609 struct { 610 VkDescriptorSetLayout ds_layout; 611 VkPipelineLayout p_layout; 612 613 struct { 614 VkPipeline pipeline[NUM_META_FS_KEYS]; 615 } rc[MAX_SAMPLES_LOG2]; 616 617 VkPipeline depth_zero_pipeline; 618 struct { 619 VkPipeline average_pipeline; 620 VkPipeline max_pipeline; 621 VkPipeline min_pipeline; 622 } depth[MAX_SAMPLES_LOG2]; 623 624 VkPipeline stencil_zero_pipeline; 625 struct { 626 VkPipeline max_pipeline; 627 VkPipeline min_pipeline; 628 } stencil[MAX_SAMPLES_LOG2]; 629 } resolve_fragment; 630 631 struct { 632 VkPipelineLayout p_layout; 633 VkPipeline decompress_pipeline; 634 VkPipeline resummarize_pipeline; 635 } depth_decomp[MAX_SAMPLES_LOG2]; 636 637 VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout; 638 VkPipelineLayout expand_depth_stencil_compute_p_layout; 639 VkPipeline expand_depth_stencil_compute_pipeline; 640 641 struct { 642 VkPipelineLayout p_layout; 643 VkPipeline cmask_eliminate_pipeline; 644 VkPipeline fmask_decompress_pipeline; 645 VkPipeline dcc_decompress_pipeline; 646 647 VkDescriptorSetLayout dcc_decompress_compute_ds_layout; 648 VkPipelineLayout dcc_decompress_compute_p_layout; 649 VkPipeline dcc_decompress_compute_pipeline; 650 } fast_clear_flush; 651 652 struct { 653 VkPipelineLayout fill_p_layout; 654 VkPipelineLayout copy_p_layout; 655 VkPipeline fill_pipeline; 656 VkPipeline copy_pipeline; 657 } buffer; 658 659 struct { 660 VkDescriptorSetLayout ds_layout; 661 VkPipelineLayout 
p_layout; 662 VkPipeline occlusion_query_pipeline; 663 VkPipeline pipeline_statistics_query_pipeline; 664 VkPipeline tfb_query_pipeline; 665 VkPipeline timestamp_query_pipeline; 666 VkPipeline pg_query_pipeline; 667 } query; 668 669 struct { 670 VkDescriptorSetLayout ds_layout; 671 VkPipelineLayout p_layout; 672 VkPipeline pipeline[MAX_SAMPLES_LOG2]; 673 } fmask_expand; 674 675 struct { 676 VkDescriptorSetLayout ds_layout; 677 VkPipelineLayout p_layout; 678 VkPipeline pipeline[32]; 679 } dcc_retile; 680 681 struct { 682 VkPipelineLayout leaf_p_layout; 683 VkPipeline leaf_pipeline; 684 VkPipelineLayout morton_p_layout; 685 VkPipeline morton_pipeline; 686 VkPipelineLayout internal_p_layout; 687 VkPipeline internal_pipeline; 688 VkPipelineLayout copy_p_layout; 689 VkPipeline copy_pipeline; 690 691 struct radix_sort_vk *radix_sort; 692 struct radix_sort_vk_sort_devaddr_info radix_sort_info; 693 } accel_struct_build; 694 695 struct { 696 VkDescriptorSetLayout ds_layout; 697 VkPipelineLayout p_layout; 698 VkPipeline pipeline; 699 } etc_decode; 700 701 struct { 702 VkDescriptorSetLayout ds_layout; 703 VkPipelineLayout p_layout; 704 VkPipeline pipeline; 705 } dgc_prepare; 706}; 707 708#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1) 709 710struct radv_deferred_queue_submission; 711 712static inline enum radv_queue_family 713vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index) 714{ 715 if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL || 716 queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT) 717 return RADV_QUEUE_FOREIGN; 718 if (queue_family_index == VK_QUEUE_FAMILY_IGNORED) 719 return RADV_QUEUE_IGNORED; 720 721 assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES); 722 return phys_dev->vk_queue_to_radv[queue_family_index]; 723} 724 725enum amd_ip_type radv_queue_family_to_ring(struct radv_physical_device *physical_device, 726 enum radv_queue_family f); 727 728struct radv_queue_ring_info { 729 uint32_t scratch_size_per_wave; 730 
uint32_t scratch_waves; 731 uint32_t compute_scratch_size_per_wave; 732 uint32_t compute_scratch_waves; 733 uint32_t esgs_ring_size; 734 uint32_t gsvs_ring_size; 735 bool tess_rings; 736 bool task_rings; 737 bool mesh_scratch_ring; 738 bool gds; 739 bool gds_oa; 740 bool sample_positions; 741}; 742 743struct radv_queue_state { 744 enum radv_queue_family qf; 745 struct radv_queue_ring_info ring_info; 746 747 struct radeon_winsys_bo *scratch_bo; 748 struct radeon_winsys_bo *descriptor_bo; 749 struct radeon_winsys_bo *compute_scratch_bo; 750 struct radeon_winsys_bo *esgs_ring_bo; 751 struct radeon_winsys_bo *gsvs_ring_bo; 752 struct radeon_winsys_bo *tess_rings_bo; 753 struct radeon_winsys_bo *task_rings_bo; 754 struct radeon_winsys_bo *mesh_scratch_ring_bo; 755 struct radeon_winsys_bo *gds_bo; 756 struct radeon_winsys_bo *gds_oa_bo; 757 758 struct radeon_cmdbuf *initial_preamble_cs; 759 struct radeon_cmdbuf *initial_full_flush_preamble_cs; 760 struct radeon_cmdbuf *continue_preamble_cs; 761}; 762 763struct radv_queue { 764 struct vk_queue vk; 765 struct radv_device *device; 766 struct radeon_winsys_ctx *hw_ctx; 767 enum radeon_ctx_priority priority; 768 struct radv_queue_state state; 769 struct radv_queue_state *ace_internal_state; 770}; 771 772#define RADV_BORDER_COLOR_COUNT 4096 773#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT) 774 775struct radv_device_border_color_data { 776 bool used[RADV_BORDER_COLOR_COUNT]; 777 778 struct radeon_winsys_bo *bo; 779 VkClearColorValue *colors_gpu_ptr; 780 781 /* Mutex is required to guarantee vkCreateSampler thread safety 782 * given that we are writing to a buffer and checking color occupation */ 783 mtx_t mutex; 784}; 785 786enum radv_force_vrs { 787 RADV_FORCE_VRS_1x1 = 0, 788 RADV_FORCE_VRS_2x2, 789 RADV_FORCE_VRS_2x1, 790 RADV_FORCE_VRS_1x2, 791}; 792 793struct radv_notifier { 794 int fd; 795 int watch; 796 bool quit; 797 thrd_t thread; 798}; 799 800struct radv_device { 801 struct 
vk_device vk; 802 803 struct radv_instance *instance; 804 struct radeon_winsys *ws; 805 806 struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX]; 807 struct radv_meta_state meta_state; 808 809 struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES]; 810 int queue_count[RADV_MAX_QUEUE_FAMILIES]; 811 812 bool pbb_allowed; 813 uint32_t scratch_waves; 814 uint32_t dispatch_initiator; 815 uint32_t dispatch_initiator_task; 816 817 /* MSAA sample locations. 818 * The first index is the sample index. 819 * The second index is the coordinate: X, Y. */ 820 float sample_locations_1x[1][2]; 821 float sample_locations_2x[2][2]; 822 float sample_locations_4x[4][2]; 823 float sample_locations_8x[8][2]; 824 825 /* GFX7 and later */ 826 uint32_t gfx_init_size_dw; 827 struct radeon_winsys_bo *gfx_init; 828 829 struct radeon_winsys_bo *trace_bo; 830 uint32_t *trace_id_ptr; 831 832 /* Whether to keep shader debug info, for debugging. */ 833 bool keep_shader_info; 834 835 struct radv_physical_device *physical_device; 836 837 /* Backup in-memory cache to be used if the app doesn't provide one */ 838 struct radv_pipeline_cache *mem_cache; 839 840 /* 841 * use different counters so MSAA MRTs get consecutive surface indices, 842 * even if MASK is allocated in between. 843 */ 844 uint32_t image_mrt_offset_counter; 845 uint32_t fmask_mrt_offset_counter; 846 847 struct list_head shader_arenas; 848 unsigned shader_arena_shift; 849 uint8_t shader_free_list_mask; 850 struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS]; 851 struct list_head shader_block_obj_pool; 852 mtx_t shader_arena_mutex; 853 854 /* For detecting VM faults reported by dmesg. */ 855 uint64_t dmesg_timestamp; 856 857 /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */ 858 bool robust_buffer_access; 859 bool robust_buffer_access2; 860 861 /* Whether to inline the compute dispatch size in user sgprs. 
*/ 862 bool load_grid_size_from_user_sgpr; 863 864 /* Whether the driver uses a global BO list. */ 865 bool use_global_bo_list; 866 867 /* Whether attachment VRS is enabled. */ 868 bool attachment_vrs_enabled; 869 870 /* Whether shader image 32-bit float atomics are enabled. */ 871 bool image_float32_atomics; 872 873 /* Whether 2D views of 3D image is enabled. */ 874 bool image_2d_view_of_3d; 875 876 /* Whether primitives generated query features are enabled. */ 877 bool primitives_generated_query; 878 879 /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */ 880 int force_aniso; 881 882 struct radv_device_border_color_data border_color_data; 883 884 /* Thread trace. */ 885 struct ac_thread_trace_data thread_trace; 886 887 /* SPM. */ 888 struct ac_spm_trace_data spm_trace; 889 890 /* Trap handler. */ 891 struct radv_trap_handler_shader *trap_handler_shader; 892 struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */ 893 uint32_t *tma_ptr; 894 895 /* Overallocation. */ 896 bool overallocation_disallowed; 897 uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS]; 898 mtx_t overallocation_mutex; 899 900 /* RADV_FORCE_VRS. */ 901 struct radv_notifier notifier; 902 enum radv_force_vrs force_vrs; 903 904 /* Depth image for VRS when not bound by the app. */ 905 struct { 906 struct radv_image *image; 907 struct radv_buffer *buffer; /* HTILE */ 908 struct radv_device_memory *mem; 909 } vrs; 910 911 struct u_rwlock vs_prologs_lock; 912 struct hash_table *vs_prologs; 913 914 /* Prime blit sdma queue */ 915 struct radv_queue *private_sdma_queue; 916 917 struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS]; 918 struct radv_shader_part *instance_rate_vs_prologs[816]; 919 920 simple_mtx_t trace_mtx; 921 922 /* Whether per-vertex VRS is forced. */ 923 bool force_vrs_enabled; 924 925 /* Whether shaders created through application entrypoints are considered internal. 
*/ 926 bool app_shaders_internal; 927 928 simple_mtx_t pstate_mtx; 929 unsigned pstate_cnt; 930 931 /* BO to contain some performance counter helpers: 932 * - A lock for profiling cmdbuffers. 933 * - a temporary fence for the end query synchronization. 934 * - the pass to use for profiling. (as an array of bools) 935 */ 936 struct radeon_winsys_bo *perf_counter_bo; 937 938 /* Interleaved lock/unlock commandbuffers for perfcounter passes. */ 939 struct radeon_cmdbuf **perf_counter_lock_cs; 940 941 bool uses_device_generated_commands; 942}; 943 944bool radv_device_acquire_performance_counters(struct radv_device *device); 945void radv_device_release_performance_counters(struct radv_device *device); 946 947struct radv_device_memory { 948 struct vk_object_base base; 949 struct radeon_winsys_bo *bo; 950 /* for dedicated allocations */ 951 struct radv_image *image; 952 struct radv_buffer *buffer; 953 uint32_t heap_index; 954 uint64_t alloc_size; 955 void *map; 956 void *user_ptr; 957 958#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 959 struct AHardwareBuffer *android_hardware_buffer; 960#endif 961}; 962 963void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, 964 struct radeon_winsys_bo *bo); 965void radv_device_memory_finish(struct radv_device_memory *mem); 966 967struct radv_descriptor_range { 968 uint64_t va; 969 uint32_t size; 970}; 971 972struct radv_descriptor_set_header { 973 struct vk_object_base base; 974 struct radv_descriptor_set_layout *layout; 975 uint32_t size; 976 uint32_t buffer_count; 977 978 struct radeon_winsys_bo *bo; 979 uint64_t va; 980 uint32_t *mapped_ptr; 981 struct radv_descriptor_range *dynamic_descriptors; 982}; 983 984struct radv_descriptor_set { 985 struct radv_descriptor_set_header header; 986 987 struct radeon_winsys_bo *descriptors[]; 988}; 989 990struct radv_push_descriptor_set { 991 struct radv_descriptor_set_header set; 992 uint32_t capacity; 993}; 994 995struct radv_descriptor_pool_entry { 996 uint32_t 
offset; 997 uint32_t size; 998 struct radv_descriptor_set *set; 999}; 1000 1001struct radv_descriptor_pool { 1002 struct vk_object_base base; 1003 struct radeon_winsys_bo *bo; 1004 uint8_t *host_bo; 1005 uint8_t *mapped_ptr; 1006 uint64_t current_offset; 1007 uint64_t size; 1008 1009 uint8_t *host_memory_base; 1010 uint8_t *host_memory_ptr; 1011 uint8_t *host_memory_end; 1012 1013 uint32_t entry_count; 1014 uint32_t max_entry_count; 1015 struct radv_descriptor_pool_entry entries[0]; 1016}; 1017 1018struct radv_descriptor_update_template_entry { 1019 VkDescriptorType descriptor_type; 1020 1021 /* The number of descriptors to update */ 1022 uint32_t descriptor_count; 1023 1024 /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */ 1025 uint32_t dst_offset; 1026 1027 /* In dwords. Not valid/used for dynamic descriptors */ 1028 uint32_t dst_stride; 1029 1030 uint32_t buffer_offset; 1031 1032 /* Only valid for combined image samplers and samplers */ 1033 uint8_t has_sampler; 1034 uint8_t sampler_offset; 1035 1036 /* In bytes */ 1037 size_t src_offset; 1038 size_t src_stride; 1039 1040 /* For push descriptors */ 1041 const uint32_t *immutable_samplers; 1042}; 1043 1044struct radv_descriptor_update_template { 1045 struct vk_object_base base; 1046 uint32_t entry_count; 1047 VkPipelineBindPoint bind_point; 1048 struct radv_descriptor_update_template_entry entry[0]; 1049}; 1050 1051struct radv_buffer { 1052 struct vk_buffer vk; 1053 1054 /* Set when bound */ 1055 struct radeon_winsys_bo *bo; 1056 VkDeviceSize offset; 1057}; 1058 1059void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, 1060 struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset); 1061void radv_buffer_finish(struct radv_buffer *buffer); 1062 1063enum radv_dynamic_state_bits { 1064 RADV_DYNAMIC_VIEWPORT = 1ull << 0, 1065 RADV_DYNAMIC_SCISSOR = 1ull << 1, 1066 RADV_DYNAMIC_LINE_WIDTH = 1ull << 2, 1067 RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3, 1068 
RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4, 1069 RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5, 1070 RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6, 1071 RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7, 1072 RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8, 1073 RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9, 1074 RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10, 1075 RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11, 1076 RADV_DYNAMIC_CULL_MODE = 1ull << 12, 1077 RADV_DYNAMIC_FRONT_FACE = 1ull << 13, 1078 RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14, 1079 RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15, 1080 RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16, 1081 RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17, 1082 RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18, 1083 RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19, 1084 RADV_DYNAMIC_STENCIL_OP = 1ull << 20, 1085 RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21, 1086 RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22, 1087 RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23, 1088 RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24, 1089 RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25, 1090 RADV_DYNAMIC_LOGIC_OP = 1ull << 26, 1091 RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27, 1092 RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28, 1093 RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29, 1094 RADV_DYNAMIC_ALL = (1ull << 30) - 1, 1095}; 1096 1097enum radv_cmd_dirty_bits { 1098 /* Keep the dynamic state dirty bits in sync with 1099 * enum radv_dynamic_state_bits */ 1100 RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0, 1101 RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1, 1102 RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2, 1103 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3, 1104 RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4, 1105 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5, 1106 RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6, 1107 RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7, 1108 RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8, 1109 
RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9, 1110 RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10, 1111 RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11, 1112 RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12, 1113 RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13, 1114 RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14, 1115 RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15, 1116 RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16, 1117 RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17, 1118 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18, 1119 RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19, 1120 RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20, 1121 RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21, 1122 RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22, 1123 RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23, 1124 RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24, 1125 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25, 1126 RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26, 1127 RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27, 1128 RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28, 1129 RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29, 1130 /* Mask of the 30 dynamic-state dirty bits above (bits 0-29). */ RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1, 1131 /* Non-dynamic dirty bits start directly above the dynamic-state bits. */ RADV_CMD_DIRTY_PIPELINE = 1ull << 30, 1132 /* NOTE: the values from 1ull << 31 upwards are not representable as int. ISO C before C23 requires every enumerator to fit in int, so these rely on the compiler accepting wider enumerator values, and a combination of these bits needs a 64-bit variable to hold it. */ RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31, 1133 RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32, 1134 RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33, 1135 RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34, 1136}; 1137 1138enum radv_cmd_flush_bits { 1139 /* Instruction cache. */ 1140 RADV_CMD_FLAG_INV_ICACHE = 1 << 0, 1141 /* Scalar L1 cache. */ 1142 RADV_CMD_FLAG_INV_SCACHE = 1 << 1, 1143 /* Vector L1 cache. */ 1144 RADV_CMD_FLAG_INV_VCACHE = 1 << 2, 1145 /* L2 cache + L2 metadata cache writeback & invalidate. 1146 * GFX6-8: Used by shaders only. GFX9-10: Used by everything.
*/ 1147 RADV_CMD_FLAG_INV_L2 = 1 << 3, 1148 /* L2 writeback (write dirty L2 lines to memory for non-L2 clients). 1149 * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8. 1150 * GFX6-7 will do complete invalidation, because the writeback is unsupported. */ 1151 RADV_CMD_FLAG_WB_L2 = 1 << 4, 1152 /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata 1153 * changed and we want to read an image from shaders. */ 1154 RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5, 1155 /* Framebuffer caches */ 1156 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6, 1157 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7, 1158 RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8, 1159 RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9, 1160 /* Engine synchronization. */ 1161 RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10, 1162 RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11, 1163 RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12, 1164 RADV_CMD_FLAG_VGT_FLUSH = 1 << 13, 1165 /* Pipeline query controls. */ 1166 RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14, 1167 RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15, 1168 RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16, 1169 1170 RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = 1171 (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | 1172 RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META), 1173 1174 RADV_CMD_FLUSH_ALL_COMPUTE = 1175 (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | 1176 RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH), 1177}; 1178 1179enum radv_nggc_settings { 1180 radv_nggc_none = 0, 1181 radv_nggc_front_face = 1 << 0, 1182 radv_nggc_back_face = 1 << 1, 1183 radv_nggc_face_is_ccw = 1 << 2, 1184 radv_nggc_small_primitives = 1 << 3, 1185}; 1186 1187struct radv_vertex_binding { 1188 VkDeviceSize offset; 1189 VkDeviceSize size; 1190 VkDeviceSize stride; 1191}; 1192 1193struct radv_streamout_binding { 1194 struct radv_buffer *buffer; 1195 VkDeviceSize offset; 1196 VkDeviceSize size; 
1197}; 1198 1199struct radv_streamout_state { 1200 /* Mask of bound streamout buffers. */ 1201 uint8_t enabled_mask; 1202 1203 /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */ 1204 uint32_t hw_enabled_mask; 1205 1206 /* State of VGT_STRMOUT_(CONFIG|EN) */ 1207 bool streamout_enabled; 1208}; 1209 1210struct radv_viewport_state { 1211 uint32_t count; 1212 VkViewport viewports[MAX_VIEWPORTS]; 1213 struct { 1214 float scale[3]; 1215 float translate[3]; 1216 } xform[MAX_VIEWPORTS]; 1217}; 1218 1219struct radv_scissor_state { 1220 uint32_t count; 1221 VkRect2D scissors[MAX_SCISSORS]; 1222}; 1223 1224struct radv_discard_rectangle_state { 1225 uint32_t count; 1226 VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; 1227}; 1228 1229struct radv_sample_locations_state { 1230 VkSampleCountFlagBits per_pixel; 1231 VkExtent2D grid_size; 1232 uint32_t count; 1233 VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; 1234}; 1235 1236struct radv_dynamic_state { 1237 /** 1238 * Bitmask of (1ull << VK_DYNAMIC_STATE_*). 1239 * Defines the set of saved dynamic state. 
1240 */ 1241 uint64_t mask; 1242 1243 struct radv_viewport_state viewport; 1244 1245 struct radv_scissor_state scissor; 1246 1247 float line_width; 1248 1249 struct { 1250 float bias; 1251 float clamp; 1252 float slope; 1253 } depth_bias; 1254 1255 float blend_constants[4]; 1256 1257 struct { 1258 float min; 1259 float max; 1260 } depth_bounds; 1261 1262 struct { 1263 uint32_t front; 1264 uint32_t back; 1265 } stencil_compare_mask; 1266 1267 struct { 1268 uint32_t front; 1269 uint32_t back; 1270 } stencil_write_mask; 1271 1272 struct { 1273 struct { 1274 VkStencilOp fail_op; 1275 VkStencilOp pass_op; 1276 VkStencilOp depth_fail_op; 1277 VkCompareOp compare_op; 1278 } front; 1279 1280 struct { 1281 VkStencilOp fail_op; 1282 VkStencilOp pass_op; 1283 VkStencilOp depth_fail_op; 1284 VkCompareOp compare_op; 1285 } back; 1286 } stencil_op; 1287 1288 struct { 1289 uint32_t front; 1290 uint32_t back; 1291 } stencil_reference; 1292 1293 struct radv_discard_rectangle_state discard_rectangle; 1294 1295 struct radv_sample_locations_state sample_location; 1296 1297 struct { 1298 uint32_t factor; 1299 uint16_t pattern; 1300 } line_stipple; 1301 1302 VkCullModeFlags cull_mode; 1303 VkFrontFace front_face; 1304 unsigned primitive_topology; 1305 1306 bool depth_test_enable; 1307 bool depth_write_enable; 1308 VkCompareOp depth_compare_op; 1309 bool depth_bounds_test_enable; 1310 bool stencil_test_enable; 1311 1312 struct { 1313 VkExtent2D size; 1314 VkFragmentShadingRateCombinerOpKHR combiner_ops[2]; 1315 } fragment_shading_rate; 1316 1317 bool depth_bias_enable; 1318 bool primitive_restart_enable; 1319 bool rasterizer_discard_enable; 1320 1321 unsigned logic_op; 1322 1323 uint32_t color_write_enable; 1324}; 1325 1326extern const struct radv_dynamic_state default_dynamic_state; 1327 1328const char *radv_get_debug_option_name(int id); 1329 1330const char *radv_get_perftest_option_name(int id); 1331 1332int radv_get_int_debug_option(const char *name, int default_value); 1333 
1334struct radv_color_buffer_info { 1335 uint64_t cb_color_base; 1336 uint64_t cb_color_cmask; 1337 uint64_t cb_color_fmask; 1338 uint64_t cb_dcc_base; 1339 uint32_t cb_color_slice; 1340 uint32_t cb_color_view; 1341 uint32_t cb_color_info; 1342 uint32_t cb_color_attrib; 1343 uint32_t cb_color_attrib2; /* GFX9 and later */ 1344 uint32_t cb_color_attrib3; /* GFX10 and later */ 1345 uint32_t cb_dcc_control; 1346 uint32_t cb_color_cmask_slice; 1347 uint32_t cb_color_fmask_slice; 1348 union { 1349 uint32_t cb_color_pitch; // GFX6-GFX8 1350 uint32_t cb_mrt_epitch; // GFX9+ 1351 }; 1352}; 1353 1354struct radv_ds_buffer_info { 1355 uint64_t db_z_read_base; 1356 uint64_t db_stencil_read_base; 1357 uint64_t db_z_write_base; 1358 uint64_t db_stencil_write_base; 1359 uint64_t db_htile_data_base; 1360 uint32_t db_depth_info; 1361 uint32_t db_z_info; 1362 uint32_t db_stencil_info; 1363 uint32_t db_depth_view; 1364 uint32_t db_depth_size; 1365 uint32_t db_depth_slice; 1366 uint32_t db_htile_surface; 1367 uint32_t pa_su_poly_offset_db_fmt_cntl; 1368 uint32_t db_z_info2; /* GFX9 only */ 1369 uint32_t db_stencil_info2; /* GFX9 only */ 1370}; 1371 1372void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb, 1373 struct radv_image_view *iview); 1374void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds, 1375 struct radv_image_view *iview); 1376void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, 1377 struct radv_ds_buffer_info *ds); 1378 1379/** 1380 * Attachment state when recording a renderpass instance. 1381 * 1382 * The clear value is valid only if there exists a pending clear. 
1383 */ 1384struct radv_attachment_state { 1385 VkImageAspectFlags pending_clear_aspects; 1386 uint32_t cleared_views; 1387 VkClearValue clear_value; 1388 VkImageLayout current_layout; 1389 VkImageLayout current_stencil_layout; 1390 bool current_in_render_loop; 1391 struct radv_sample_locations_state sample_location; 1392 1393 union { 1394 struct radv_color_buffer_info cb; 1395 struct radv_ds_buffer_info ds; 1396 }; 1397 struct radv_image_view *iview; 1398}; 1399 1400struct radv_descriptor_state { 1401 struct radv_descriptor_set *sets[MAX_SETS]; 1402 uint32_t dirty; 1403 uint32_t valid; 1404 struct radv_push_descriptor_set push_set; 1405 bool push_dirty; 1406 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; 1407}; 1408 1409struct radv_subpass_sample_locs_state { 1410 uint32_t subpass_idx; 1411 struct radv_sample_locations_state sample_location; 1412}; 1413 1414enum rgp_flush_bits { 1415 RGP_FLUSH_WAIT_ON_EOP_TS = 0x1, 1416 RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2, 1417 RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4, 1418 RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8, 1419 RGP_FLUSH_PFP_SYNC_ME = 0x10, 1420 RGP_FLUSH_SYNC_CP_DMA = 0x20, 1421 RGP_FLUSH_INVAL_VMEM_L0 = 0x40, 1422 RGP_FLUSH_INVAL_ICACHE = 0x80, 1423 RGP_FLUSH_INVAL_SMEM_L0 = 0x100, 1424 RGP_FLUSH_FLUSH_L2 = 0x200, 1425 RGP_FLUSH_INVAL_L2 = 0x400, 1426 RGP_FLUSH_FLUSH_CB = 0x800, 1427 RGP_FLUSH_INVAL_CB = 0x1000, 1428 RGP_FLUSH_FLUSH_DB = 0x2000, 1429 RGP_FLUSH_INVAL_DB = 0x4000, 1430 RGP_FLUSH_INVAL_L1 = 0x8000, 1431}; 1432 1433struct radv_cmd_state { 1434 /* Vertex descriptors */ 1435 uint64_t vb_va; 1436 1437 bool predicating; 1438 uint64_t dirty; 1439 1440 uint32_t prefetch_L2_mask; 1441 1442 struct radv_graphics_pipeline *graphics_pipeline; 1443 struct radv_graphics_pipeline *emitted_graphics_pipeline; 1444 struct radv_compute_pipeline *compute_pipeline; 1445 struct radv_compute_pipeline *emitted_compute_pipeline; 1446 struct radv_compute_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */ 1447 struct vk_framebuffer 
*framebuffer; 1448 struct radv_render_pass *pass; 1449 const struct radv_subpass *subpass; 1450 struct radv_dynamic_state dynamic; 1451 struct radv_vs_input_state dynamic_vs_input; 1452 struct radv_attachment_state *attachments; 1453 struct radv_streamout_state streamout; 1454 VkRect2D render_area; 1455 1456 uint32_t num_subpass_sample_locs; 1457 struct radv_subpass_sample_locs_state *subpass_sample_locs; 1458 1459 /* Index buffer */ 1460 struct radv_buffer *index_buffer; 1461 uint64_t index_offset; 1462 uint32_t index_type; 1463 uint32_t max_index_count; 1464 uint64_t index_va; 1465 int32_t last_index_type; 1466 1467 int32_t last_primitive_reset_en; 1468 uint32_t last_primitive_reset_index; 1469 enum radv_cmd_flush_bits flush_bits; 1470 unsigned active_occlusion_queries; 1471 bool perfect_occlusion_queries_enabled; 1472 unsigned active_pipeline_queries; 1473 unsigned active_pipeline_gds_queries; 1474 bool prims_gen_query_enabled; 1475 uint32_t trace_id; 1476 uint32_t last_ia_multi_vgt_param; 1477 1478 uint32_t last_num_instances; 1479 uint32_t last_first_instance; 1480 uint32_t last_vertex_offset; 1481 uint32_t last_drawid; 1482 uint32_t last_subpass_color_count; 1483 1484 uint32_t last_sx_ps_downconvert; 1485 uint32_t last_sx_blend_opt_epsilon; 1486 uint32_t last_sx_blend_opt_control; 1487 1488 /* Whether CP DMA is busy/idle. */ 1489 bool dma_is_busy; 1490 1491 /* Whether any images that are not L2 coherent are dirty from the CB. */ 1492 bool rb_noncoherent_dirty; 1493 1494 /* Conditional rendering info. */ 1495 uint8_t predication_op; /* 32-bit or 64-bit predicate value */ 1496 int predication_type; /* -1: disabled, 0: normal, 1: inverted */ 1497 uint64_t predication_va; 1498 1499 /* Inheritance info. */ 1500 VkQueryPipelineStatisticFlags inherited_pipeline_statistics; 1501 1502 bool context_roll_without_scissor_emitted; 1503 1504 /* SQTT related state. 
*/ 1505 uint32_t current_event_type; 1506 uint32_t num_events; 1507 uint32_t num_layout_transitions; 1508 bool pending_sqtt_barrier_end; 1509 enum rgp_flush_bits sqtt_flush_bits; 1510 1511 /* NGG culling state. */ 1512 uint32_t last_nggc_settings; 1513 int8_t last_nggc_settings_sgpr_idx; 1514 bool last_nggc_skip; 1515 1516 /* Mesh shading state. */ 1517 bool mesh_shading; 1518 1519 uint8_t cb_mip[MAX_RTS]; 1520 1521 /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */ 1522 bool uses_draw_indirect_multi; 1523 1524 uint32_t rt_stack_size; 1525 1526 struct radv_shader_part *emitted_vs_prolog; 1527 uint32_t *emitted_vs_prolog_key; 1528 uint32_t emitted_vs_prolog_key_hash; 1529 uint32_t vbo_misaligned_mask; 1530 uint32_t vbo_misaligned_mask_invalid; 1531 uint32_t vbo_bound_mask; 1532 1533 /* Whether the cmdbuffer owns the current render pass rather than the app. */ 1534 bool own_render_pass; 1535 1536 /* Per-vertex VRS state. */ 1537 uint32_t last_vrs_rates; 1538 int8_t last_vrs_rates_sgpr_idx; 1539 1540 /* Whether to suspend streamout for internal driver operations. */ 1541 bool suspend_streamout; 1542 1543 /* Whether this commandbuffer uses performance counters. 
*/ 1544 bool uses_perf_counters; 1545}; 1546 1547struct radv_cmd_pool { 1548 struct vk_command_pool vk; 1549 struct list_head cmd_buffers; 1550 struct list_head free_cmd_buffers; 1551}; 1552 1553struct radv_cmd_buffer_upload { 1554 uint8_t *map; 1555 unsigned offset; 1556 uint64_t size; 1557 struct radeon_winsys_bo *upload_bo; 1558 struct list_head list; 1559}; 1560 1561enum radv_cmd_buffer_status { 1562 RADV_CMD_BUFFER_STATUS_INVALID, 1563 RADV_CMD_BUFFER_STATUS_INITIAL, 1564 RADV_CMD_BUFFER_STATUS_RECORDING, 1565 RADV_CMD_BUFFER_STATUS_EXECUTABLE, 1566 RADV_CMD_BUFFER_STATUS_PENDING, 1567}; 1568 1569struct dynamic_vertex_format_cache { 1570 VkFormat format; 1571 uint8_t hw_fmt; 1572 uint8_t fmt_align_req_minus_1; 1573 uint8_t fmt_size; 1574 bool post_shuffle; 1575 bool alpha_adjust_lo; 1576 bool alpha_adjust_hi; 1577}; 1578 1579struct radv_cmd_buffer { 1580 struct vk_command_buffer vk; 1581 1582 struct radv_device *device; 1583 1584 struct radv_cmd_pool *pool; 1585 struct list_head pool_link; 1586 1587 struct util_dynarray cached_vertex_formats; 1588 VkCommandBufferUsageFlags usage_flags; 1589 enum radv_cmd_buffer_status status; 1590 struct radeon_cmdbuf *cs; 1591 struct radv_cmd_state state; 1592 struct radv_buffer *vertex_binding_buffers[MAX_VBS]; 1593 struct radv_vertex_binding vertex_bindings[MAX_VBS]; 1594 uint32_t used_vertex_bindings; 1595 struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS]; 1596 enum radv_queue_family qf; 1597 1598 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; 1599 VkShaderStageFlags push_constant_stages; 1600 struct radv_descriptor_set_header meta_push_descriptors; 1601 1602 struct radv_descriptor_state descriptors[MAX_BIND_POINTS]; 1603 1604 struct radv_cmd_buffer_upload upload; 1605 1606 uint32_t scratch_size_per_wave_needed; 1607 uint32_t scratch_waves_wanted; 1608 uint32_t compute_scratch_size_per_wave_needed; 1609 uint32_t compute_scratch_waves_wanted; 1610 uint32_t esgs_ring_size_needed; 1611 uint32_t 
gsvs_ring_size_needed; 1612 bool tess_rings_needed; 1613 bool task_rings_needed; 1614 bool mesh_scratch_ring_needed; 1615 bool gds_needed; /* for GFX10 streamout and NGG GS queries */ 1616 bool gds_oa_needed; /* for GFX10 streamout */ 1617 bool sample_positions_needed; 1618 1619 VkResult record_result; 1620 1621 uint64_t gfx9_fence_va; 1622 uint32_t gfx9_fence_idx; 1623 uint64_t gfx9_eop_bug_va; 1624 1625 uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */ 1626 bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */ 1627 1628 struct { 1629 /** 1630 * Internal command stream that is used when some graphics work 1631 * also requires a submission to the compute queue. 1632 */ 1633 struct radeon_cmdbuf *cs; 1634 1635 /** Flush bits for the internal cmdbuf. */ 1636 enum radv_cmd_flush_bits flush_bits; 1637 1638 /** 1639 * For synchronization between the ACE and GFX cmdbuf. 1640 * The value of this semaphore is incremented whenever we 1641 * encounter a barrier that affects ACE. At sync points, 1642 * GFX writes the value to its address, and ACE waits until 1643 * it detects that the value has been written. 1644 */ 1645 struct { 1646 uint64_t va; /* Virtual address of the semaphore. */ 1647 uint32_t gfx2ace_value; /* Current value on GFX. */ 1648 uint32_t emitted_gfx2ace_value; /* Emitted value on GFX. */ 1649 } sem; 1650 } ace_internal; 1651 1652 /** 1653 * Whether a query pool has been reset and we have to flush caches. 1654 */ 1655 bool pending_reset_query; 1656 1657 /** 1658 * Bitmask of pending active query flushes.
1659 */ 1660 enum radv_cmd_flush_bits active_query_flush_bits; 1661}; 1662 1663struct radv_image; 1664struct radv_image_view; 1665 1666bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); 1667 1668bool radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer); 1669void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer); 1670 1671void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs); 1672void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs); 1673 1674void cik_create_gfx_config(struct radv_device *device); 1675 1676void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors, 1677 const VkViewport *viewports, unsigned rast_prim, float line_width); 1678 1679uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, 1680 bool indirect_draw, bool count_from_stream_output, 1681 uint32_t draw_vertex_count, unsigned topology, 1682 bool prim_restart_enable); 1683void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, 1684 unsigned event, unsigned event_flags, unsigned dst_sel, 1685 unsigned data_sel, uint64_t va, uint32_t new_fence, 1686 uint64_t gfx9_eop_bug_va); 1687 1688void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, 1689 uint32_t mask); 1690void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, 1691 uint32_t *fence_ptr, uint64_t va, bool is_mec, 1692 enum radv_cmd_flush_bits flush_bits, 1693 enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va); 1694void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); 1695void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, 1696 unsigned pred_op, uint64_t va); 1697void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, 1698 uint64_t size); 1699void si_cs_cp_dma_prefetch(const struct 
radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, 1700 unsigned size, bool predicating); 1701void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size); 1702void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, 1703 unsigned value); 1704void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); 1705 1706void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries); 1707uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer); 1708uint32_t radv_get_vgt_index_size(uint32_t type); 1709 1710unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs); 1711uint32_t radv_hash_vs_prolog(const void *key_); 1712bool radv_cmp_vs_prolog(const void *a_, const void *b_); 1713 1714bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, 1715 unsigned *out_offset, void **ptr); 1716void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, 1717 const struct radv_subpass *subpass); 1718void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer, 1719 const struct radv_subpass *subpass); 1720bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, 1721 const void *data, unsigned *out_offset); 1722void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, 1723 const struct radv_graphics_pipeline *pipeline, 1724 bool full_null_descriptors, void *vb_ptr); 1725void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs); 1726 1727void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer); 1728void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer); 1729void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer); 1730void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, 1731 VkImageAspectFlags aspects, 1732 
VkResolveModeFlagBits resolve_mode); 1733void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer); 1734void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer, 1735 VkImageAspectFlags aspects, 1736 VkResolveModeFlagBits resolve_mode); 1737void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples); 1738unsigned radv_get_default_max_sample_dist(int log_samples); 1739void radv_device_init_msaa(struct radv_device *device); 1740VkResult radv_device_init_vrs_state(struct radv_device *device); 1741 1742void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 1743 const struct radv_image_view *iview, 1744 VkClearDepthStencilValue ds_clear_value, 1745 VkImageAspectFlags aspects); 1746 1747void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 1748 const struct radv_image_view *iview, int cb_idx, 1749 uint32_t color_values[2]); 1750 1751bool radv_image_use_dcc_image_stores(const struct radv_device *device, 1752 const struct radv_image *image); 1753bool radv_image_use_dcc_predication(const struct radv_device *device, 1754 const struct radv_image *image); 1755 1756void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 1757 const VkImageSubresourceRange *range, bool value); 1758 1759void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 1760 const VkImageSubresourceRange *range, bool value); 1761enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, 1762 VkAccessFlags2 src_flags, 1763 const struct radv_image *image); 1764enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, 1765 VkAccessFlags2 dst_flags, 1766 const struct radv_image *image); 1767uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, 1768 struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value); 1769void radv_copy_buffer(struct 
radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo, 1770 struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset, 1771 uint64_t size); 1772 1773void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); 1774bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD); 1775void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator, 1776 struct radv_device_memory *mem); 1777 /* Emit the header of a PKT3_SET_SH_REG packet for pointer_count shader pointers: the packet count is one dword per pointer when use_32bit_pointers is set and two dwords per pointer otherwise, followed by the register index (sh_offset relative to SI_SH_REG_OFFSET, shifted right by 2). The pointer values themselves are not emitted here. */ 1778static inline void 1779radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count, 1780 bool use_32bit_pointers) 1781{ 1782 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0)); 1783 radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2); 1784} 1785 /* Emit one pointer value. va is always passed to radeon_emit first (presumably only its low 32 bits are written - TODO confirm against radeon_emit); the upper 32 bits follow only for 64-bit pointers. For 32-bit pointers nothing else is emitted and the assert checks that va is 0 or that its upper half equals rad_info.address32_hi. */ 1786static inline void 1787radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, 1788 bool use_32bit_pointers) 1789{ 1790 radeon_emit(cs, va); 1791 1792 if (use_32bit_pointers) { 1793 assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi); 1794 } else { 1795 radeon_emit(cs, va >> 32); 1796 } 1797} 1798 /* Emit a complete packet (head followed by body) for a single pointer at sh_offset. global selects a 64-bit pointer; otherwise a 32-bit pointer is emitted. */ 1799static inline void 1800radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, 1801 uint64_t va, bool global) 1802{ 1803 bool use_32bit_pointers = !global; 1804 1805 radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers); 1806 radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers); 1807} 1808 /* Return the descriptor state slot of cmd_buffer for bind_point: graphics and compute use the bind point value itself as the array index, ray tracing uses slot 2, and any other bind point is treated as unreachable. */ 1809static inline struct radv_descriptor_state * 1810radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) 1811{ 1812 switch (bind_point) { 1813 case VK_PIPELINE_BIND_POINT_GRAPHICS: 1814 case VK_PIPELINE_BIND_POINT_COMPUTE: 1815 return &cmd_buffer->descriptors[bind_point]; 1816 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: 1817 return &cmd_buffer->descriptors[2]; 1818 default: 1819 unreachable("Unhandled bind
point"); 1820 } 1821} 1822 1823void 1824radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]); 1825 1826/* 1827 * Takes x,y,z as exact numbers of invocations, instead of blocks. 1828 * 1829 * Limitations: Can't call normal dispatch functions without binding or rebinding 1830 * the compute pipeline. 1831 */ 1832void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, 1833 uint32_t z); 1834 1835void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, 1836 uint64_t va); 1837 1838struct radv_event { 1839 struct vk_object_base base; 1840 struct radeon_winsys_bo *bo; 1841 uint64_t *map; 1842}; 1843 1844#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1) 1845#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) 1846#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) 1847#define RADV_HASH_SHADER_LLVM (1 << 4) 1848#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) 1849#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) 1850#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14) 1851#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15) 1852#define RADV_HASH_SHADER_EMULATE_RT (1 << 16) 1853#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17) 1854#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18) 1855 1856struct radv_pipeline_key; 1857 1858void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, 1859 struct radv_pipeline_stage *out_stage, gl_shader_stage stage); 1860 1861void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages, 1862 const struct radv_pipeline_layout *layout, 1863 const struct radv_pipeline_key *key, uint32_t flags); 1864 1865void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 1866 uint32_t flags); 1867 1868uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats); 1869 1870bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo); 1871 1872bool 
radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines); 1873 1874bool radv_emulate_rt(const struct radv_physical_device *pdevice); 1875 1876enum { 1877 RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | 1878 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR | 1879 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR) 1880}; 1881 1882#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) 1883 1884#define radv_foreach_stage(stage, stage_bits) \ 1885 for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \ 1886 stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) 1887 1888extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS]; 1889unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format); 1890 1891struct radv_multisample_state { 1892 uint32_t db_eqaa; 1893 uint32_t pa_sc_mode_cntl_0; 1894 uint32_t pa_sc_mode_cntl_1; 1895 uint32_t pa_sc_aa_config; 1896 uint32_t pa_sc_aa_mask[2]; 1897 unsigned num_samples; 1898}; 1899 1900struct radv_vrs_state { 1901 uint32_t pa_cl_vrs_cntl; 1902}; 1903 1904struct radv_prim_vertex_count { 1905 uint8_t min; 1906 uint8_t incr; 1907}; 1908 1909struct radv_ia_multi_vgt_param_helpers { 1910 uint32_t base; 1911 bool partial_es_wave; 1912 uint8_t primgroup_size; 1913 bool ia_switch_on_eoi; 1914 bool partial_vs_wave; 1915}; 1916 1917struct radv_binning_state { 1918 uint32_t pa_sc_binner_cntl_0; 1919}; 1920 1921#define SI_GS_PER_ES 128 1922 1923enum radv_pipeline_type { 1924 RADV_PIPELINE_GRAPHICS, 1925 /* Compute pipeline (incl raytracing pipeline) */ 1926 RADV_PIPELINE_COMPUTE, 1927 /* Pipeline library. This can't actually run and merely is a partial pipeline. 
*/ 1928 RADV_PIPELINE_LIBRARY 1929}; 1930 1931struct radv_pipeline_group_handle { 1932 uint32_t handles[2]; 1933}; 1934 1935struct radv_pipeline_shader_stack_size { 1936 uint32_t recursive_size; 1937 /* anyhit + intersection */ 1938 uint32_t non_recursive_size; 1939}; 1940 1941struct radv_pipeline_slab { 1942 uint32_t ref_count; 1943 1944 union radv_shader_arena_block *alloc; 1945}; 1946 1947void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab); 1948 1949struct radv_vertex_input_info { 1950 uint32_t instance_rate_inputs; 1951 uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; 1952 uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; 1953 uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; 1954 uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; 1955 uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; 1956 uint8_t vertex_binding_align[MAX_VBS]; 1957 enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS]; 1958 uint32_t vertex_post_shuffle; 1959 uint32_t binding_stride[MAX_VBS]; 1960 uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS]; 1961 uint32_t attrib_ends[MAX_VERTEX_ATTRIBS]; 1962 uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS]; 1963}; 1964 1965struct radv_input_assembly_info { 1966 uint8_t primitive_topology; /* VkPrimitiveTopology */ 1967 bool primitive_restart_enable; 1968}; 1969 1970struct radv_tessellation_info { 1971 uint8_t patch_control_points; 1972 VkTessellationDomainOrigin domain_origin; 1973}; 1974 1975struct radv_viewport_info { 1976 bool negative_one_to_one; 1977 uint8_t viewport_count; 1978 uint8_t scissor_count; 1979 VkRect2D scissors[MAX_SCISSORS]; 1980 VkViewport viewports[MAX_VIEWPORTS]; 1981}; 1982 1983struct radv_rasterization_info { 1984 bool discard_enable; 1985 VkFrontFace front_face; 1986 VkCullModeFlags cull_mode; 1987 uint8_t polygon_mode; /* VkPolygonMode */ 1988 bool depth_bias_enable; 1989 bool depth_clamp_enable; 1990 float line_width; 1991 float 
depth_bias_constant_factor; 1992 float depth_bias_clamp; 1993 float depth_bias_slope_factor; 1994 VkConservativeRasterizationModeEXT conservative_mode; 1995 bool provoking_vtx_last; 1996 bool stippled_line_enable; 1997 VkLineRasterizationModeEXT line_raster_mode; 1998 uint32_t line_stipple_factor; 1999 uint16_t line_stipple_pattern; 2000 bool depth_clip_disable; 2001 VkRasterizationOrderAMD order; 2002}; 2003 2004struct radv_discard_rectangle_info { 2005 VkDiscardRectangleModeEXT mode; 2006 VkRect2D rects[MAX_DISCARD_RECTANGLES]; 2007 uint8_t count; 2008}; 2009 2010struct radv_multisample_info { 2011 bool sample_shading_enable; 2012 bool alpha_to_coverage_enable; 2013 bool sample_locs_enable; 2014 VkSampleCountFlagBits raster_samples; 2015 float min_sample_shading; 2016 uint16_t sample_mask; 2017 uint8_t sample_locs_count; 2018 VkSampleCountFlagBits sample_locs_per_pixel; 2019 VkExtent2D sample_locs_grid_size; 2020 VkSampleLocationEXT sample_locs[MAX_SAMPLE_LOCATIONS]; 2021}; 2022 2023struct radv_stencil_op_info { 2024 VkStencilOp fail_op; 2025 VkStencilOp pass_op; 2026 VkStencilOp depth_fail_op; 2027 VkCompareOp compare_op; 2028 uint8_t compare_mask; 2029 uint8_t write_mask; 2030 uint8_t reference; 2031}; 2032 2033struct radv_depth_stencil_info { 2034 bool stencil_test_enable; 2035 bool depth_test_enable; 2036 bool depth_write_enable; 2037 bool depth_bounds_test_enable; 2038 struct { 2039 float min; 2040 float max; 2041 } depth_bounds; 2042 struct radv_stencil_op_info front; 2043 struct radv_stencil_op_info back; 2044 VkCompareOp depth_compare_op; 2045}; 2046 2047struct radv_rendering_info { 2048 uint32_t view_mask; 2049 uint32_t color_att_count; 2050 VkFormat color_att_formats[MAX_RTS]; 2051 VkFormat depth_att_format; 2052 VkFormat stencil_att_format; 2053}; 2054 2055struct radv_color_blend_info { 2056 bool logic_op_enable; 2057 uint8_t att_count; 2058 uint16_t logic_op; 2059 uint32_t color_write_enable; 2060 float blend_constants[4]; 2061 struct { 2062 uint8_t 
color_write_mask; 2063 bool blend_enable; 2064 uint16_t color_blend_op; 2065 uint16_t alpha_blend_op; 2066 uint16_t src_color_blend_factor; 2067 uint16_t dst_color_blend_factor; 2068 uint16_t src_alpha_blend_factor; 2069 uint16_t dst_alpha_blend_factor; 2070 } att[MAX_RTS]; 2071}; 2072 2073struct radv_fragment_shading_rate_info { 2074 VkExtent2D size; 2075 VkFragmentShadingRateCombinerOpKHR combiner_ops[2]; 2076}; 2077 2078struct radv_graphics_pipeline_info { 2079 struct radv_vertex_input_info vi; 2080 struct radv_input_assembly_info ia; 2081 2082 struct radv_tessellation_info ts; 2083 struct radv_viewport_info vp; 2084 struct radv_rasterization_info rs; 2085 struct radv_discard_rectangle_info dr; 2086 2087 struct radv_multisample_info ms; 2088 struct radv_depth_stencil_info ds; 2089 struct radv_rendering_info ri; 2090 struct radv_color_blend_info cb; 2091 2092 struct radv_fragment_shading_rate_info fsr; 2093 2094 /* VK_AMD_mixed_attachment_samples */ 2095 uint8_t color_att_samples; 2096 uint8_t ds_att_samples; 2097}; 2098 2099enum radv_depth_clamp_mode { 2100 RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0, /* Clamp to the viewport min/max depth bounds */ 2101 RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */ 2102 RADV_DEPTH_CLAMP_MODE_DISABLED = 2, /* Disable depth clamping */ 2103}; 2104 2105struct radv_pipeline { 2106 struct vk_object_base base; 2107 enum radv_pipeline_type type; 2108 2109 struct radv_device *device; 2110 2111 struct radv_pipeline_slab *slab; 2112 struct radeon_winsys_bo *slab_bo; 2113 2114 bool need_indirect_descriptor_sets; 2115 struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES]; 2116 struct radv_shader *gs_copy_shader; 2117 2118 struct radeon_cmdbuf cs; 2119 uint32_t ctx_cs_hash; 2120 struct radeon_cmdbuf ctx_cs; 2121 2122 uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES]; 2123 2124 unsigned max_waves; 2125 unsigned scratch_bytes_per_wave; 2126 2127 /* Unique pipeline hash identifier. 
*/ 2128 uint64_t pipeline_hash; 2129 2130 /* Pipeline layout info. */ 2131 uint32_t push_constant_size; 2132 uint32_t dynamic_offset_count; 2133}; 2134 2135struct radv_graphics_pipeline { 2136 struct radv_pipeline base; 2137 2138 VkShaderStageFlags active_stages; 2139 2140 struct radv_dynamic_state dynamic_state; 2141 2142 uint64_t dynamic_states; 2143 struct radv_multisample_state ms; 2144 struct radv_binning_state binning; 2145 struct radv_vrs_state vrs; 2146 uint32_t spi_baryc_cntl; 2147 unsigned esgs_ring_size; 2148 unsigned gsvs_ring_size; 2149 uint32_t vtx_base_sgpr; 2150 struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param; 2151 uint8_t vtx_emit_num; 2152 uint64_t needed_dynamic_state; 2153 unsigned tess_patch_control_points; 2154 unsigned pa_su_sc_mode_cntl; 2155 unsigned db_depth_control; 2156 unsigned pa_cl_clip_cntl; 2157 unsigned cb_color_control; 2158 uint32_t binding_stride[MAX_VBS]; 2159 uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS]; 2160 uint32_t attrib_ends[MAX_VERTEX_ATTRIBS]; 2161 uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS]; 2162 uint8_t last_vertex_attrib_bit; 2163 uint8_t next_vertex_stage : 8; 2164 uint32_t vb_desc_usage_mask; 2165 uint32_t vb_desc_alloc_size; 2166 2167 /* Last pre-PS API stage */ 2168 gl_shader_stage last_vgt_api_stage; 2169 2170 /* Used for rbplus */ 2171 uint32_t col_format; 2172 uint32_t cb_target_mask; 2173 2174 bool disable_out_of_order_rast_for_occlusion; 2175 bool uses_drawid; 2176 bool uses_baseinstance; 2177 bool uses_dynamic_stride; 2178 bool uses_conservative_overestimate; 2179 bool negative_one_to_one; 2180 enum radv_depth_clamp_mode depth_clamp_mode; 2181 bool use_per_attribute_vb_descs; 2182 bool can_use_simple_input; 2183 bool uses_user_sample_locations; 2184 2185 /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */ 2186 bool force_vrs_per_vertex; 2187 2188 /* Whether the pipeline uses NGG (GFX10+). 
*/ 2189 bool is_ngg; 2190 bool has_ngg_culling; 2191 2192 /* Not NULL if graphics pipeline uses streamout. */ 2193 struct radv_shader *streamout_shader; 2194 2195 unsigned rast_prim; 2196 float line_width; 2197}; 2198 2199struct radv_compute_pipeline { 2200 struct radv_pipeline base; 2201 2202 bool cs_regalloc_hang_bug; 2203 2204 /* Raytracing */ 2205 struct radv_pipeline_group_handle *rt_group_handles; 2206 struct radv_pipeline_shader_stack_size *rt_stack_sizes; 2207 bool dynamic_stack_size; 2208 uint32_t group_count; 2209}; 2210 2211struct radv_library_pipeline { 2212 struct radv_pipeline base; 2213 2214 unsigned stage_count; 2215 VkPipelineShaderStageCreateInfo *stages; 2216 unsigned group_count; 2217 VkRayTracingShaderGroupCreateInfoKHR *groups; 2218 VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifiers; 2219 struct { 2220 uint8_t sha1[SHA1_DIGEST_LENGTH]; 2221 } *hashes; 2222}; 2223 2224#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ 2225 static inline struct radv_##pipe_type##_pipeline * \ 2226 radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline) \ 2227 { \ 2228 assert(pipeline->type == pipe_enum); \ 2229 return (struct radv_##pipe_type##_pipeline *) pipeline; \ 2230 } 2231 2232RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS) 2233RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE) 2234RADV_DECL_PIPELINE_DOWNCAST(library, RADV_PIPELINE_LIBRARY) 2235 2236struct radv_pipeline_stage { 2237 gl_shader_stage stage; 2238 2239 struct { 2240 const struct vk_object_base *object; 2241 const char *data; 2242 uint32_t size; 2243 unsigned char sha1[20]; 2244 } spirv; 2245 2246 const char *entrypoint; 2247 const VkSpecializationInfo *spec_info; 2248 2249 unsigned char shader_sha1[20]; 2250 2251 nir_shader *nir; 2252 nir_shader *internal_nir; /* meta shaders */ 2253 2254 struct radv_shader_info info; 2255 struct radv_shader_args args; 2256 2257 VkPipelineCreationFeedback feedback; 2258}; 2259 2260static inline bool 
2261radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage) 2262{ 2263 return pipeline->base.shaders[stage]; 2264} 2265 2266bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline); 2267 2268bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline); 2269 2270struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline, 2271 gl_shader_stage stage, int idx); 2272 2273struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage); 2274 2275void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, 2276 const struct radv_shader *shader); 2277 2278void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, 2279 struct radeon_cmdbuf *cs, const struct radv_shader *shader); 2280 2281struct radv_graphics_pipeline_create_info { 2282 bool use_rectlist; 2283 bool db_depth_clear; 2284 bool db_stencil_clear; 2285 bool depth_compress_disable; 2286 bool stencil_compress_disable; 2287 bool resummarize_enable; 2288 uint32_t custom_blend_mode; 2289}; 2290 2291void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, 2292 enum radv_pipeline_type type); 2293 2294VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache, 2295 const VkGraphicsPipelineCreateInfo *pCreateInfo, 2296 const struct radv_graphics_pipeline_create_info *extra, 2297 const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); 2298 2299VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, 2300 const VkComputePipelineCreateInfo *pCreateInfo, 2301 const VkAllocationCallbacks *pAllocator, 2302 const uint8_t *custom_hash, 2303 struct radv_pipeline_shader_stack_size *rt_stack_sizes, 2304 uint32_t rt_group_count, VkPipeline *pPipeline); 2305 2306void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline, 2307 const 
VkAllocationCallbacks *allocator); 2308 2309struct radv_binning_settings { 2310 unsigned context_states_per_bin; /* allowed range: [1, 6] */ 2311 unsigned persistent_states_per_bin; /* allowed range: [1, 32] */ 2312 unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */ 2313}; 2314 2315struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev); 2316 2317struct vk_format_description; 2318uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc, 2319 int first_non_void); 2320uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc, 2321 int first_non_void); 2322bool radv_is_buffer_format_supported(VkFormat format, bool *scaled); 2323void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format, 2324 const struct util_format_description *desc, unsigned *dfmt, 2325 unsigned *nfmt, bool *post_shuffle, 2326 enum radv_vs_input_alpha_adjust *alpha_adjust); 2327uint32_t radv_translate_colorformat(VkFormat format); 2328uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc, 2329 int first_non_void); 2330uint32_t radv_colorformat_endian_swap(uint32_t colorformat); 2331unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap); 2332uint32_t radv_translate_dbformat(VkFormat format); 2333uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc, 2334 int first_non_void); 2335uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc, 2336 int first_non_void); 2337bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], 2338 VkClearColorValue *value); 2339bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device, 2340 VkFormat format); 2341bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, 2342 VkFormat format, bool 
*blendable); 2343bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2, 2344 bool *sign_reinterpret); 2345bool radv_is_atomic_format_supported(VkFormat format); 2346bool radv_device_supports_etc(struct radv_physical_device *physical_device); 2347 2348static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS = 2349 VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | 2350 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT; 2351 2352struct radv_image_plane { 2353 VkFormat format; 2354 struct radeon_surf surface; 2355}; 2356 2357struct radv_image_binding { 2358 /* Set when bound */ 2359 struct radeon_winsys_bo *bo; 2360 VkDeviceSize offset; 2361}; 2362 2363struct radv_image { 2364 struct vk_image vk; 2365 2366 struct ac_surf_info info; 2367 2368 VkDeviceSize size; 2369 uint32_t alignment; 2370 2371 unsigned queue_family_mask; 2372 bool exclusive; 2373 bool shareable; 2374 bool l2_coherent; 2375 bool dcc_sign_reinterpret; 2376 bool support_comp_to_single; 2377 2378 struct radv_image_binding bindings[3]; 2379 bool tc_compatible_cmask; 2380 2381 uint64_t clear_value_offset; 2382 uint64_t fce_pred_offset; 2383 uint64_t dcc_pred_offset; 2384 2385 /* 2386 * Metadata for the TC-compat zrange workaround. If the 32-bit value 2387 * stored at this offset is UINT_MAX, the driver will emit 2388 * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the 2389 * SET_CONTEXT_REG packet. 2390 */ 2391 uint64_t tc_compat_zrange_offset; 2392 2393 /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ 2394 VkDeviceMemory owned_memory; 2395 2396 unsigned plane_count; 2397 bool disjoint; 2398 struct radv_image_plane planes[0]; 2399}; 2400 2401/* Whether the image has a htile that is known consistent with the contents of 2402 * the image and is allowed to be in compressed form. 2403 * 2404 * If this is false reads that don't use the htile should be able to return 2405 * correct results. 
2406 */ 2407bool radv_layout_is_htile_compressed(const struct radv_device *device, 2408 const struct radv_image *image, VkImageLayout layout, 2409 bool in_render_loop, unsigned queue_mask); 2410 2411bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, 2412 unsigned level, VkImageLayout layout, bool in_render_loop, 2413 unsigned queue_mask); 2414 2415bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, 2416 unsigned level, VkImageLayout layout, bool in_render_loop, 2417 unsigned queue_mask); 2418 2419bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image, 2420 VkImageLayout layout, unsigned queue_mask); 2421 2422/** 2423 * Return whether the image has CMASK metadata for color surfaces. 2424 */ 2425static inline bool 2426radv_image_has_cmask(const struct radv_image *image) 2427{ 2428 return image->planes[0].surface.cmask_offset; 2429} 2430 2431/** 2432 * Return whether the image has FMASK metadata for color surfaces. 2433 */ 2434static inline bool 2435radv_image_has_fmask(const struct radv_image *image) 2436{ 2437 return image->planes[0].surface.fmask_offset; 2438} 2439 2440/** 2441 * Return whether the image has DCC metadata for color surfaces. 2442 */ 2443static inline bool 2444radv_image_has_dcc(const struct radv_image *image) 2445{ 2446 return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && 2447 image->planes[0].surface.meta_offset; 2448} 2449 2450/** 2451 * Return whether the image is TC-compatible CMASK. 2452 */ 2453static inline bool 2454radv_image_is_tc_compat_cmask(const struct radv_image *image) 2455{ 2456 return radv_image_has_fmask(image) && image->tc_compatible_cmask; 2457} 2458 2459/** 2460 * Return whether DCC metadata is enabled for a level. 
2461 */ 2462static inline bool 2463radv_dcc_enabled(const struct radv_image *image, unsigned level) 2464{ 2465 return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels; 2466} 2467 2468/** 2469 * Return whether the image has CB metadata. 2470 */ 2471static inline bool 2472radv_image_has_CB_metadata(const struct radv_image *image) 2473{ 2474 return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image); 2475} 2476 2477/** 2478 * Return whether the image has HTILE metadata for depth surfaces. 2479 */ 2480static inline bool 2481radv_image_has_htile(const struct radv_image *image) 2482{ 2483 return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && 2484 image->planes[0].surface.meta_size; 2485} 2486 2487/** 2488 * Return whether the image has VRS HTILE metadata for depth surfaces 2489 */ 2490static inline bool 2491radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image) 2492{ 2493 /* Any depth buffer can potentially use VRS. */ 2494 return device->attachment_vrs_enabled && radv_image_has_htile(image) && 2495 (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); 2496} 2497 2498/** 2499 * Return whether HTILE metadata is enabled for a level. 2500 */ 2501static inline bool 2502radv_htile_enabled(const struct radv_image *image, unsigned level) 2503{ 2504 return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels; 2505} 2506 2507/** 2508 * Return whether the image is TC-compatible HTILE. 2509 */ 2510static inline bool 2511radv_image_is_tc_compat_htile(const struct radv_image *image) 2512{ 2513 return radv_image_has_htile(image) && 2514 (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE); 2515} 2516 2517/** 2518 * Return whether the entire HTILE buffer can be used for depth in order to 2519 * improve HiZ Z-Range precision. 
2520 */ 2521static inline bool 2522radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image) 2523{ 2524 if (device->physical_device->rad_info.gfx_level >= GFX9) { 2525 return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image); 2526 } else { 2527 /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for 2528 * the TC-compat ZRANGE issue even if no stencil is used. 2529 */ 2530 return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image); 2531 } 2532} 2533 2534static inline bool 2535radv_image_has_clear_value(const struct radv_image *image) 2536{ 2537 return image->clear_value_offset != 0; 2538} 2539 2540static inline uint64_t 2541radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level) 2542{ 2543 assert(radv_image_has_clear_value(image)); 2544 2545 uint64_t va = radv_buffer_get_va(image->bindings[0].bo); 2546 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8; 2547 return va; 2548} 2549 2550static inline uint64_t 2551radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level) 2552{ 2553 assert(image->fce_pred_offset != 0); 2554 2555 uint64_t va = radv_buffer_get_va(image->bindings[0].bo); 2556 va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8; 2557 return va; 2558} 2559 2560static inline uint64_t 2561radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level) 2562{ 2563 assert(image->dcc_pred_offset != 0); 2564 2565 uint64_t va = radv_buffer_get_va(image->bindings[0].bo); 2566 va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8; 2567 return va; 2568} 2569 2570static inline uint64_t 2571radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level) 2572{ 2573 assert(image->tc_compat_zrange_offset != 0); 2574 2575 uint64_t va = radv_buffer_get_va(image->bindings[0].bo); 2576 va += image->bindings[0].offset + 
image->tc_compat_zrange_offset + base_level * 4; 2577 return va; 2578} 2579 2580static inline uint64_t 2581radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level) 2582{ 2583 assert(radv_image_has_clear_value(image)); 2584 2585 uint64_t va = radv_buffer_get_va(image->bindings[0].bo); 2586 va += image->bindings[0].offset + image->clear_value_offset + base_level * 8; 2587 return va; 2588} 2589 2590static inline uint32_t 2591radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image) 2592{ 2593 uint32_t initial_value; 2594 2595 if (radv_image_tile_stencil_disabled(device, image)) { 2596 /* Z only (no stencil): 2597 * 2598 * |31 18|17 4|3 0| 2599 * +---------+---------+-------+ 2600 * | Max Z | Min Z | ZMask | 2601 */ 2602 initial_value = 0xfffc000f; 2603 } else { 2604 /* Z and stencil: 2605 * 2606 * |31 12|11 10|9 8|7 6|5 4|3 0| 2607 * +-----------+-----+------+-----+-----+-------+ 2608 * | Z Range | | SMem | SR1 | SR0 | ZMask | 2609 * 2610 * SR0/SR1 contains the stencil test results. Initializing 2611 * SR0/SR1 to 0x3 means the stencil test result is unknown. 2612 * 2613 * Z, stencil and 4 bit VRS encoding: 2614 * |31 12|11 10|9 8|7 6|5 4|3 0| 2615 * +-----------+------------+------+------------+-----+-------+ 2616 * | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask | 2617 */ 2618 if (radv_image_has_vrs_htile(device, image)) { 2619 /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */ 2620 initial_value = 0xfffff33f; 2621 } else { 2622 initial_value = 0xfffff3ff; 2623 } 2624 } 2625 2626 return initial_value; 2627} 2628 2629static inline bool 2630radv_image_get_iterate256(struct radv_device *device, struct radv_image *image) 2631{ 2632 /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. 
*/ 2633 return device->physical_device->rad_info.gfx_level >= GFX10 && 2634 (image->vk.usage & 2635 (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && 2636 radv_image_is_tc_compat_htile(image) && image->info.samples > 1; 2637} 2638 2639unsigned radv_image_queue_family_mask(const struct radv_image *image, 2640 enum radv_queue_family family, 2641 enum radv_queue_family queue_family); 2642 2643static inline uint32_t 2644radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range) 2645{ 2646 return range->layerCount == VK_REMAINING_ARRAY_LAYERS 2647 ? image->info.array_size - range->baseArrayLayer 2648 : range->layerCount; 2649} 2650 2651static inline uint32_t 2652radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range) 2653{ 2654 return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel 2655 : range->levelCount; 2656} 2657 2658bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image); 2659 2660struct radeon_bo_metadata; 2661void radv_init_metadata(struct radv_device *device, struct radv_image *image, 2662 struct radeon_bo_metadata *metadata); 2663 2664void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, 2665 uint64_t offset, uint32_t stride); 2666 2667union radv_descriptor { 2668 struct { 2669 uint32_t plane0_descriptor[8]; 2670 uint32_t fmask_descriptor[8]; 2671 }; 2672 struct { 2673 uint32_t plane_descriptors[3][8]; 2674 }; 2675}; 2676 2677struct radv_image_view { 2678 struct vk_image_view vk; 2679 struct radv_image *image; /**< VkImageViewCreateInfo::image */ 2680 2681 unsigned plane_id; 2682 VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ 2683 2684 /* Whether the image iview supports fast clear. 
*/ 2685 bool support_fast_clear; 2686 2687 bool disable_dcc_mrt; 2688 2689 union radv_descriptor descriptor; 2690 2691 /* Descriptor for use as a storage image as opposed to a sampled image. 2692 * This has a few differences for cube maps (e.g. type). 2693 */ 2694 union radv_descriptor storage_descriptor; 2695}; 2696 2697struct radv_image_create_info { 2698 const VkImageCreateInfo *vk_info; 2699 bool scanout; 2700 bool no_metadata_planes; 2701 bool prime_blit_src; 2702 const struct radeon_bo_metadata *bo_metadata; 2703}; 2704 2705VkResult 2706radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info, 2707 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, 2708 struct radv_image *image); 2709 2710VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info, 2711 const VkAllocationCallbacks *alloc, VkImage *pImage); 2712 2713bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, 2714 VkFormat format, VkImageCreateFlags flags, 2715 bool *sign_reinterpret); 2716 2717bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format); 2718 2719VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, 2720 const VkNativeBufferANDROID *gralloc_info, 2721 const VkAllocationCallbacks *alloc, VkImage *out_image_h); 2722uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, 2723 const VkImageUsageFlags vk_usage); 2724VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, 2725 unsigned priority, 2726 const VkImportAndroidHardwareBufferInfoANDROID *info); 2727VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, 2728 unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo); 2729 2730VkFormat radv_select_android_external_format(const void *next, VkFormat default_format); 2731 2732bool radv_android_gralloc_supports_format(VkFormat 
format, VkImageUsageFlagBits usage); 2733 2734struct radv_image_view_extra_create_info { 2735 bool disable_compression; 2736 bool enable_compression; 2737 bool disable_dcc_mrt; 2738 bool from_client; /**< Set only if this came from vkCreateImage */ 2739}; 2740 2741void radv_image_view_init(struct radv_image_view *view, struct radv_device *device, 2742 const VkImageViewCreateInfo *pCreateInfo, 2743 VkImageCreateFlags img_create_flags, 2744 const struct radv_image_view_extra_create_info *extra_create_info); 2745void radv_image_view_finish(struct radv_image_view *iview); 2746 2747VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask); 2748 2749struct radv_sampler_ycbcr_conversion_state { 2750 VkFormat format; 2751 VkSamplerYcbcrModelConversion ycbcr_model; 2752 VkSamplerYcbcrRange ycbcr_range; 2753 VkComponentMapping components; 2754 VkChromaLocation chroma_offsets[2]; 2755 VkFilter chroma_filter; 2756}; 2757 2758struct radv_sampler_ycbcr_conversion { 2759 struct vk_object_base base; 2760 /* The state is hashed for the descriptor set layout. 
*/ 2761 struct radv_sampler_ycbcr_conversion_state state; 2762}; 2763 2764struct radv_buffer_view { 2765 struct vk_object_base base; 2766 struct radeon_winsys_bo *bo; 2767 VkFormat vk_format; 2768 uint64_t range; /**< VkBufferViewCreateInfo::range */ 2769 uint32_t state[4]; 2770}; 2771void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device, 2772 const VkBufferViewCreateInfo *pCreateInfo); 2773void radv_buffer_view_finish(struct radv_buffer_view *view); 2774 2775static inline bool 2776radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent) 2777{ 2778 if (extent->width != image->info.width || extent->height != image->info.height || 2779 extent->depth != image->info.depth) 2780 return false; 2781 return true; 2782} 2783 2784struct radv_sampler { 2785 struct vk_object_base base; 2786 uint32_t state[4]; 2787 struct radv_sampler_ycbcr_conversion *ycbcr_sampler; 2788 uint32_t border_color_slot; 2789}; 2790 2791struct radv_subpass_barrier { 2792 VkPipelineStageFlags2 src_stage_mask; 2793 VkPipelineStageFlags2 dst_stage_mask; 2794 VkAccessFlags2 src_access_mask; 2795 VkAccessFlags2 dst_access_mask; 2796}; 2797 2798void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, 2799 const struct radv_subpass_barrier *barrier); 2800 2801struct radv_subpass_attachment { 2802 uint32_t attachment; 2803 VkImageLayout layout; 2804 VkImageLayout stencil_layout; 2805 bool in_render_loop; 2806}; 2807 2808struct radv_subpass { 2809 uint32_t attachment_count; 2810 struct radv_subpass_attachment *attachments; 2811 2812 uint32_t input_count; 2813 uint32_t color_count; 2814 struct radv_subpass_attachment *input_attachments; 2815 struct radv_subpass_attachment *color_attachments; 2816 struct radv_subpass_attachment *resolve_attachments; 2817 struct radv_subpass_attachment *depth_stencil_attachment; 2818 struct radv_subpass_attachment *ds_resolve_attachment; 2819 struct radv_subpass_attachment *vrs_attachment; 2820 
VkResolveModeFlagBits depth_resolve_mode; 2821 VkResolveModeFlagBits stencil_resolve_mode; 2822 2823 /** Subpass has at least one color resolve attachment */ 2824 bool has_color_resolve; 2825 2826 struct radv_subpass_barrier start_barrier; 2827 2828 uint32_t view_mask; 2829 2830 VkSampleCountFlagBits color_sample_count; 2831 VkSampleCountFlagBits depth_sample_count; 2832 VkSampleCountFlagBits max_sample_count; 2833 2834 /* Whether the subpass has ingoing/outgoing external dependencies. */ 2835 bool has_ingoing_dep; 2836 bool has_outgoing_dep; 2837}; 2838 2839uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer); 2840 2841struct radv_render_pass_attachment { 2842 VkFormat format; 2843 uint32_t samples; 2844 VkAttachmentLoadOp load_op; 2845 VkAttachmentLoadOp stencil_load_op; 2846 VkImageLayout initial_layout; 2847 VkImageLayout final_layout; 2848 VkImageLayout stencil_initial_layout; 2849 VkImageLayout stencil_final_layout; 2850 2851 /* The subpass id in which the attachment will be used first/last. 
*/ 2852 uint32_t first_subpass_idx; 2853 uint32_t last_subpass_idx; 2854}; 2855 2856struct radv_render_pass { 2857 struct vk_object_base base; 2858 uint32_t attachment_count; 2859 uint32_t subpass_count; 2860 struct radv_subpass_attachment *subpass_attachments; 2861 struct radv_render_pass_attachment *attachments; 2862 struct radv_subpass_barrier end_barrier; 2863 struct radv_subpass subpasses[0]; 2864}; 2865 2866VkResult radv_device_init_meta(struct radv_device *device); 2867void radv_device_finish_meta(struct radv_device *device); 2868 2869struct radv_query_pool { 2870 struct vk_object_base base; 2871 struct radeon_winsys_bo *bo; 2872 uint32_t stride; 2873 uint32_t availability_offset; 2874 uint64_t size; 2875 char *ptr; 2876 VkQueryType type; 2877 uint32_t pipeline_stats_mask; 2878 bool uses_gds; /* For NGG GS on GFX10+ */ 2879}; 2880 2881struct radv_perfcounter_impl; 2882 2883struct radv_pc_query_pool { 2884 struct radv_query_pool b; 2885 2886 uint32_t *pc_regs; 2887 unsigned num_pc_regs; 2888 2889 unsigned num_passes; 2890 2891 unsigned num_counters; 2892 struct radv_perfcounter_impl *counters; 2893}; 2894 2895void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool); 2896VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice, 2897 const VkQueryPoolCreateInfo *pCreateInfo, 2898 struct radv_pc_query_pool *pool); 2899void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, 2900 uint64_t va); 2901void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, 2902 uint64_t va); 2903void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out); 2904 2905bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs); 2906 2907int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx, 2908 const VkDeviceQueueCreateInfo *create_info, 2909 const VkDeviceQueueGlobalPriorityCreateInfoKHR 
*global_priority); 2910 2911void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, 2912 struct radv_descriptor_set *set, unsigned idx); 2913 2914void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, 2915 VkDescriptorSet overrideSet, uint32_t descriptorWriteCount, 2916 const VkWriteDescriptorSet *pDescriptorWrites, 2917 uint32_t descriptorCopyCount, 2918 const VkCopyDescriptorSet *pDescriptorCopies); 2919 2920void radv_cmd_update_descriptor_set_with_template(struct radv_device *device, 2921 struct radv_cmd_buffer *cmd_buffer, 2922 struct radv_descriptor_set *set, 2923 VkDescriptorUpdateTemplate descriptorUpdateTemplate, 2924 const void *pData); 2925 2926void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, 2927 VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, 2928 uint32_t set, uint32_t descriptorWriteCount, 2929 const VkWriteDescriptorSet *pDescriptorWrites); 2930 2931uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2932 const VkImageSubresourceRange *range, uint32_t value); 2933 2934uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2935 const VkImageSubresourceRange *range); 2936 2937/* radv_nir_to_llvm.c */ 2938struct radv_shader_args; 2939struct radv_nir_compiler_options; 2940struct radv_shader_info; 2941 2942void llvm_compile_shader(const struct radv_nir_compiler_options *options, 2943 const struct radv_shader_info *info, unsigned shader_count, 2944 struct nir_shader *const *shaders, struct radv_shader_binary **binary, 2945 const struct radv_shader_args *args); 2946 2947/* radv_shader_info.h */ 2948struct radv_shader_info; 2949 2950void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, 2951 const struct radv_pipeline_layout *layout, 2952 const struct radv_pipeline_key *pipeline_key, 2953 struct radv_shader_info *info); 2954 2955void 
radv_nir_shader_info_init(struct radv_shader_info *info); 2956 2957bool radv_thread_trace_init(struct radv_device *device); 2958void radv_thread_trace_finish(struct radv_device *device); 2959bool radv_begin_thread_trace(struct radv_queue *queue); 2960bool radv_end_thread_trace(struct radv_queue *queue); 2961bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace); 2962void radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data, 2963 uint32_t num_dwords); 2964bool radv_is_instruction_timing_enabled(void); 2965 2966void radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs, 2967 bool inhibit); 2968void radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable); 2969 2970bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2971 struct radv_buffer *buffer, const VkBufferImageCopy2 *region); 2972 2973/* radv_sqtt_layer_.c */ 2974struct radv_barrier_data { 2975 union { 2976 struct { 2977 uint16_t depth_stencil_expand : 1; 2978 uint16_t htile_hiz_range_expand : 1; 2979 uint16_t depth_stencil_resummarize : 1; 2980 uint16_t dcc_decompress : 1; 2981 uint16_t fmask_decompress : 1; 2982 uint16_t fast_clear_eliminate : 1; 2983 uint16_t fmask_color_expand : 1; 2984 uint16_t init_mask_ram : 1; 2985 uint16_t reserved : 8; 2986 }; 2987 uint16_t all; 2988 } layout_transitions; 2989}; 2990 2991/** 2992 * Value for the reason field of an RGP barrier start marker originating from 2993 * the Vulkan client (does not include PAL-defined values). (Table 15) 2994 */ 2995enum rgp_barrier_reason { 2996 RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF, 2997 2998 /* External app-generated barrier reasons, i.e. API synchronization 2999 * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF]. 
3000 */ 3001 RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001, 3002 RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002, 3003 RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003, 3004 3005 /* Internal barrier reasons, i.e. implicit synchronization inserted by 3006 * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE]. 3007 */ 3008 RGP_BARRIER_INTERNAL_BASE = 0xC0000000, 3009 RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0, 3010 RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1, 3011 RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2, 3012 RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3 3013}; 3014 3015void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); 3016void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer); 3017void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer); 3018void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z); 3019void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, 3020 VkImageAspectFlagBits aspects); 3021void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer); 3022void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer); 3023void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer); 3024void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, 3025 enum rgp_barrier_reason reason); 3026void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer); 3027void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer); 3028void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, 3029 const struct radv_barrier_data *barrier); 3030 3031struct radv_indirect_command_layout { 3032 struct vk_object_base base; 3033 3034 uint32_t input_stride; 3035 uint32_t token_count; 3036 3037 bool indexed; 3038 bool 
binds_index_buffer; 3039 bool binds_state; 3040 uint16_t draw_params_offset; 3041 uint16_t index_buffer_offset; 3042 3043 uint16_t state_offset; 3044 3045 uint32_t bind_vbo_mask; 3046 uint32_t vbo_offsets[MAX_VBS]; 3047 3048 uint64_t push_constant_mask; 3049 uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4]; 3050 3051 uint32_t ibo_type_32; 3052 uint32_t ibo_type_8; 3053 3054 VkIndirectCommandsLayoutTokenNV tokens[0]; 3055}; 3056 3057uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info); 3058 3059void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, 3060 const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo); 3061 3062uint64_t radv_get_current_time(void); 3063 3064static inline uint32_t 3065si_conv_gl_prim_to_vertices(enum shader_prim gl_prim) 3066{ 3067 switch (gl_prim) { 3068 case SHADER_PRIM_POINTS: 3069 return 1; 3070 case SHADER_PRIM_LINES: 3071 case SHADER_PRIM_LINE_STRIP: 3072 return 2; 3073 case SHADER_PRIM_TRIANGLES: 3074 case SHADER_PRIM_TRIANGLE_STRIP: 3075 return 3; 3076 case SHADER_PRIM_LINES_ADJACENCY: 3077 return 4; 3078 case SHADER_PRIM_TRIANGLES_ADJACENCY: 3079 return 6; 3080 case SHADER_PRIM_QUADS: 3081 return V_028A6C_TRISTRIP; 3082 default: 3083 assert(0); 3084 return 0; 3085 } 3086} 3087 3088static inline uint32_t 3089si_conv_prim_to_gs_out(uint32_t topology) 3090{ 3091 switch (topology) { 3092 case V_008958_DI_PT_POINTLIST: 3093 case V_008958_DI_PT_PATCH: 3094 return V_028A6C_POINTLIST; 3095 case V_008958_DI_PT_LINELIST: 3096 case V_008958_DI_PT_LINESTRIP: 3097 case V_008958_DI_PT_LINELIST_ADJ: 3098 case V_008958_DI_PT_LINESTRIP_ADJ: 3099 return V_028A6C_LINESTRIP; 3100 case V_008958_DI_PT_TRILIST: 3101 case V_008958_DI_PT_TRISTRIP: 3102 case V_008958_DI_PT_TRIFAN: 3103 case V_008958_DI_PT_TRILIST_ADJ: 3104 case V_008958_DI_PT_TRISTRIP_ADJ: 3105 return V_028A6C_TRISTRIP; 3106 default: 3107 assert(0); 3108 return 0; 3109 } 3110} 3111 3112static inline uint32_t 3113si_translate_prim(unsigned topology) 
3114{ 3115 switch (topology) { 3116 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: 3117 return V_008958_DI_PT_POINTLIST; 3118 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: 3119 return V_008958_DI_PT_LINELIST; 3120 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: 3121 return V_008958_DI_PT_LINESTRIP; 3122 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: 3123 return V_008958_DI_PT_TRILIST; 3124 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: 3125 return V_008958_DI_PT_TRISTRIP; 3126 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: 3127 return V_008958_DI_PT_TRIFAN; 3128 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 3129 return V_008958_DI_PT_LINELIST_ADJ; 3130 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 3131 return V_008958_DI_PT_LINESTRIP_ADJ; 3132 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 3133 return V_008958_DI_PT_TRILIST_ADJ; 3134 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 3135 return V_008958_DI_PT_TRISTRIP_ADJ; 3136 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: 3137 return V_008958_DI_PT_PATCH; 3138 default: 3139 unreachable("unhandled primitive type"); 3140 } 3141} 3142 3143static inline bool 3144radv_prim_is_points_or_lines(unsigned topology) 3145{ 3146 switch (topology) { 3147 case V_008958_DI_PT_POINTLIST: 3148 case V_008958_DI_PT_LINELIST: 3149 case V_008958_DI_PT_LINESTRIP: 3150 case V_008958_DI_PT_LINELIST_ADJ: 3151 case V_008958_DI_PT_LINESTRIP_ADJ: 3152 return true; 3153 default: 3154 return false; 3155 } 3156} 3157 3158static inline bool 3159radv_rast_prim_is_point(unsigned rast_prim) 3160{ 3161 return rast_prim == V_028A6C_POINTLIST; 3162} 3163 3164static inline bool 3165radv_rast_prim_is_line(unsigned rast_prim) 3166{ 3167 return rast_prim == V_028A6C_LINESTRIP; 3168} 3169 3170static inline bool 3171radv_rast_prim_is_points_or_lines(unsigned rast_prim) 3172{ 3173 return radv_rast_prim_is_point(rast_prim) || radv_rast_prim_is_line(rast_prim); 3174} 3175 3176static inline uint32_t 3177si_translate_stencil_op(enum VkStencilOp op) 3178{ 3179 switch (op) { 
3180 case VK_STENCIL_OP_KEEP: 3181 return V_02842C_STENCIL_KEEP; 3182 case VK_STENCIL_OP_ZERO: 3183 return V_02842C_STENCIL_ZERO; 3184 case VK_STENCIL_OP_REPLACE: 3185 return V_02842C_STENCIL_REPLACE_TEST; 3186 case VK_STENCIL_OP_INCREMENT_AND_CLAMP: 3187 return V_02842C_STENCIL_ADD_CLAMP; 3188 case VK_STENCIL_OP_DECREMENT_AND_CLAMP: 3189 return V_02842C_STENCIL_SUB_CLAMP; 3190 case VK_STENCIL_OP_INVERT: 3191 return V_02842C_STENCIL_INVERT; 3192 case VK_STENCIL_OP_INCREMENT_AND_WRAP: 3193 return V_02842C_STENCIL_ADD_WRAP; 3194 case VK_STENCIL_OP_DECREMENT_AND_WRAP: 3195 return V_02842C_STENCIL_SUB_WRAP; 3196 default: 3197 return 0; 3198 } 3199} 3200 3201static inline uint32_t 3202si_translate_blend_logic_op(VkLogicOp op) 3203{ 3204 switch (op) { 3205 case VK_LOGIC_OP_CLEAR: 3206 return V_028808_ROP3_CLEAR; 3207 case VK_LOGIC_OP_AND: 3208 return V_028808_ROP3_AND; 3209 case VK_LOGIC_OP_AND_REVERSE: 3210 return V_028808_ROP3_AND_REVERSE; 3211 case VK_LOGIC_OP_COPY: 3212 return V_028808_ROP3_COPY; 3213 case VK_LOGIC_OP_AND_INVERTED: 3214 return V_028808_ROP3_AND_INVERTED; 3215 case VK_LOGIC_OP_NO_OP: 3216 return V_028808_ROP3_NO_OP; 3217 case VK_LOGIC_OP_XOR: 3218 return V_028808_ROP3_XOR; 3219 case VK_LOGIC_OP_OR: 3220 return V_028808_ROP3_OR; 3221 case VK_LOGIC_OP_NOR: 3222 return V_028808_ROP3_NOR; 3223 case VK_LOGIC_OP_EQUIVALENT: 3224 return V_028808_ROP3_EQUIVALENT; 3225 case VK_LOGIC_OP_INVERT: 3226 return V_028808_ROP3_INVERT; 3227 case VK_LOGIC_OP_OR_REVERSE: 3228 return V_028808_ROP3_OR_REVERSE; 3229 case VK_LOGIC_OP_COPY_INVERTED: 3230 return V_028808_ROP3_COPY_INVERTED; 3231 case VK_LOGIC_OP_OR_INVERTED: 3232 return V_028808_ROP3_OR_INVERTED; 3233 case VK_LOGIC_OP_NAND: 3234 return V_028808_ROP3_NAND; 3235 case VK_LOGIC_OP_SET: 3236 return V_028808_ROP3_SET; 3237 default: 3238 unreachable("Unhandled logic op"); 3239 } 3240} 3241 3242/* 3243 * Queue helper to get ring. 3244 * placed here as it needs queue + device structs. 
3245 */ 3246static inline enum amd_ip_type 3247radv_queue_ring(struct radv_queue *queue) 3248{ 3249 return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf); 3250} 3251 3252/** 3253 * Helper used for debugging compiler issues by enabling/disabling LLVM for a 3254 * specific shader stage (developers only). 3255 */ 3256static inline bool 3257radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage) 3258{ 3259 return device->physical_device->use_llvm; 3260} 3261 3262static inline bool 3263radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice) 3264{ 3265 return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) || 3266 pdevice->rad_info.gfx_level >= GFX10; 3267} 3268 3269struct radv_acceleration_structure { 3270 struct vk_object_base base; 3271 3272 struct radeon_winsys_bo *bo; 3273 uint64_t mem_offset; 3274 uint64_t size; 3275}; 3276 3277static inline uint64_t 3278radv_accel_struct_get_va(const struct radv_acceleration_structure *accel) 3279{ 3280 return radv_buffer_get_va(accel->bo) + accel->mem_offset; 3281} 3282 3283/* radv_perfcounter.c */ 3284void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders); 3285void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs); 3286void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, 3287 int family); 3288void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, 3289 int family); 3290 3291/* radv_spm.c */ 3292bool radv_spm_init(struct radv_device *device); 3293void radv_spm_finish(struct radv_device *device); 3294void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs); 3295 3296#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \ 3297 VK_FROM_HANDLE(__radv_type, __name, __handle) 3298 3299VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer, 3300 VK_OBJECT_TYPE_COMMAND_BUFFER) 
3301VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) 3302VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) 3303VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice, 3304 VK_OBJECT_TYPE_PHYSICAL_DEVICE) 3305VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE) 3306VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base, 3307 VkAccelerationStructureKHR, 3308 VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR) 3309VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool, 3310 VK_OBJECT_TYPE_COMMAND_POOL) 3311VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER) 3312VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView, 3313 VK_OBJECT_TYPE_BUFFER_VIEW) 3314VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool, 3315 VK_OBJECT_TYPE_DESCRIPTOR_POOL) 3316VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet, 3317 VK_OBJECT_TYPE_DESCRIPTOR_SET) 3318VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout, 3319 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) 3320VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base, 3321 VkDescriptorUpdateTemplate, 3322 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE) 3323VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory, 3324 VK_OBJECT_TYPE_DEVICE_MEMORY) 3325VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) 3326VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) 3327VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView, 3328 VK_OBJECT_TYPE_IMAGE_VIEW); 3329VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV, 3330 VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV) 3331VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache, 3332 
VK_OBJECT_TYPE_PIPELINE_CACHE) 3333VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, 3334 VK_OBJECT_TYPE_PIPELINE) 3335VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout, 3336 VK_OBJECT_TYPE_PIPELINE_LAYOUT) 3337VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool, 3338 VK_OBJECT_TYPE_QUERY_POOL) 3339VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass, 3340 VK_OBJECT_TYPE_RENDER_PASS) 3341VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler, 3342 VK_OBJECT_TYPE_SAMPLER) 3343VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base, 3344 VkSamplerYcbcrConversion, 3345 VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION) 3346 3347#ifdef __cplusplus 3348} 3349#endif 3350 3351#endif /* RADV_PRIVATE_H */ 3352