/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#ifndef RADEON_WINSYS_H
#define RADEON_WINSYS_H

/* The public winsys interface header for the radeon driver. */

/* Flags passed to the flush callbacks; the low bits are the gallium
 * PIPE_FLUSH_* flags, the high bits below are radeon-specific. */

/* Skip command submission. Same as RADEON_NOOP=1. */
#define RADEON_FLUSH_NOOP (1u << 29)

/* Toggle the secure submission boolean after the flush */
#define RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION (1u << 30)

/* Whether the next IB can start immediately and not wait for draws and
 * dispatches from the current IB to finish.
 */
#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)

#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \
   (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)

#include "amd/common/ac_gpu_info.h"
#include "amd/common/ac_surface.h"
#include "pipebuffer/pb_buffer.h"

/* Tiling flags. */
enum radeon_bo_layout
{
   RADEON_LAYOUT_LINEAR = 0,
   RADEON_LAYOUT_TILED,
   RADEON_LAYOUT_SQUARETILED,

   RADEON_LAYOUT_UNKNOWN
};

/* Memory domains a buffer can be placed in. */
enum radeon_bo_domain
{ /* bitfield */
  RADEON_DOMAIN_GTT = 2,
  RADEON_DOMAIN_VRAM = 4,
  RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
  RADEON_DOMAIN_GDS = 8,
  RADEON_DOMAIN_OA = 16,
};

/* Buffer creation flags. */
enum radeon_bo_flag
{ /* bitfield */
  RADEON_FLAG_GTT_WC = (1 << 0),
  RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
  RADEON_FLAG_NO_SUBALLOC = (1 << 2),
  RADEON_FLAG_SPARSE = (1 << 3),
  RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
  RADEON_FLAG_READ_ONLY = (1 << 5),
  RADEON_FLAG_32BIT = (1 << 6),
  RADEON_FLAG_ENCRYPTED = (1 << 7),
  RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
  RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
  /* Discard on eviction (instead of moving the buffer to GTT).
   * This guarantees that this buffer will never be moved to GTT.
   */
  RADEON_FLAG_DISCARDABLE = (1 << 10),
  RADEON_FLAG_MALL_NOALLOC = (1 << 11), /* don't cache in the infinity cache */
};

enum radeon_map_flags
{
   /* Indicates that the caller will unmap the buffer.
    *
    * Not unmapping buffers is an important performance optimization for
    * OpenGL (avoids kernel overhead for frequently mapped buffers).
    */
   RADEON_MAP_TEMPORARY = (PIPE_MAP_DRV_PRV << 0),
};

/* Granularity of sparse-buffer commitment: 64 KiB. */
#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)

/* Values/counters queryable through radeon_winsys::query_value. */
enum radeon_value_id
{
   RADEON_REQUESTED_VRAM_MEMORY,
   RADEON_REQUESTED_GTT_MEMORY,
   RADEON_MAPPED_VRAM,
   RADEON_MAPPED_GTT,
   RADEON_SLAB_WASTED_VRAM,
   RADEON_SLAB_WASTED_GTT,
   RADEON_BUFFER_WAIT_TIME_NS,
   RADEON_NUM_MAPPED_BUFFERS,
   RADEON_TIMESTAMP,
   RADEON_NUM_GFX_IBS,
   RADEON_NUM_SDMA_IBS,
   RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
   RADEON_GFX_IB_SIZE_COUNTER,
   RADEON_NUM_BYTES_MOVED,
   RADEON_NUM_EVICTIONS,
   RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
   RADEON_VRAM_USAGE,
   RADEON_VRAM_VIS_USAGE,
   RADEON_GTT_USAGE,
   RADEON_GPU_TEMPERATURE,
   RADEON_CURRENT_SCLK,
   RADEON_CURRENT_MCLK,
   RADEON_CS_THREAD_TIME,
};

/* Context scheduling priority, in ascending order. */
enum radeon_ctx_priority
{
   RADEON_CTX_PRIORITY_LOW = 0,
   RADEON_CTX_PRIORITY_MEDIUM,
   RADEON_CTX_PRIORITY_HIGH,
   RADEON_CTX_PRIORITY_REALTIME,
};

/* Each group of two has the same priority. */
#define RADEON_PRIO_FENCE_TRACE (1 << 0)
#define RADEON_PRIO_SO_FILLED_SIZE (1 << 1)

#define RADEON_PRIO_QUERY (1 << 2)
#define RADEON_PRIO_IB (1 << 3)

#define RADEON_PRIO_DRAW_INDIRECT (1 << 4)
#define RADEON_PRIO_INDEX_BUFFER (1 << 5)

#define RADEON_PRIO_CP_DMA (1 << 6)
#define RADEON_PRIO_BORDER_COLORS (1 << 7)

#define RADEON_PRIO_CONST_BUFFER (1 << 8)
#define RADEON_PRIO_DESCRIPTORS (1 << 9)

#define RADEON_PRIO_SAMPLER_BUFFER (1 << 10)
#define RADEON_PRIO_VERTEX_BUFFER (1 << 11)

#define RADEON_PRIO_SHADER_RW_BUFFER (1 << 12)
#define RADEON_PRIO_SAMPLER_TEXTURE (1 << 13)

#define RADEON_PRIO_SHADER_RW_IMAGE (1 << 14)
#define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 15)

#define RADEON_PRIO_COLOR_BUFFER (1 << 16)
#define RADEON_PRIO_DEPTH_BUFFER (1 << 17)

#define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 18)
#define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 19)

#define RADEON_PRIO_SEPARATE_META (1 << 20)
#define RADEON_PRIO_SHADER_BINARY (1 << 21) /* the hw can't hide instruction cache misses */

#define RADEON_PRIO_SHADER_RINGS (1 << 22)
#define RADEON_PRIO_SCRATCH_BUFFER (1 << 23)

/* Mask covering all priority bits (everything below RADEON_USAGE_READ). */
#define RADEON_ALL_PRIORITIES (RADEON_USAGE_READ - 1)

/* Upper bits of priorities are used by usage flags. */
#define RADEON_USAGE_READ (1 << 28)
#define RADEON_USAGE_WRITE (1 << 29)
#define RADEON_USAGE_READWRITE (RADEON_USAGE_READ | RADEON_USAGE_WRITE)

/* The winsys ensures that the CS submission will be scheduled after
 * previously flushed CSs referencing this BO in a conflicting way.
 */
#define RADEON_USAGE_SYNCHRONIZED (1 << 30)

/* When used, an implicit sync is done to make sure a compute shader
 * will read the written values from a previous draw.
 */
#define RADEON_USAGE_NEEDS_IMPLICIT_SYNC (1u << 31)

struct winsys_handle;
struct radeon_winsys_ctx;

/* One chunk of a (possibly chained) command buffer. */
struct radeon_cmdbuf_chunk {
   unsigned cdw;    /* Number of used dwords. */
   unsigned max_dw; /* Maximum number of dwords. */
   uint32_t *buf;   /* The base pointer of the chunk. */
};

/* A command buffer: the chunk currently being written plus all
 * previously filled chunks. */
struct radeon_cmdbuf {
   struct radeon_cmdbuf_chunk current;
   struct radeon_cmdbuf_chunk *prev;
   uint16_t num_prev; /* Number of previous chunks. */
   uint16_t max_prev; /* Space in array pointed to by prev. */
   unsigned prev_dw;  /* Total number of dwords in previous chunks. */

   /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
   uint32_t used_vram_kb;
   uint32_t used_gart_kb;
   uint64_t gpu_address;

   /* Private winsys data. */
   void *priv;
   void *csc; /* amdgpu_cs_context */
};

/* Tiling info for display code, DRI sharing, and other data. */
struct radeon_bo_metadata {
   /* Tiling flags describing the texture layout for display code
    * and DRI sharing.
    */
   union {
      struct {
         enum radeon_bo_layout microtile;
         enum radeon_bo_layout macrotile;
         unsigned pipe_config;
         unsigned bankw;
         unsigned bankh;
         unsigned tile_split;
         unsigned mtilea;
         unsigned num_banks;
         unsigned stride;
         bool scanout;
      } legacy;
   } u;

   enum radeon_surf_mode mode; /* Output from buffer_get_metadata */

   /* Additional metadata associated with the buffer, in bytes.
    * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
    * Supported by amdgpu only.
    */
   uint32_t size_metadata;
   uint32_t metadata[64];
};

enum radeon_feature_id
{
   RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
   RADEON_FID_R300_CMASK_ACCESS,
};

/* One entry of the buffer list as reported by cs_get_buffer_list. */
struct radeon_bo_list_item {
   uint64_t bo_size;
   uint64_t vm_address;
   uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
};

/* The winsys vtable: the interface gallium drivers use to talk to the
 * kernel driver (implemented by the amdgpu and radeon winsyses). */
struct radeon_winsys {
   /**
    * The screen object this winsys was created for
    */
   struct pipe_screen *screen;
   /**
    * Has the application created at least one TMZ buffer.
    */
   const bool uses_secure_bos;

   /**
    * Decrement the winsys reference count.
    *
    * \param ws The winsys this function is called for.
    * \return True if the winsys and screen should be destroyed.
    */
   bool (*unref)(struct radeon_winsys *ws);

   /**
    * Destroy this winsys.
    *
    * \param ws The winsys this function is called from.
    */
   void (*destroy)(struct radeon_winsys *ws);

   /**
    * Query an info structure from winsys.
    *
    * \param ws The winsys this function is called from.
    * \param info Return structure
    */
   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info,
                      bool enable_smart_access_memory,
                      bool disable_smart_access_memory);

   /**
    * A hint for the winsys that it should pin its execution threads to
    * a group of cores sharing a specific L3 cache if the CPU has multiple
    * L3 caches. This is needed for good multithreading performance on
    * AMD Zen CPUs.
    */
   void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);

   /**************************************************************************
    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
    *
    * Remember that gallium gets to choose the interface it needs, and the
    * window systems must then implement that interface (rather than the
    * other way around...).
    *************************************************************************/

   /**
    * Create a buffer object.
    *
    * \param ws The winsys this function is called from.
    * \param size The size to allocate.
    * \param alignment An alignment of the buffer in memory.
    * \param domain A bitmask of the RADEON_DOMAIN_* flags.
    * \param flags A bitmask of the RADEON_FLAG_* flags.
    * \return The created buffer object.
    */
   struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
                                      enum radeon_bo_domain domain, enum radeon_bo_flag flags);

   /**
    * Map the entire data store of a buffer object into the client's address
    * space.
    *
    * Callers are expected to unmap buffers again if and only if the
    * RADEON_MAP_TEMPORARY flag is set in \p usage.
    *
    * \param buf A winsys buffer object to map.
    * \param cs A command stream to flush if the buffer is referenced by it.
    * \param usage A bitmask of the PIPE_MAP_* and RADEON_MAP_* flags.
    * \return The pointer at the beginning of the buffer.
    */
   void *(*buffer_map)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       struct radeon_cmdbuf *cs, enum pipe_map_flags usage);

   /**
    * Unmap a buffer object from the client's address space.
    *
    * \param buf A winsys buffer object to unmap.
    */
   void (*buffer_unmap)(struct radeon_winsys *ws, struct pb_buffer *buf);

   /**
    * Wait for the buffer and return true if the buffer is not used
    * by the device.
    *
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer
    * is idle.
    */
   bool (*buffer_wait)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       uint64_t timeout, unsigned usage);

   /**
    * Return buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf A winsys buffer object to get the flags from.
    * \param md Metadata
    */
   void (*buffer_get_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Set buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf A winsys buffer object to set the flags for.
    * \param md Metadata
    */
   void (*buffer_set_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Get a winsys buffer from a winsys handle. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws The winsys this function is called from.
    * \param whandle A winsys handle pointer as was received from a state
    *                tracker.
    */
   struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle,
                                           unsigned vm_alignment, bool is_prime_linear_buffer);

   /**
    * Get a winsys buffer from a user pointer. The resulting buffer can't
    * be exported. Both pointer and size must be page aligned.
    *
    * \param ws The winsys this function is called from.
    * \param pointer User pointer to turn into a buffer object.
    * \param size Size in bytes for the new buffer.
    */
   struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size, enum radeon_bo_flag flags);

   /**
    * Whether the buffer was created from a user pointer.
    *
    * \param buf A winsys buffer object
    * \return whether \p buf was created via buffer_from_ptr
    */
   bool (*buffer_is_user_ptr)(struct pb_buffer *buf);

   /** Whether the buffer was suballocated. */
   bool (*buffer_is_suballocated)(struct pb_buffer *buf);

   /**
    * Get a winsys handle from a winsys buffer. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws The winsys instance for which the handle is to be valid
    * \param buf A winsys buffer object to get the handle from.
    * \param whandle A winsys handle pointer.
    * \return true on success.
    */
   bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer *buf,
                             struct winsys_handle *whandle);

   /**
    * Change the commitment of a (64KB-page aligned) region of the given
    * sparse buffer.
    *
    * \warning There is no automatic synchronization with command submission.
    *
    * \note Only implemented by the amdgpu winsys.
    *
    * \return false on out of memory or other failure, true on success.
    */
   bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer *buf,
                         uint64_t offset, uint64_t size, bool commit);

   /**
    * Return the virtual address of a buffer.
    *
    * When virtual memory is not in use, this is the offset relative to the
    * relocation base (non-zero for sub-allocated buffers).
    *
    * \param buf A winsys buffer object
    * \return virtual address
    */
   uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);

   /**
    * Return the offset of this buffer relative to the relocation base.
    * This is only non-zero for sub-allocated buffers.
    *
    * This is only supported in the radeon winsys, since amdgpu uses virtual
    * addresses in submissions even for the video engines.
    *
    * \param buf A winsys buffer object
    * \return the offset for relocations
    */
   unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf);

   /**
    * Query the initial placement of the buffer from the kernel driver.
    */
   enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf);

   /**
    * Query the flags used for creation of this buffer.
    *
    * Note that for imported buffer this may be lossy since not all flags
    * are passed 1:1.
    */
   enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer *buf);

   /**************************************************************************
    * Command submission.
    *
    * Each pipe context should create its own command stream and submit
    * commands independently of other contexts.
    *************************************************************************/

   /**
    * Create a command submission context.
    * Various command streams can be submitted to the same context.
    */
   struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws,
                                           enum radeon_ctx_priority priority);

   /**
    * Destroy a context.
    */
   void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

   /**
    * Query a GPU reset status.
    */
   enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx,
                                                    bool full_reset_only,
                                                    bool *needs_reset);

   /**
    * Create a command stream.
    *
    * \param cs The returned structure that is initialized by cs_create.
    * \param ctx The submission context
    * \param ip_type The IP type (GFX, DMA, UVD)
    * \param flush Flush callback function associated with the command stream.
    * \param flush_ctx User pointer that will be passed to the flush callback.
    *
    * \return true on success
    */
   bool (*cs_create)(struct radeon_cmdbuf *cs,
                     struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx, bool stop_exec_on_failure);

   /**
    * Set or change the CS preamble, which is a sequence of packets that is executed before
    * the command buffer. If the winsys doesn't support preambles, the packets are inserted
    * into the command buffer.
    *
    * \param cs Command stream
    * \param preamble_ib Preamble IB for the context.
    * \param preamble_num_dw Number of dwords in the preamble IB.
    * \param preamble_changed Whether the preamble changed or is the same as the last one.
    */
   void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                           unsigned preamble_num_dw, bool preamble_changed);

   /**
    * Set up and enable mid command buffer preemption for the command stream.
    *
    * \param cs Command stream
    * \param preamble_ib Non-preemptible preamble IB for the context.
    * \param preamble_num_dw Number of dwords in the preamble IB.
    */
   bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                               unsigned preamble_num_dw);

   /**
    * Destroy a command stream.
    *
    * \param cs A command stream to destroy.
    */
   void (*cs_destroy)(struct radeon_cmdbuf *cs);

   /**
    * Add a buffer. Each buffer used by a CS must be added using this function.
    *
    * \param cs Command stream
    * \param buf Buffer
    * \param usage Usage
    * \param domain Bitmask of the RADEON_DOMAIN_* flags.
    * \return Buffer index.
    */
   unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                             unsigned usage, enum radeon_bo_domain domain);

   /**
    * Return the index of an already-added buffer.
    *
    * Not supported on amdgpu. Drivers with GPUVM should not care about
    * buffer indices.
    *
    * \param cs Command stream
    * \param buf Buffer
    * \return The buffer index, or -1 if the buffer has not been added.
    */
   int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf);

   /**
    * Return true if there is enough memory in VRAM and GTT for the buffers
    * added so far. If the validation fails, all buffers which have
    * been added since the last call of cs_validate will be removed and
    * the CS will be flushed (provided there are still any buffers).
    *
    * \param cs A command stream to validate.
    */
   bool (*cs_validate)(struct radeon_cmdbuf *cs);

   /**
    * Check whether the given number of dwords is available in the IB.
    * Optionally chain a new chunk of the IB if necessary and supported.
    *
    * \param cs A command stream.
    * \param dw Number of CS dwords requested by the caller.
    * \return true if there is enough space
    */
   bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);

   /**
    * Return the buffer list.
    *
    * This is the buffer list as passed to the kernel, i.e. it only contains
    * the parent buffers of sub-allocated buffers.
    *
    * \param cs Command stream
    * \param list Returned buffer list. Set to NULL to query the count only.
    * \return The buffer count.
    */
   unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list);

   /**
    * Flush a command stream.
    *
    * \param cs A command stream to flush.
    * \param flags PIPE_FLUSH_* flags.
    * \param fence Pointer to a fence. If non-NULL, a fence is inserted
    *              after the CS and is returned through this parameter.
    * \return Negative POSIX error code or 0 for success.
    *         Asynchronous submissions never return an error.
    */
   int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence);

   /**
    * Create a fence before the CS is flushed.
    * The user must flush manually to complete the initialization of the fence.
    *
    * The fence must not be used for anything except \ref cs_add_fence_dependency
    * before the flush.
    */
   struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);

   /**
    * Return true if a buffer is referenced by a command stream.
    *
    * \param cs A command stream.
    * \param buf A winsys buffer.
    */
   bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                                   unsigned usage);

   /**
    * Request access to a feature for a command stream.
    *
    * \param cs A command stream.
    * \param fid Feature ID, one of RADEON_FID_*
    * \param enable Whether to enable or disable the feature.
    */
   bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable);
   /**
    * Make sure all asynchronous flush of the cs have completed
    *
    * \param cs A command stream.
    */
   void (*cs_sync_flush)(struct radeon_cmdbuf *cs);

   /**
    * Add a fence dependency to the CS, so that the CS will wait for
    * the fence before execution.
    *
    * \param dependency_flags Bitmask of RADEON_DEPENDENCY_*
    */
   void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence,
                                   unsigned dependency_flags);

   /**
    * Signal a syncobj when the CS finishes execution.
    */
   void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence);

   /**
    * Wait for the fence and return true if the fence has been signalled.
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
    * is signalled.
    */
   bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout);

   /**
    * Reference counting for fences.
    */
   void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src);

   /**
    * Create a new fence object corresponding to the given syncobj fd.
    */
   struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd);

   /**
    * Create a new fence object corresponding to the given sync_file.
    */
   struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd);

   /**
    * Return a sync_file FD corresponding to the given fence object.
    */
   int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence);

   /**
    * Return a sync file FD that is already signalled.
    */
   int (*export_signalled_sync_file)(struct radeon_winsys *ws);

   /**
    * Initialize surface
    *
    * \param ws The winsys this function is called from.
    * \param tex Input texture description
    * \param flags Bitmask of RADEON_SURF_* flags
    * \param bpe Bytes per pixel, it can be different for Z buffers.
    * \param mode Preferred tile mode. (linear, 1D, or 2D)
    * \param surf Output structure
    */
   int (*surface_init)(struct radeon_winsys *ws, const struct pipe_resource *tex, uint64_t flags,
                       unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf);

   /**
    * Query one of the RADEON_* values; see enum radeon_value_id.
    */
   uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);

   /**
    * Read \p num_registers consecutive 32-bit values starting at
    * \p reg_offset into \p out.
    * \return presumably true on success — confirm against the winsys impls.
    */
   bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
                          uint32_t *out);

   /**
    * Secure context
    * (whether the CS currently submits to the secure/TMZ queue)
    */
   bool (*cs_is_secure)(struct radeon_cmdbuf *cs);
};

/* Return true if the CS contains more than \p num_dw dwords,
 * i.e. something beyond the first num_dw dwords has been emitted. */
static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
{
   return cs && (cs->prev_dw + cs->current.cdw > num_dw);
}

/* Append one dword to the current chunk. No bounds check is performed here;
 * callers are expected to have reserved space (presumably via cs_check_space). */
static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
{
   cs->current.buf[cs->current.cdw++] = value;
}

/* Append \p count dwords to the current chunk. Same no-bounds-check caveat
 * as radeon_emit. */
static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values,
                                     unsigned count)
{
   memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
   cs->current.cdw += count;
}

/* Whether the app has created at least one TMZ buffer (see uses_secure_bos). */
static inline bool radeon_uses_secure_bos(struct radeon_winsys* ws)
{
   return ws->uses_secure_bos;
}

/* Update *dst to reference src, adjusting refcounts through the winsys. */
static inline void
radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer **dst, struct pb_buffer *src)
{
   pb_reference_with_winsys(rws, dst, src);
}

/* The following bits describe the heaps managed by slab allocators (pb_slab) and
 * the allocation cache (pb_cache).
 *
 * Note that bits 4 and 5 are overloaded: their meaning depends on whether
 * RADEON_HEAP_BIT_VRAM is set (NO_CPU_ACCESS/MALL_NOALLOC for VRAM heaps,
 * WC/GL2_BYPASS for GTT heaps).
 */
#define RADEON_HEAP_BIT_VRAM (1 << 0)      /* if false, it's GTT */
#define RADEON_HEAP_BIT_READ_ONLY (1 << 1) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_32BIT (1 << 2)     /* both VRAM and GTT */
#define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */

#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */
#define RADEON_HEAP_BIT_MALL_NOALLOC (1 << 5)  /* VRAM only */

#define RADEON_HEAP_BIT_WC (1 << 4)         /* GTT only, VRAM implies this to be true */
#define RADEON_HEAP_BIT_GL2_BYPASS (1 << 5) /* GTT only */

/* The number of all possible heap descriptions using the bits above. */
#define RADEON_NUM_HEAPS (1 << 6)

/* Map a heap index back to its memory domain (VRAM or GTT). */
static inline enum radeon_bo_domain radeon_domain_from_heap(int heap)
{
   assert(heap >= 0);

   if (heap & RADEON_HEAP_BIT_VRAM)
      return RADEON_DOMAIN_VRAM;
   else
      return RADEON_DOMAIN_GTT;
}

/* Map a heap index back to the RADEON_FLAG_* set it encodes.
 * All heap-managed buffers carry NO_INTERPROCESS_SHARING. */
static inline unsigned radeon_flags_from_heap(int heap)
{
   assert(heap >= 0);

   unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;

   if (heap & RADEON_HEAP_BIT_READ_ONLY)
      flags |= RADEON_FLAG_READ_ONLY;
   if (heap & RADEON_HEAP_BIT_32BIT)
      flags |= RADEON_FLAG_32BIT;
   if (heap & RADEON_HEAP_BIT_ENCRYPTED)
      flags |= RADEON_FLAG_ENCRYPTED;

   if (heap & RADEON_HEAP_BIT_VRAM) {
      /* VRAM heaps are always write-combined; bits 4/5 mean NO_CPU_ACCESS
       * and MALL_NOALLOC here. */
      flags |= RADEON_FLAG_GTT_WC;
      if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
         flags |= RADEON_FLAG_NO_CPU_ACCESS;
      if (heap & RADEON_HEAP_BIT_MALL_NOALLOC)
         flags |= RADEON_FLAG_MALL_NOALLOC;
   } else {
      /* GTT only: bits 4/5 mean WC and GL2_BYPASS here. */
      if (heap & RADEON_HEAP_BIT_WC)
         flags |= RADEON_FLAG_GTT_WC;
      if (heap & RADEON_HEAP_BIT_GL2_BYPASS)
         flags |= RADEON_FLAG_GL2_BYPASS;
   }

   return flags;
}

/* This cleans up flags, so that we can
comfortably assume that no invalid flag combinations 796 * are set. 797 */ 798static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain, 799 enum radeon_bo_flag *_flags) 800{ 801 unsigned domain = *_domain; 802 unsigned flags = *_flags; 803 804 /* Only set 1 domain, e.g. ignore GTT if VRAM is set. */ 805 if (domain) 806 domain = BITFIELD_BIT(ffs(domain) - 1); 807 else 808 domain = RADEON_DOMAIN_VRAM; 809 810 switch (domain) { 811 case RADEON_DOMAIN_VRAM: 812 flags |= RADEON_FLAG_GTT_WC; 813 flags &= ~RADEON_FLAG_GL2_BYPASS; 814 break; 815 case RADEON_DOMAIN_GTT: 816 flags &= ~RADEON_FLAG_NO_CPU_ACCESS; 817 flags &= ~RADEON_FLAG_MALL_NOALLOC; 818 break; 819 case RADEON_DOMAIN_GDS: 820 case RADEON_DOMAIN_OA: 821 flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS; 822 flags &= ~RADEON_FLAG_SPARSE; 823 break; 824 } 825 826 /* Sparse buffers must have NO_CPU_ACCESS set. */ 827 if (flags & RADEON_FLAG_SPARSE) 828 flags |= RADEON_FLAG_NO_CPU_ACCESS; 829 830 *_domain = (enum radeon_bo_domain)domain; 831 *_flags = (enum radeon_bo_flag)flags; 832} 833 834/* Return the heap index for winsys allocators, or -1 on failure. */ 835static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags) 836{ 837 radeon_canonicalize_bo_flags(&domain, &flags); 838 839 /* Resources with interprocess sharing don't use any winsys allocators. */ 840 if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) 841 return -1; 842 843 /* These are unsupported flags. */ 844 /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. 
*/ 845 if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE | 846 RADEON_FLAG_DISCARDABLE)) 847 return -1; 848 849 int heap = 0; 850 851 if (flags & RADEON_FLAG_READ_ONLY) 852 heap |= RADEON_HEAP_BIT_READ_ONLY; 853 if (flags & RADEON_FLAG_32BIT) 854 heap |= RADEON_HEAP_BIT_32BIT; 855 if (flags & RADEON_FLAG_ENCRYPTED) 856 heap |= RADEON_HEAP_BIT_ENCRYPTED; 857 858 if (domain == RADEON_DOMAIN_VRAM) { 859 /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */ 860 heap |= RADEON_HEAP_BIT_VRAM; 861 if (flags & RADEON_FLAG_NO_CPU_ACCESS) 862 heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS; 863 if (flags & RADEON_FLAG_MALL_NOALLOC) 864 heap |= RADEON_HEAP_BIT_MALL_NOALLOC; 865 /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */ 866 /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */ 867 } else if (domain == RADEON_DOMAIN_GTT) { 868 /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */ 869 if (flags & RADEON_FLAG_GTT_WC) 870 heap |= RADEON_HEAP_BIT_WC; 871 if (flags & RADEON_FLAG_GL2_BYPASS) 872 heap |= RADEON_HEAP_BIT_GL2_BYPASS; 873 /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */ 874 /* RADEON_FLAG_MALL_NOALLOC is ignored and implied to be false for GTT */ 875 } else { 876 return -1; /* */ 877 } 878 879 assert(heap < RADEON_NUM_HEAPS); 880 return heap; 881} 882 883typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *, 884 const struct pipe_screen_config *); 885 886/* These functions create the radeon_winsys instance for the corresponding kernel driver. */ 887struct radeon_winsys * 888amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, 889 radeon_screen_create_t screen_create); 890struct radeon_winsys * 891radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, 892 radeon_screen_create_t screen_create); 893 894#endif 895