/*
 * Copyright © 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "ac_gpu_info.h"
#include "ac_shader_util.h"
#include "ac_debug.h"

#include "addrlib/src/amdgpu_asic_addr.h"
#include "sid.h"
#include "util/macros.h"
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/os_misc.h"
#include "util/bitset.h"

#include <stdio.h>
#include <ctype.h>

/* External-revision ranges (low, high) consumed by ASICREV_IS() below. */
#define AMDGPU_ARCTURUS_RANGE 0x32, 0x3C
#define AMDGPU_ALDEBARAN_RANGE 0x3C, 0xFF

#define ASICREV_IS_ARCTURUS(r) ASICREV_IS(r, ARCTURUS)
#define ASICREV_IS_ALDEBARAN(r) ASICREV_IS(r, ALDEBARAN)

/* On Windows the DRM/amdgpu headers in the #else branch are not included.
 * This branch supplies local copies of the constants and struct layouts this
 * file uses, plus stub entry points that always report failure, so the rest
 * of the file still compiles.
 * NOTE(review): the struct layouts presumably mirror the kernel/libdrm
 * definitions - confirm against drm-uapi/amdgpu_drm.h before editing them.
 */
#ifdef _WIN32
#define DRM_CAP_ADDFB2_MODIFIERS 0x10
#define DRM_CAP_SYNCOBJ 0x13
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
#define AMDGPU_GEM_DOMAIN_GTT 0x2
#define AMDGPU_GEM_DOMAIN_VRAM 0x4
#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10)
#define AMDGPU_HW_IP_GFX 0
#define AMDGPU_HW_IP_COMPUTE 1
#define AMDGPU_HW_IP_DMA 2
#define AMDGPU_HW_IP_UVD 3
#define AMDGPU_HW_IP_VCE 4
#define AMDGPU_HW_IP_UVD_ENC 5
#define AMDGPU_HW_IP_VCN_DEC 6
#define AMDGPU_HW_IP_VCN_ENC 7
#define AMDGPU_HW_IP_VCN_JPEG 8
#define AMDGPU_IDS_FLAGS_FUSION 0x1
#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
#define AMDGPU_IDS_FLAGS_TMZ 0x4
#define AMDGPU_INFO_FW_VCE 0x1
#define AMDGPU_INFO_FW_UVD 0x2
#define AMDGPU_INFO_FW_GFX_ME 0x04
#define AMDGPU_INFO_FW_GFX_PFP 0x05
#define AMDGPU_INFO_FW_GFX_CE 0x06
#define AMDGPU_INFO_DEV_INFO 0x16
#define AMDGPU_INFO_MEMORY 0x19
#define AMDGPU_INFO_VIDEO_CAPS_DECODE 0
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
#define AMDGPU_INFO_FW_GFX_MEC 0x08

#define AMDGPU_VRAM_TYPE_UNKNOWN 0
#define AMDGPU_VRAM_TYPE_GDDR1 1
#define AMDGPU_VRAM_TYPE_DDR2 2
#define AMDGPU_VRAM_TYPE_GDDR3 3
#define AMDGPU_VRAM_TYPE_GDDR4 4
#define AMDGPU_VRAM_TYPE_GDDR5 5
#define AMDGPU_VRAM_TYPE_HBM 6
#define AMDGPU_VRAM_TYPE_DDR3 7
#define AMDGPU_VRAM_TYPE_DDR4 8
#define AMDGPU_VRAM_TYPE_GDDR6 9
#define AMDGPU_VRAM_TYPE_DDR5 10

/* Size of a single memory heap. */
struct drm_amdgpu_heap_info {
   uint64_t total_heap_size;
};
/* Heap sizes for VRAM, CPU-accessible VRAM and GTT. */
struct drm_amdgpu_memory_info {
   struct drm_amdgpu_heap_info vram;
   struct drm_amdgpu_heap_info cpu_accessible_vram;
   struct drm_amdgpu_heap_info gtt;
};
/* Device description filled in by the AMDGPU_INFO_DEV_INFO query. */
struct drm_amdgpu_info_device {
   /** PCI Device ID */
   uint32_t device_id;
   /** Internal chip revision (A0, A1, etc.) */
   uint32_t chip_rev;
   uint32_t external_rev;
   /** Revision id in PCI Config space */
   uint32_t pci_rev;
   uint32_t family;
   uint32_t num_shader_engines;
   uint32_t num_shader_arrays_per_engine;
   /* in KHz */
   uint32_t gpu_counter_freq;
   uint64_t max_engine_clock;
   uint64_t max_memory_clock;
   /* cu information */
   uint32_t cu_active_number;
   /* NOTE: cu_ao_mask is INVALID, DON'T use it */
   uint32_t cu_ao_mask;
   uint32_t cu_bitmap[4][4];
   /** Render backend pipe mask. One render backend is CB+DB. */
   uint32_t enabled_rb_pipes_mask;
   uint32_t num_rb_pipes;
   uint32_t num_hw_gfx_contexts;
   uint32_t _pad;
   uint64_t ids_flags;
   /** Starting virtual address for UMDs. */
   uint64_t virtual_address_offset;
   /** The maximum virtual address */
   uint64_t virtual_address_max;
   /** Required alignment of virtual addresses. */
   uint32_t virtual_address_alignment;
   /** Page table entry - fragment size */
   uint32_t pte_fragment_size;
   uint32_t gart_page_size;
   /** constant engine ram size */
   uint32_t ce_ram_size;
   /** video memory type info */
   uint32_t vram_type;
   /** video memory bit width */
   uint32_t vram_bit_width;
   /* vce harvesting instance */
   uint32_t vce_harvest_config;
   /* gfx double offchip LDS buffers */
   uint32_t gc_double_offchip_lds_buf;
   /* NGG Primitive Buffer */
   uint64_t prim_buf_gpu_addr;
   /* NGG Position Buffer */
   uint64_t pos_buf_gpu_addr;
   /* NGG Control Sideband */
   uint64_t cntl_sb_buf_gpu_addr;
   /* NGG Parameter Cache */
   uint64_t param_buf_gpu_addr;
   uint32_t prim_buf_size;
   uint32_t pos_buf_size;
   uint32_t cntl_sb_buf_size;
   uint32_t param_buf_size;
   /* wavefront size */
   uint32_t wave_front_size;
   /* shader visible vgprs */
   uint32_t num_shader_visible_vgprs;
   /* CU per shader array */
   uint32_t num_cu_per_sh;
   /* number of tcc blocks */
   uint32_t num_tcc_blocks;
   /* gs vgt table depth */
   uint32_t gs_vgt_table_depth;
   /* gs primitive buffer depth */
   uint32_t gs_prim_buffer_depth;
   /* max gs wavefront per vgt */
   uint32_t max_gs_waves_per_vgt;
   uint32_t _pad1;
   /* always on cu bitmap */
   uint32_t cu_ao_bitmap[4][4];
   /** Starting high virtual address for UMDs. */
   uint64_t high_va_offset;
   /** The maximum high virtual address */
   uint64_t high_va_max;
   /* gfx10 pa_sc_tile_steering_override */
   uint32_t pa_sc_tile_steering_override;
   /* disabled TCCs */
   uint64_t tcc_disabled_mask;
};
/* Per-IP-block description filled in by amdgpu_query_hw_ip_info(). */
struct drm_amdgpu_info_hw_ip {
   uint32_t hw_ip_version_major;
   uint32_t hw_ip_version_minor;
   uint32_t ib_start_alignment;
   uint32_t ib_size_alignment;
   uint32_t available_rings;
   uint32_t ip_discovery_version;
};
/* PCI location (domain/bus/device/function) of a DRM device. */
typedef struct _drmPciBusInfo {
   uint16_t domain;
   uint8_t bus;
   uint8_t dev;
   uint8_t func;
} drmPciBusInfo, *drmPciBusInfoPtr;
/* Reduced drmDevice carrying only the PCI bus info. */
typedef struct _drmDevice {
   union {
      drmPciBusInfoPtr pci;
   } businfo;
} drmDevice, *drmDevicePtr;
enum amdgpu_sw_info {
   amdgpu_sw_info_address32_hi = 0,
};
/* Opaque handles; the pointed-to structs are never defined in this branch. */
typedef struct amdgpu_device *amdgpu_device_handle;
typedef struct amdgpu_bo *amdgpu_bo_handle;
/* Parameters for amdgpu_bo_alloc(). */
struct amdgpu_bo_alloc_request {
   uint64_t alloc_size;
   uint64_t phys_alignment;
   uint32_t preferred_heap;
   uint64_t flags;
};
struct amdgpu_gds_resource_info {
   uint32_t gds_gfx_partition_size;
   uint32_t gds_total_size;
};
struct amdgpu_buffer_size_alignments {
   uint64_t size_local;
   uint64_t size_remote;
};
struct amdgpu_heap_info {
   uint64_t heap_size;
};
/* GPU description filled in by amdgpu_query_gpu_info(). */
struct amdgpu_gpu_info {
   uint32_t asic_id;
   uint32_t chip_external_rev;
   uint32_t family_id;
   uint64_t ids_flags;
   uint64_t max_engine_clk;
   uint64_t max_memory_clk;
   uint32_t num_shader_engines;
   uint32_t num_shader_arrays_per_engine;
   uint32_t rb_pipes;
   uint32_t enabled_rb_pipes_mask;
   uint32_t gpu_counter_freq;
   uint32_t mc_arb_ramcfg;
   uint32_t gb_addr_cfg;
   uint32_t gb_tile_mode[32];
   uint32_t gb_macro_tile_mode[16];
   uint32_t cu_bitmap[4][4];
   uint32_t vram_type;
   uint32_t vram_bit_width;
   uint32_t ce_ram_size;
   uint32_t vce_harvest_config;
   uint32_t pci_rev_id;
};
/* Stub: always fails with -EINVAL and never writes *value. */
static int drmGetCap(int fd, uint64_t capability, uint64_t *value)
{
   return -EINVAL;
}
/* Stub: does nothing. */
static void drmFreeDevice(drmDevicePtr *device)
{
}
/* Stub: always fails with -ENODEV and never writes *device. */
static int drmGetDevice2(int fd, uint32_t flags, drmDevicePtr *device)
{
   return -ENODEV;
}
/* Stub: always fails with -EINVAL; *buf_handle is left untouched. */
static int amdgpu_bo_alloc(amdgpu_device_handle dev,
                           struct amdgpu_bo_alloc_request *alloc_buffer,
                           amdgpu_bo_handle *buf_handle)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL. */
static int amdgpu_bo_free(amdgpu_bo_handle buf_handle)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *info is left untouched. */
static int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev,
                                              struct amdgpu_buffer_size_alignments
                                              *info)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *version and *feature are left untouched. */
static int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type,
                                         unsigned ip_instance, unsigned index,
                                         uint32_t *version, uint32_t *feature)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *info is left untouched. */
static int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type,
                                   unsigned ip_instance,
                                   struct drm_amdgpu_info_hw_ip *info)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *info is left untouched. */
static int amdgpu_query_heap_info(amdgpu_device_handle dev, uint32_t heap,
                                  uint32_t flags, struct amdgpu_heap_info *info)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *info is left untouched. */
static int amdgpu_query_gpu_info(amdgpu_device_handle dev,
                                 struct amdgpu_gpu_info *info)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *value is left untouched. */
static int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id,
                             unsigned size, void *value)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *value is left untouched. */
static int amdgpu_query_sw_info(amdgpu_device_handle dev, enum amdgpu_sw_info info,
                                void *value)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *gds_info is left untouched. */
static int amdgpu_query_gds_info(amdgpu_device_handle dev,
                                 struct amdgpu_gds_resource_info *gds_info)
{
   return -EINVAL;
}
/* Stub: always fails with -EINVAL; *value is left untouched. */
static int amdgpu_query_video_caps_info(amdgpu_device_handle dev, unsigned cap_type,
                                        unsigned size, void *value)
{
   return -EINVAL;
}
/* Stub: no marketing name is available; always returns NULL. */
static const char *amdgpu_get_marketing_name(amdgpu_device_handle dev)
{
   return NULL;
}
#else
#include "drm-uapi/amdgpu_drm.h"
#include <amdgpu.h>
#include <xf86drm.h>
#endif
323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci#define CIK_TILE_MODE_COLOR_2D 14 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f) 327bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0 328bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4 329bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5 330bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6 331bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7 332bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8 333bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9 334bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10 335bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11 336bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12 337bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13 338bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14 339bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16 340bf215546Sopenharmony_ci#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_cistatic unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) 343bf215546Sopenharmony_ci{ 344bf215546Sopenharmony_ci unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D]; 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) { 347bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P2: 348bf215546Sopenharmony_ci return 2; 349bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16: 350bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16: 351bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32: 352bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32: 
353bf215546Sopenharmony_ci return 4; 354bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16: 355bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16: 356bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16: 357bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16: 358bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16: 359bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32: 360bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32: 361bf215546Sopenharmony_ci return 8; 362bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16: 363bf215546Sopenharmony_ci case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: 364bf215546Sopenharmony_ci return 16; 365bf215546Sopenharmony_ci default: 366bf215546Sopenharmony_ci fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n"); 367bf215546Sopenharmony_ci assert(!"this should never occur"); 368bf215546Sopenharmony_ci return 2; 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci} 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_cistatic bool has_syncobj(int fd) 373bf215546Sopenharmony_ci{ 374bf215546Sopenharmony_ci uint64_t value; 375bf215546Sopenharmony_ci if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &value)) 376bf215546Sopenharmony_ci return false; 377bf215546Sopenharmony_ci return value ? true : false; 378bf215546Sopenharmony_ci} 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_cistatic bool has_timeline_syncobj(int fd) 381bf215546Sopenharmony_ci{ 382bf215546Sopenharmony_ci uint64_t value; 383bf215546Sopenharmony_ci if (drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &value)) 384bf215546Sopenharmony_ci return false; 385bf215546Sopenharmony_ci return value ? 
true : false; 386bf215546Sopenharmony_ci} 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_cistatic bool has_modifiers(int fd) 389bf215546Sopenharmony_ci{ 390bf215546Sopenharmony_ci uint64_t value; 391bf215546Sopenharmony_ci if (drmGetCap(fd, DRM_CAP_ADDFB2_MODIFIERS, &value)) 392bf215546Sopenharmony_ci return false; 393bf215546Sopenharmony_ci return value ? true : false; 394bf215546Sopenharmony_ci} 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_cistatic uint64_t fix_vram_size(uint64_t size) 397bf215546Sopenharmony_ci{ 398bf215546Sopenharmony_ci /* The VRAM size is underreported, so we need to fix it, because 399bf215546Sopenharmony_ci * it's used to compute the number of memory modules for harvesting. 400bf215546Sopenharmony_ci */ 401bf215546Sopenharmony_ci return align64(size, 256 * 1024 * 1024); 402bf215546Sopenharmony_ci} 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_cistatic bool 405bf215546Sopenharmony_cihas_tmz_support(amdgpu_device_handle dev, struct radeon_info *info, uint32_t ids_flags) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci struct amdgpu_bo_alloc_request request = {0}; 408bf215546Sopenharmony_ci int r; 409bf215546Sopenharmony_ci amdgpu_bo_handle bo; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci if (ids_flags & AMDGPU_IDS_FLAGS_TMZ) 412bf215546Sopenharmony_ci return true; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci /* AMDGPU_IDS_FLAGS_TMZ is supported starting from drm_minor 40 */ 415bf215546Sopenharmony_ci if (info->drm_minor >= 40) 416bf215546Sopenharmony_ci return false; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci /* Find out ourselves if TMZ is enabled */ 419bf215546Sopenharmony_ci if (info->gfx_level < GFX9) 420bf215546Sopenharmony_ci return false; 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci if (info->drm_minor < 36) 423bf215546Sopenharmony_ci return false; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci request.alloc_size = 256; 426bf215546Sopenharmony_ci request.phys_alignment = 
1024; 427bf215546Sopenharmony_ci request.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM; 428bf215546Sopenharmony_ci request.flags = AMDGPU_GEM_CREATE_ENCRYPTED; 429bf215546Sopenharmony_ci r = amdgpu_bo_alloc(dev, &request, &bo); 430bf215546Sopenharmony_ci if (r) 431bf215546Sopenharmony_ci return false; 432bf215546Sopenharmony_ci amdgpu_bo_free(bo); 433bf215546Sopenharmony_ci return true; 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_cistatic void set_custom_cu_en_mask(struct radeon_info *info) 437bf215546Sopenharmony_ci{ 438bf215546Sopenharmony_ci info->spi_cu_en = ~0; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci const char *cu_env_var = os_get_option("AMD_CU_MASK"); 441bf215546Sopenharmony_ci if (!cu_env_var) 442bf215546Sopenharmony_ci return; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci int size = strlen(cu_env_var); 445bf215546Sopenharmony_ci char *str = alloca(size + 1); 446bf215546Sopenharmony_ci memset(str, 0, size + 1); 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci size = 0; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* Strip whitespace. */ 451bf215546Sopenharmony_ci for (unsigned src = 0; cu_env_var[src]; src++) { 452bf215546Sopenharmony_ci if (cu_env_var[src] != ' ' && cu_env_var[src] != '\t' && 453bf215546Sopenharmony_ci cu_env_var[src] != '\n' && cu_env_var[src] != '\r') { 454bf215546Sopenharmony_ci str[size++] = cu_env_var[src]; 455bf215546Sopenharmony_ci } 456bf215546Sopenharmony_ci } 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci /* The following syntax is used, all whitespace is ignored: 459bf215546Sopenharmony_ci * ID = [0-9][0-9]* ex. base 10 numbers 460bf215546Sopenharmony_ci * ID_list = (ID | ID-ID)[, (ID | ID-ID)]* ex. 0,2-4,7 461bf215546Sopenharmony_ci * CU_list = 0x[0-F]* | ID_list ex. 0x337F OR 0,2-4,7 462bf215546Sopenharmony_ci * AMD_CU_MASK = CU_list 463bf215546Sopenharmony_ci * 464bf215546Sopenharmony_ci * It's a CU mask within a shader array. 
It's applied to all shader arrays. 465bf215546Sopenharmony_ci */ 466bf215546Sopenharmony_ci bool is_good_form = true; 467bf215546Sopenharmony_ci uint32_t spi_cu_en = 0; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci if (size > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) { 470bf215546Sopenharmony_ci str += 2; 471bf215546Sopenharmony_ci size -= 2; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci for (unsigned i = 0; i < size; i++) 474bf215546Sopenharmony_ci is_good_form &= isxdigit(str[i]) != 0; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci if (!is_good_form) { 477bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: ill-formed hex value\n"); 478bf215546Sopenharmony_ci } else { 479bf215546Sopenharmony_ci spi_cu_en = strtol(str, NULL, 16); 480bf215546Sopenharmony_ci } 481bf215546Sopenharmony_ci } else { 482bf215546Sopenharmony_ci /* Parse ID_list. */ 483bf215546Sopenharmony_ci long first = 0, last = -1; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci if (!isdigit(*str)) { 486bf215546Sopenharmony_ci is_good_form = false; 487bf215546Sopenharmony_ci } else { 488bf215546Sopenharmony_ci while (*str) { 489bf215546Sopenharmony_ci bool comma = false; 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci if (isdigit(*str)) { 492bf215546Sopenharmony_ci first = last = strtol(str, &str, 10); 493bf215546Sopenharmony_ci } else if (*str == '-') { 494bf215546Sopenharmony_ci str++; 495bf215546Sopenharmony_ci /* Parse a digit after a dash. 
*/ 496bf215546Sopenharmony_ci if (isdigit(*str)) { 497bf215546Sopenharmony_ci last = strtol(str, &str, 10); 498bf215546Sopenharmony_ci } else { 499bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: expected a digit after -\n"); 500bf215546Sopenharmony_ci is_good_form = false; 501bf215546Sopenharmony_ci break; 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci } else if (*str == ',') { 504bf215546Sopenharmony_ci comma = true; 505bf215546Sopenharmony_ci str++; 506bf215546Sopenharmony_ci if (!isdigit(*str)) { 507bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: expected a digit after ,\n"); 508bf215546Sopenharmony_ci is_good_form = false; 509bf215546Sopenharmony_ci break; 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci if (comma || !*str) { 514bf215546Sopenharmony_ci if (first > last) { 515bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: range not increasing (%li, %li)\n", first, last); 516bf215546Sopenharmony_ci is_good_form = false; 517bf215546Sopenharmony_ci break; 518bf215546Sopenharmony_ci } 519bf215546Sopenharmony_ci if (last > 31) { 520bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: index too large (%li)\n", last); 521bf215546Sopenharmony_ci is_good_form = false; 522bf215546Sopenharmony_ci break; 523bf215546Sopenharmony_ci } 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci spi_cu_en |= BITFIELD_RANGE(first, last - first + 1); 526bf215546Sopenharmony_ci last = -1; 527bf215546Sopenharmony_ci } 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci /* The mask is parsed. Now assign bits to CUs. */ 533bf215546Sopenharmony_ci if (is_good_form) { 534bf215546Sopenharmony_ci bool error = false; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci /* Clear bits that have no effect. 
*/ 537bf215546Sopenharmony_ci spi_cu_en &= BITFIELD_MASK(info->max_good_cu_per_sa); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci if (!spi_cu_en) { 540bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: at least 1 CU in each SA must be enabled\n"); 541bf215546Sopenharmony_ci error = true; 542bf215546Sopenharmony_ci } 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (info->has_graphics) { 545bf215546Sopenharmony_ci uint32_t min_full_cu_mask = BITFIELD_MASK(info->min_good_cu_per_sa); 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci /* The hw ignores all non-compute CU masks if any of them is 0. Disallow that. */ 548bf215546Sopenharmony_ci if ((spi_cu_en & min_full_cu_mask) == 0) { 549bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: at least 1 CU from 0x%x per SA must be " 550bf215546Sopenharmony_ci "enabled (SPI limitation)\n", min_full_cu_mask); 551bf215546Sopenharmony_ci error = true; 552bf215546Sopenharmony_ci } 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci /* We usually disable 1 or 2 CUs for VS and GS, which means at last 1 other CU 555bf215546Sopenharmony_ci * must be enabled. 
556bf215546Sopenharmony_ci */ 557bf215546Sopenharmony_ci uint32_t cu_mask_ge, unused; 558bf215546Sopenharmony_ci ac_compute_late_alloc(info, false, false, false, &unused, &cu_mask_ge); 559bf215546Sopenharmony_ci cu_mask_ge &= min_full_cu_mask; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci if ((spi_cu_en & cu_mask_ge) == 0) { 562bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: at least 1 CU from 0x%x per SA must be " 563bf215546Sopenharmony_ci "enabled (late alloc constraint for GE)\n", cu_mask_ge); 564bf215546Sopenharmony_ci error = true; 565bf215546Sopenharmony_ci } 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci if ((min_full_cu_mask & spi_cu_en & ~cu_mask_ge) == 0) { 568bf215546Sopenharmony_ci fprintf(stderr, "amd: invalid AMD_CU_MASK: at least 1 CU from 0x%x per SA must be " 569bf215546Sopenharmony_ci "enabled (late alloc constraint for PS)\n", 570bf215546Sopenharmony_ci min_full_cu_mask & ~cu_mask_ge); 571bf215546Sopenharmony_ci error = true; 572bf215546Sopenharmony_ci } 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (!error) { 576bf215546Sopenharmony_ci info->spi_cu_en = spi_cu_en; 577bf215546Sopenharmony_ci info->spi_cu_en_has_effect = spi_cu_en & BITFIELD_MASK(info->max_good_cu_per_sa); 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci} 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_cibool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) 583bf215546Sopenharmony_ci{ 584bf215546Sopenharmony_ci struct amdgpu_gpu_info amdinfo; 585bf215546Sopenharmony_ci struct drm_amdgpu_info_device device_info = {0}; 586bf215546Sopenharmony_ci struct amdgpu_buffer_size_alignments alignment_info = {0}; 587bf215546Sopenharmony_ci uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; 588bf215546Sopenharmony_ci int r, i, j; 589bf215546Sopenharmony_ci amdgpu_device_handle dev = dev_p; 590bf215546Sopenharmony_ci drmDevicePtr devinfo; 
591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_GFX == AMD_IP_GFX); 593bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_COMPUTE == AMD_IP_COMPUTE); 594bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_DMA == AMD_IP_SDMA); 595bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_UVD == AMD_IP_UVD); 596bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_VCE == AMD_IP_VCE); 597bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_UVD_ENC == AMD_IP_UVD_ENC); 598bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_VCN_DEC == AMD_IP_VCN_DEC); 599bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_VCN_ENC == AMD_IP_VCN_ENC); 600bf215546Sopenharmony_ci STATIC_ASSERT(AMDGPU_HW_IP_VCN_JPEG == AMD_IP_VCN_JPEG); 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci /* Get PCI info. */ 603bf215546Sopenharmony_ci r = drmGetDevice2(fd, 0, &devinfo); 604bf215546Sopenharmony_ci if (r) { 605bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n"); 606bf215546Sopenharmony_ci return false; 607bf215546Sopenharmony_ci } 608bf215546Sopenharmony_ci info->pci_domain = devinfo->businfo.pci->domain; 609bf215546Sopenharmony_ci info->pci_bus = devinfo->businfo.pci->bus; 610bf215546Sopenharmony_ci info->pci_dev = devinfo->businfo.pci->dev; 611bf215546Sopenharmony_ci info->pci_func = devinfo->businfo.pci->func; 612bf215546Sopenharmony_ci drmFreeDevice(&devinfo); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci assert(info->drm_major == 3); 615bf215546Sopenharmony_ci info->is_amdgpu = true; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci if (info->drm_minor < 15) { 618bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: DRM version is %u.%u.%u, but this driver is " 619bf215546Sopenharmony_ci "only compatible with 3.15.0 (kernel 4.12) or later.\n", 620bf215546Sopenharmony_ci info->drm_major, info->drm_minor, info->drm_patchlevel); 621bf215546Sopenharmony_ci return false; 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci 
624bf215546Sopenharmony_ci /* Query hardware and driver information. */ 625bf215546Sopenharmony_ci r = amdgpu_query_gpu_info(dev, &amdinfo); 626bf215546Sopenharmony_ci if (r) { 627bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n"); 628bf215546Sopenharmony_ci return false; 629bf215546Sopenharmony_ci } 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info), &device_info); 632bf215546Sopenharmony_ci if (r) { 633bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_info(dev_info) failed.\n"); 634bf215546Sopenharmony_ci return false; 635bf215546Sopenharmony_ci } 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci r = amdgpu_query_buffer_size_alignment(dev, &alignment_info); 638bf215546Sopenharmony_ci if (r) { 639bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n"); 640bf215546Sopenharmony_ci return false; 641bf215546Sopenharmony_ci } 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci for (unsigned ip_type = 0; ip_type < AMD_NUM_IP_TYPES; ip_type++) { 644bf215546Sopenharmony_ci struct drm_amdgpu_info_hw_ip ip_info = {0}; 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci r = amdgpu_query_hw_ip_info(dev, ip_type, 0, &ip_info); 647bf215546Sopenharmony_ci if (r || !ip_info.available_rings) 648bf215546Sopenharmony_ci continue; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci /* Gfx6-8 don't set ip_discovery_version. 
*/ 651bf215546Sopenharmony_ci if (info->drm_minor >= 48 && ip_info.ip_discovery_version) { 652bf215546Sopenharmony_ci info->ip[ip_type].ver_major = (ip_info.ip_discovery_version >> 16) & 0xff; 653bf215546Sopenharmony_ci info->ip[ip_type].ver_minor = (ip_info.ip_discovery_version >> 8) & 0xff; 654bf215546Sopenharmony_ci } else { 655bf215546Sopenharmony_ci info->ip[ip_type].ver_major = ip_info.hw_ip_version_major; 656bf215546Sopenharmony_ci info->ip[ip_type].ver_minor = ip_info.hw_ip_version_minor; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci /* Fix incorrect IP versions reported by the kernel. */ 659bf215546Sopenharmony_ci if (device_info.family == FAMILY_NV && 660bf215546Sopenharmony_ci (ASICREV_IS(device_info.external_rev, NAVI10) || 661bf215546Sopenharmony_ci ASICREV_IS(device_info.external_rev, NAVI12) || 662bf215546Sopenharmony_ci ASICREV_IS(device_info.external_rev, NAVI14))) 663bf215546Sopenharmony_ci info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 1; 664bf215546Sopenharmony_ci else if (device_info.family == FAMILY_NV || 665bf215546Sopenharmony_ci device_info.family == FAMILY_VGH || 666bf215546Sopenharmony_ci device_info.family == FAMILY_RMB || 667bf215546Sopenharmony_ci device_info.family == FAMILY_GC_10_3_6 || 668bf215546Sopenharmony_ci device_info.family == FAMILY_GC_10_3_7) 669bf215546Sopenharmony_ci info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3; 670bf215546Sopenharmony_ci } 671bf215546Sopenharmony_ci info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings); 672bf215546Sopenharmony_ci info->ib_alignment = MAX3(info->ib_alignment, ip_info.ib_start_alignment, 673bf215546Sopenharmony_ci ip_info.ib_size_alignment); 674bf215546Sopenharmony_ci } 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci /* Only require gfx or compute. 
*/ 677bf215546Sopenharmony_ci if (!info->ip[AMD_IP_GFX].num_queues && !info->ip[AMD_IP_COMPUTE].num_queues) { 678bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: failed to find gfx or compute.\n"); 679bf215546Sopenharmony_ci return false; 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_COMPUTE].num_queues)); 683bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_SDMA].num_queues)); 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci /* The kernel pads gfx and compute IBs to 256 dwords since: 686bf215546Sopenharmony_ci * 66f3b2d527154bd258a57c8815004b5964aa1cf5 687bf215546Sopenharmony_ci * Do the same. 688bf215546Sopenharmony_ci */ 689bf215546Sopenharmony_ci info->ib_alignment = MAX2(info->ib_alignment, 1024); 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version, 692bf215546Sopenharmony_ci &info->me_fw_feature); 693bf215546Sopenharmony_ci if (r) { 694bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n"); 695bf215546Sopenharmony_ci return false; 696bf215546Sopenharmony_ci } 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_MEC, 0, 0, &info->mec_fw_version, 699bf215546Sopenharmony_ci &info->mec_fw_feature); 700bf215546Sopenharmony_ci if (r) { 701bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(mec) failed.\n"); 702bf215546Sopenharmony_ci return false; 703bf215546Sopenharmony_ci } 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0, &info->pfp_fw_version, 706bf215546Sopenharmony_ci &info->pfp_fw_feature); 707bf215546Sopenharmony_ci if (r) { 708bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n"); 709bf215546Sopenharmony_ci 
return false; 710bf215546Sopenharmony_ci } 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0, &uvd_version, &uvd_feature); 713bf215546Sopenharmony_ci if (r) { 714bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n"); 715bf215546Sopenharmony_ci return false; 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0, &vce_version, &vce_feature); 719bf215546Sopenharmony_ci if (r) { 720bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n"); 721bf215546Sopenharmony_ci return false; 722bf215546Sopenharmony_ci } 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi); 725bf215546Sopenharmony_ci if (r) { 726bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n"); 727bf215546Sopenharmony_ci return false; 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci struct drm_amdgpu_memory_info meminfo = {0}; 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo); 733bf215546Sopenharmony_ci if (r) { 734bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n"); 735bf215546Sopenharmony_ci return false; 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci /* Note: usable_heap_size values can be random and can't be relied on. 
*/ 739bf215546Sopenharmony_ci info->gart_size_kb = DIV_ROUND_UP(meminfo.gtt.total_heap_size, 1024); 740bf215546Sopenharmony_ci info->vram_size_kb = DIV_ROUND_UP(fix_vram_size(meminfo.vram.total_heap_size), 1024); 741bf215546Sopenharmony_ci info->vram_vis_size_kb = DIV_ROUND_UP(meminfo.cpu_accessible_vram.total_heap_size, 1024); 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci if (info->drm_minor >= 41) { 744bf215546Sopenharmony_ci amdgpu_query_video_caps_info(dev, AMDGPU_INFO_VIDEO_CAPS_DECODE, 745bf215546Sopenharmony_ci sizeof(info->dec_caps), &(info->dec_caps)); 746bf215546Sopenharmony_ci amdgpu_query_video_caps_info(dev, AMDGPU_INFO_VIDEO_CAPS_ENCODE, 747bf215546Sopenharmony_ci sizeof(info->enc_caps), &(info->enc_caps)); 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci /* Add some margin of error, though this shouldn't be needed in theory. */ 751bf215546Sopenharmony_ci info->all_vram_visible = info->vram_size_kb * 0.9 < info->vram_vis_size_kb; 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci /* Set chip identification. 
*/ 754bf215546Sopenharmony_ci info->pci_id = device_info.device_id; 755bf215546Sopenharmony_ci info->pci_rev_id = device_info.pci_rev; 756bf215546Sopenharmony_ci info->vce_harvest_config = device_info.vce_harvest_config; 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci#define identify_chip2(asic, chipname) \ 759bf215546Sopenharmony_ci if (ASICREV_IS(device_info.external_rev, asic)) { \ 760bf215546Sopenharmony_ci info->family = CHIP_##chipname; \ 761bf215546Sopenharmony_ci info->name = #chipname; \ 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci#define identify_chip(chipname) identify_chip2(chipname, chipname) 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci switch (device_info.family) { 766bf215546Sopenharmony_ci case FAMILY_SI: 767bf215546Sopenharmony_ci identify_chip(TAHITI); 768bf215546Sopenharmony_ci identify_chip(PITCAIRN); 769bf215546Sopenharmony_ci identify_chip2(CAPEVERDE, VERDE); 770bf215546Sopenharmony_ci identify_chip(OLAND); 771bf215546Sopenharmony_ci identify_chip(HAINAN); 772bf215546Sopenharmony_ci break; 773bf215546Sopenharmony_ci case FAMILY_CI: 774bf215546Sopenharmony_ci identify_chip(BONAIRE); 775bf215546Sopenharmony_ci identify_chip(HAWAII); 776bf215546Sopenharmony_ci break; 777bf215546Sopenharmony_ci case FAMILY_KV: 778bf215546Sopenharmony_ci identify_chip2(SPECTRE, KAVERI); 779bf215546Sopenharmony_ci identify_chip2(SPOOKY, KAVERI); 780bf215546Sopenharmony_ci identify_chip2(KALINDI, KABINI); 781bf215546Sopenharmony_ci identify_chip2(GODAVARI, KABINI); 782bf215546Sopenharmony_ci break; 783bf215546Sopenharmony_ci case FAMILY_VI: 784bf215546Sopenharmony_ci identify_chip(ICELAND); 785bf215546Sopenharmony_ci identify_chip(TONGA); 786bf215546Sopenharmony_ci identify_chip(FIJI); 787bf215546Sopenharmony_ci identify_chip(POLARIS10); 788bf215546Sopenharmony_ci identify_chip(POLARIS11); 789bf215546Sopenharmony_ci identify_chip(POLARIS12); 790bf215546Sopenharmony_ci identify_chip(VEGAM); 791bf215546Sopenharmony_ci break; 
792bf215546Sopenharmony_ci case FAMILY_CZ: 793bf215546Sopenharmony_ci identify_chip(CARRIZO); 794bf215546Sopenharmony_ci identify_chip(STONEY); 795bf215546Sopenharmony_ci break; 796bf215546Sopenharmony_ci case FAMILY_AI: 797bf215546Sopenharmony_ci identify_chip(VEGA10); 798bf215546Sopenharmony_ci identify_chip(VEGA12); 799bf215546Sopenharmony_ci identify_chip(VEGA20); 800bf215546Sopenharmony_ci identify_chip(ARCTURUS); 801bf215546Sopenharmony_ci identify_chip(ALDEBARAN); 802bf215546Sopenharmony_ci break; 803bf215546Sopenharmony_ci case FAMILY_RV: 804bf215546Sopenharmony_ci identify_chip(RAVEN); 805bf215546Sopenharmony_ci identify_chip(RAVEN2); 806bf215546Sopenharmony_ci identify_chip(RENOIR); 807bf215546Sopenharmony_ci break; 808bf215546Sopenharmony_ci case FAMILY_NV: 809bf215546Sopenharmony_ci identify_chip(NAVI10); 810bf215546Sopenharmony_ci identify_chip(NAVI12); 811bf215546Sopenharmony_ci identify_chip(NAVI14); 812bf215546Sopenharmony_ci identify_chip(NAVI21); 813bf215546Sopenharmony_ci identify_chip(NAVI22); 814bf215546Sopenharmony_ci identify_chip(NAVI23); 815bf215546Sopenharmony_ci identify_chip(NAVI24); 816bf215546Sopenharmony_ci break; 817bf215546Sopenharmony_ci case FAMILY_VGH: 818bf215546Sopenharmony_ci identify_chip(VANGOGH); 819bf215546Sopenharmony_ci break; 820bf215546Sopenharmony_ci case FAMILY_RMB: 821bf215546Sopenharmony_ci identify_chip(REMBRANDT); 822bf215546Sopenharmony_ci break; 823bf215546Sopenharmony_ci case FAMILY_GC_10_3_6: 824bf215546Sopenharmony_ci identify_chip(GFX1036); 825bf215546Sopenharmony_ci break; 826bf215546Sopenharmony_ci case FAMILY_GC_10_3_7: 827bf215546Sopenharmony_ci identify_chip2(GFX1037, GFX1036); 828bf215546Sopenharmony_ci break; 829bf215546Sopenharmony_ci case FAMILY_GFX1100: 830bf215546Sopenharmony_ci identify_chip(GFX1100); 831bf215546Sopenharmony_ci identify_chip(GFX1101); 832bf215546Sopenharmony_ci identify_chip(GFX1102); 833bf215546Sopenharmony_ci break; 834bf215546Sopenharmony_ci case FAMILY_GFX1103: 
835bf215546Sopenharmony_ci identify_chip(GFX1103); 836bf215546Sopenharmony_ci break; 837bf215546Sopenharmony_ci } 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci if (!info->name) { 840bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: unknown (family_id, chip_external_rev): (%u, %u)\n", 841bf215546Sopenharmony_ci device_info.family, device_info.external_rev); 842bf215546Sopenharmony_ci return false; 843bf215546Sopenharmony_ci } 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci memset(info->lowercase_name, 0, sizeof(info->lowercase_name)); 846bf215546Sopenharmony_ci for (unsigned i = 0; info->name[i] && i < ARRAY_SIZE(info->lowercase_name) - 1; i++) 847bf215546Sopenharmony_ci info->lowercase_name[i] = tolower(info->name[i]); 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci if (info->ip[AMD_IP_GFX].ver_major == 11) 850bf215546Sopenharmony_ci info->gfx_level = GFX11; 851bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 10 && info->ip[AMD_IP_GFX].ver_minor == 3) 852bf215546Sopenharmony_ci info->gfx_level = GFX10_3; 853bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 10 && info->ip[AMD_IP_GFX].ver_minor == 1) 854bf215546Sopenharmony_ci info->gfx_level = GFX10; 855bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 9 || info->ip[AMD_IP_COMPUTE].ver_major == 9) 856bf215546Sopenharmony_ci info->gfx_level = GFX9; 857bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 8) 858bf215546Sopenharmony_ci info->gfx_level = GFX8; 859bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 7) 860bf215546Sopenharmony_ci info->gfx_level = GFX7; 861bf215546Sopenharmony_ci else if (info->ip[AMD_IP_GFX].ver_major == 6) 862bf215546Sopenharmony_ci info->gfx_level = GFX6; 863bf215546Sopenharmony_ci else { 864bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: Unknown gfx version: %u.%u\n", 865bf215546Sopenharmony_ci info->ip[AMD_IP_GFX].ver_major, info->ip[AMD_IP_GFX].ver_minor); 866bf215546Sopenharmony_ci 
return false; 867bf215546Sopenharmony_ci } 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci info->smart_access_memory = info->all_vram_visible && 870bf215546Sopenharmony_ci info->gfx_level >= GFX10_3 && 871bf215546Sopenharmony_ci util_get_cpu_caps()->family >= CPU_AMD_ZEN3 && 872bf215546Sopenharmony_ci util_get_cpu_caps()->family < CPU_AMD_LAST; 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci info->family_id = device_info.family; 875bf215546Sopenharmony_ci info->chip_external_rev = device_info.external_rev; 876bf215546Sopenharmony_ci info->chip_rev = device_info.chip_rev; 877bf215546Sopenharmony_ci info->marketing_name = amdgpu_get_marketing_name(dev); 878bf215546Sopenharmony_ci info->is_pro_graphics = info->marketing_name && (strstr(info->marketing_name, "Pro") || 879bf215546Sopenharmony_ci strstr(info->marketing_name, "PRO") || 880bf215546Sopenharmony_ci strstr(info->marketing_name, "Frontier")); 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci /* Set which chips have dedicated VRAM. */ 883bf215546Sopenharmony_ci info->has_dedicated_vram = !(device_info.ids_flags & AMDGPU_IDS_FLAGS_FUSION); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci /* The kernel can split large buffers in VRAM but not in GTT, so large 886bf215546Sopenharmony_ci * allocations can fail or cause buffer movement failures in the kernel. 887bf215546Sopenharmony_ci */ 888bf215546Sopenharmony_ci if (info->has_dedicated_vram) 889bf215546Sopenharmony_ci info->max_heap_size_kb = info->vram_size_kb; 890bf215546Sopenharmony_ci else 891bf215546Sopenharmony_ci info->max_heap_size_kb = info->gart_size_kb; 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci info->vram_type = device_info.vram_type; 894bf215546Sopenharmony_ci info->memory_bus_width = device_info.vram_bit_width; 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci /* Set which chips have uncached device memory. 
*/ 897bf215546Sopenharmony_ci info->has_l2_uncached = info->gfx_level >= GFX9; 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci /* Set hardware information. */ 900bf215546Sopenharmony_ci /* convert the shader/memory clocks from KHz to MHz */ 901bf215546Sopenharmony_ci info->max_gpu_freq_mhz = device_info.max_engine_clock / 1000; 902bf215546Sopenharmony_ci info->memory_freq_mhz_effective = info->memory_freq_mhz = device_info.max_memory_clock / 1000; 903bf215546Sopenharmony_ci info->max_tcc_blocks = device_info.num_tcc_blocks; 904bf215546Sopenharmony_ci info->max_se = device_info.num_shader_engines; 905bf215546Sopenharmony_ci info->max_sa_per_se = device_info.num_shader_arrays_per_engine; 906bf215546Sopenharmony_ci info->uvd_fw_version = info->ip[AMD_IP_UVD].num_queues ? uvd_version : 0; 907bf215546Sopenharmony_ci info->vce_fw_version = info->ip[AMD_IP_VCE].num_queues ? vce_version : 0; 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci /* Based on MemoryOpsPerClockTable from PAL. 
*/ 910bf215546Sopenharmony_ci switch (info->vram_type) { 911bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_DDR2: 912bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_DDR3: 913bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_DDR4: /* same for LPDDR4 */ 914bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_HBM: /* same for HBM2 and HBM3 */ 915bf215546Sopenharmony_ci info->memory_freq_mhz_effective *= 2; 916bf215546Sopenharmony_ci break; 917bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_DDR5: /* same for LPDDR5 */ 918bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_GDDR5: 919bf215546Sopenharmony_ci info->memory_freq_mhz_effective *= 4; 920bf215546Sopenharmony_ci break; 921bf215546Sopenharmony_ci case AMDGPU_VRAM_TYPE_GDDR6: 922bf215546Sopenharmony_ci info->memory_freq_mhz_effective *= 16; 923bf215546Sopenharmony_ci break; 924bf215546Sopenharmony_ci } 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci /* unified ring */ 927bf215546Sopenharmony_ci info->has_video_hw.vcn_decode 928bf215546Sopenharmony_ci = info->family >= CHIP_GFX1100 929bf215546Sopenharmony_ci ? info->ip[AMD_IP_VCN_UNIFIED].num_queues != 0 930bf215546Sopenharmony_ci : info->ip[AMD_IP_VCN_DEC].num_queues != 0; 931bf215546Sopenharmony_ci info->has_userptr = true; 932bf215546Sopenharmony_ci info->has_syncobj = has_syncobj(fd); 933bf215546Sopenharmony_ci info->has_timeline_syncobj = has_timeline_syncobj(fd); 934bf215546Sopenharmony_ci info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21; 935bf215546Sopenharmony_ci info->has_local_buffers = info->drm_minor >= 20; 936bf215546Sopenharmony_ci info->has_bo_metadata = true; 937bf215546Sopenharmony_ci info->has_eqaa_surface_allocator = info->gfx_level < GFX11; 938bf215546Sopenharmony_ci /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once 939bf215546Sopenharmony_ci * these faults are mitigated in software. 
940bf215546Sopenharmony_ci */ 941bf215546Sopenharmony_ci info->has_sparse_vm_mappings = info->gfx_level >= GFX7; 942bf215546Sopenharmony_ci info->has_scheduled_fence_dependency = info->drm_minor >= 28; 943bf215546Sopenharmony_ci info->mid_command_buffer_preemption_enabled = device_info.ids_flags & AMDGPU_IDS_FLAGS_PREEMPTION; 944bf215546Sopenharmony_ci info->has_tmz_support = has_tmz_support(dev, info, device_info.ids_flags); 945bf215546Sopenharmony_ci info->kernel_has_modifiers = has_modifiers(fd); 946bf215546Sopenharmony_ci info->has_graphics = info->ip[AMD_IP_GFX].num_queues > 0; 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci info->pa_sc_tile_steering_override = device_info.pa_sc_tile_steering_override; 949bf215546Sopenharmony_ci info->max_render_backends = device_info.num_rb_pipes; 950bf215546Sopenharmony_ci /* The value returned by the kernel driver was wrong. */ 951bf215546Sopenharmony_ci if (info->family == CHIP_KAVERI) 952bf215546Sopenharmony_ci info->max_render_backends = 2; 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci info->clock_crystal_freq = device_info.gpu_counter_freq; 955bf215546Sopenharmony_ci if (!info->clock_crystal_freq) { 956bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n"); 957bf215546Sopenharmony_ci info->clock_crystal_freq = 1; 958bf215546Sopenharmony_ci } 959bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 960bf215546Sopenharmony_ci info->tcc_cache_line_size = 128; 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci if (info->drm_minor >= 35) { 963bf215546Sopenharmony_ci info->num_tcc_blocks = info->max_tcc_blocks - util_bitcount64(device_info.tcc_disabled_mask); 964bf215546Sopenharmony_ci } else { 965bf215546Sopenharmony_ci /* This is a hack, but it's all we can do without a kernel upgrade. 
*/ 966bf215546Sopenharmony_ci info->num_tcc_blocks = info->vram_size_kb / (512 * 1024); 967bf215546Sopenharmony_ci if (info->num_tcc_blocks > info->max_tcc_blocks) 968bf215546Sopenharmony_ci info->num_tcc_blocks /= 2; 969bf215546Sopenharmony_ci } 970bf215546Sopenharmony_ci } else { 971bf215546Sopenharmony_ci if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) 972bf215546Sopenharmony_ci info->tcc_cache_line_size = 128; 973bf215546Sopenharmony_ci else 974bf215546Sopenharmony_ci info->tcc_cache_line_size = 64; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci info->num_tcc_blocks = info->max_tcc_blocks; 977bf215546Sopenharmony_ci } 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci info->tcc_rb_non_coherent = !util_is_power_of_two_or_zero(info->num_tcc_blocks); 980bf215546Sopenharmony_ci 981bf215546Sopenharmony_ci switch (info->family) { 982bf215546Sopenharmony_ci case CHIP_TAHITI: 983bf215546Sopenharmony_ci case CHIP_PITCAIRN: 984bf215546Sopenharmony_ci case CHIP_OLAND: 985bf215546Sopenharmony_ci case CHIP_HAWAII: 986bf215546Sopenharmony_ci case CHIP_KABINI: 987bf215546Sopenharmony_ci case CHIP_TONGA: 988bf215546Sopenharmony_ci case CHIP_STONEY: 989bf215546Sopenharmony_ci case CHIP_RAVEN2: 990bf215546Sopenharmony_ci info->l2_cache_size = info->num_tcc_blocks * 64 * 1024; 991bf215546Sopenharmony_ci break; 992bf215546Sopenharmony_ci case CHIP_VERDE: 993bf215546Sopenharmony_ci case CHIP_HAINAN: 994bf215546Sopenharmony_ci case CHIP_BONAIRE: 995bf215546Sopenharmony_ci case CHIP_KAVERI: 996bf215546Sopenharmony_ci case CHIP_ICELAND: 997bf215546Sopenharmony_ci case CHIP_CARRIZO: 998bf215546Sopenharmony_ci case CHIP_FIJI: 999bf215546Sopenharmony_ci case CHIP_POLARIS12: 1000bf215546Sopenharmony_ci case CHIP_VEGAM: 1001bf215546Sopenharmony_ci info->l2_cache_size = info->num_tcc_blocks * 128 * 1024; 1002bf215546Sopenharmony_ci break; 1003bf215546Sopenharmony_ci default: 1004bf215546Sopenharmony_ci info->l2_cache_size = info->num_tcc_blocks * 256 * 1024; 
1005bf215546Sopenharmony_ci break; 1006bf215546Sopenharmony_ci case CHIP_REMBRANDT: 1007bf215546Sopenharmony_ci info->l2_cache_size = info->num_tcc_blocks * 512 * 1024; 1008bf215546Sopenharmony_ci break; 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci info->l1_cache_size = 16384; 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci info->mc_arb_ramcfg = amdinfo.mc_arb_ramcfg; 1014bf215546Sopenharmony_ci info->gb_addr_config = amdinfo.gb_addr_cfg; 1015bf215546Sopenharmony_ci if (info->gfx_level >= GFX9) { 1016bf215546Sopenharmony_ci info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(info->gb_addr_config); 1017bf215546Sopenharmony_ci info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config); 1018bf215546Sopenharmony_ci } else { 1019bf215546Sopenharmony_ci info->num_tile_pipes = cik_get_num_tile_pipes(&amdinfo); 1020bf215546Sopenharmony_ci info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config); 1021bf215546Sopenharmony_ci } 1022bf215546Sopenharmony_ci info->r600_has_virtual_memory = true; 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above 1025bf215546Sopenharmony_ci * 16KB makes some SIMDs unoccupied). 1026bf215546Sopenharmony_ci * 1027bf215546Sopenharmony_ci * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used. 1028bf215546Sopenharmony_ci */ 1029bf215546Sopenharmony_ci info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024 : 64 * 1024; 1030bf215546Sopenharmony_ci /* lds_encode_granularity is the block size used for encoding registers. 1031bf215546Sopenharmony_ci * lds_alloc_granularity is what the hardware will align the LDS size to. 1032bf215546Sopenharmony_ci */ 1033bf215546Sopenharmony_ci info->lds_encode_granularity = info->gfx_level >= GFX7 ? 
128 * 4 : 64 * 4; 1034bf215546Sopenharmony_ci info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity; 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci /* This is "align_mask" copied from the kernel, maximums of all IP versions. */ 1037bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_GFX] = 0xff; 1038bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_COMPUTE] = 0xff; 1039bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_SDMA] = 0xf; 1040bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_UVD] = 0xf; 1041bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_VCE] = 0x3f; 1042bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_UVD_ENC] = 0x3f; 1043bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_VCN_DEC] = 0xf; 1044bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_VCN_ENC] = 0x3f; 1045bf215546Sopenharmony_ci info->ib_pad_dw_mask[AMD_IP_VCN_JPEG] = 0xf; 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs 1048bf215546Sopenharmony_ci * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. 1049bf215546Sopenharmony_ci * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel. 1050bf215546Sopenharmony_ci */ 1051bf215546Sopenharmony_ci info->has_clear_state = info->gfx_level >= GFX7; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci info->has_distributed_tess = 1054bf215546Sopenharmony_ci info->gfx_level >= GFX10 || (info->gfx_level >= GFX8 && info->max_se >= 2); 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci info->has_dcc_constant_encode = 1057bf215546Sopenharmony_ci info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9; 1060bf215546Sopenharmony_ci 1061bf215546Sopenharmony_ci /* Some chips have RB+ registers, but don't support RB+. 
Those must 1062bf215546Sopenharmony_ci * always disable it. 1063bf215546Sopenharmony_ci */ 1064bf215546Sopenharmony_ci info->rbplus_allowed = 1065bf215546Sopenharmony_ci info->has_rbplus && 1066bf215546Sopenharmony_ci (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN || 1067bf215546Sopenharmony_ci info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3); 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci info->has_out_of_order_rast = 1070bf215546Sopenharmony_ci info->gfx_level >= GFX8 && info->gfx_level <= GFX9 && info->max_se >= 2; 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci /* Whether chips support double rate packed math instructions. */ 1073bf215546Sopenharmony_ci info->has_packed_math_16bit = info->gfx_level >= GFX9; 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci /* Whether chips support dot product instructions. A subset of these support a smaller 1076bf215546Sopenharmony_ci * instruction encoding which accumulates with the destination. 1077bf215546Sopenharmony_ci */ 1078bf215546Sopenharmony_ci info->has_accelerated_dot_product = 1079bf215546Sopenharmony_ci info->family == CHIP_ARCTURUS || info->family == CHIP_ALDEBARAN || 1080bf215546Sopenharmony_ci info->family == CHIP_VEGA20 || info->family >= CHIP_NAVI12; 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. 
*/ 1083bf215546Sopenharmony_ci info->has_load_ctx_reg_pkt = 1084bf215546Sopenharmony_ci info->gfx_level >= GFX9 || (info->gfx_level >= GFX8 && info->me_fw_feature >= 41); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci info->cpdma_prefetch_writes_memory = info->gfx_level <= GFX8; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN; 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci info->has_tc_compat_zrange_bug = info->gfx_level >= GFX8 && info->gfx_level <= GFX9; 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci info->has_msaa_sample_loc_bug = 1093bf215546Sopenharmony_ci (info->family >= CHIP_POLARIS10 && info->family <= CHIP_POLARIS12) || 1094bf215546Sopenharmony_ci info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN; 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci info->has_ls_vgpr_init_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN; 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci /* Drawing from 0-sized index buffers causes hangs on gfx10. */ 1099bf215546Sopenharmony_ci info->has_zero_index_buffer_bug = info->gfx_level == GFX10; 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci /* Whether chips are affected by the image load/sample/gather hw bug when 1102bf215546Sopenharmony_ci * DCC is enabled (ie. WRITE_COMPRESS_ENABLE should be 0). 1103bf215546Sopenharmony_ci */ 1104bf215546Sopenharmony_ci info->has_image_load_dcc_bug = info->family == CHIP_NAVI23 || 1105bf215546Sopenharmony_ci info->family == CHIP_VANGOGH || 1106bf215546Sopenharmony_ci info->family == CHIP_REMBRANDT; 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci /* DB has a bug when ITERATE_256 is set to 1 that can cause a hang. The 1109bf215546Sopenharmony_ci * workaround is to set DECOMPRESS_ON_Z_PLANES to 2 for 4X MSAA D/S images. 
1110bf215546Sopenharmony_ci */ 1111bf215546Sopenharmony_ci info->has_two_planes_iterate256_bug = info->gfx_level == GFX10; 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_ci /* GFX10+Navi21: NGG->legacy transitions require VGT_FLUSH. */ 1114bf215546Sopenharmony_ci info->has_vgt_flush_ngg_legacy_bug = info->gfx_level == GFX10 || 1115bf215546Sopenharmony_ci info->family == CHIP_NAVI21; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci /* HW bug workaround when CS threadgroups > 256 threads and async compute 1118bf215546Sopenharmony_ci * isn't used, i.e. only one compute job can run at a time. If async 1119bf215546Sopenharmony_ci * compute is possible, the threadgroup size must be limited to 256 threads 1120bf215546Sopenharmony_ci * on all queues to avoid the bug. 1121bf215546Sopenharmony_ci * Only GFX6 and certain GFX7 chips are affected. 1122bf215546Sopenharmony_ci * 1123bf215546Sopenharmony_ci * FIXME: RADV doesn't limit the number of threads for async compute. 1124bf215546Sopenharmony_ci */ 1125bf215546Sopenharmony_ci info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 || 1126bf215546Sopenharmony_ci info->family == CHIP_BONAIRE || 1127bf215546Sopenharmony_ci info->family == CHIP_KABINI; 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci /* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the 1130bf215546Sopenharmony_ci * feature version wasn't bumped. 1131bf215546Sopenharmony_ci */ 1132bf215546Sopenharmony_ci info->has_32bit_predication = (info->gfx_level >= GFX10 && 1133bf215546Sopenharmony_ci info->me_fw_feature >= 32) || 1134bf215546Sopenharmony_ci (info->gfx_level == GFX9 && 1135bf215546Sopenharmony_ci info->me_fw_feature >= 52); 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci info->has_export_conflict_bug = info->gfx_level == GFX11; 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci /* Get the number of good compute units. 
*/ 1140bf215546Sopenharmony_ci info->num_cu = 0; 1141bf215546Sopenharmony_ci for (i = 0; i < info->max_se; i++) { 1142bf215546Sopenharmony_ci for (j = 0; j < info->max_sa_per_se; j++) { 1143bf215546Sopenharmony_ci if (info->gfx_level >= GFX11) { 1144bf215546Sopenharmony_ci assert(info->max_sa_per_se <= 2); 1145bf215546Sopenharmony_ci info->cu_mask[i][j] = device_info.cu_bitmap[i % 4][(i / 4) * 2 + j]; 1146bf215546Sopenharmony_ci } else if (info->family == CHIP_ARCTURUS) { 1147bf215546Sopenharmony_ci /* The CU bitmap in amd gpu info structure is 1148bf215546Sopenharmony_ci * 4x4 size array, and it's usually suitable for Vega 1149bf215546Sopenharmony_ci * ASICs which has 4*2 SE/SA layout. 1150bf215546Sopenharmony_ci * But for Arcturus, SE/SA layout is changed to 8*1. 1151bf215546Sopenharmony_ci * To mostly reduce the impact, we make it compatible 1152bf215546Sopenharmony_ci * with current bitmap array as below: 1153bf215546Sopenharmony_ci * SE4 --> cu_bitmap[0][1] 1154bf215546Sopenharmony_ci * SE5 --> cu_bitmap[1][1] 1155bf215546Sopenharmony_ci * SE6 --> cu_bitmap[2][1] 1156bf215546Sopenharmony_ci * SE7 --> cu_bitmap[3][1] 1157bf215546Sopenharmony_ci */ 1158bf215546Sopenharmony_ci assert(info->max_sa_per_se == 1); 1159bf215546Sopenharmony_ci info->cu_mask[i][0] = device_info.cu_bitmap[i % 4][i / 4]; 1160bf215546Sopenharmony_ci } else { 1161bf215546Sopenharmony_ci info->cu_mask[i][j] = device_info.cu_bitmap[i][j]; 1162bf215546Sopenharmony_ci } 1163bf215546Sopenharmony_ci info->num_cu += util_bitcount(info->cu_mask[i][j]); 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci /* Derive the number of enabled SEs from the CU mask. 
*/ 1168bf215546Sopenharmony_ci if (info->gfx_level >= GFX10_3 && info->max_se > 1) { 1169bf215546Sopenharmony_ci info->num_se = 0; 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci for (unsigned se = 0; se < info->max_se; se++) { 1172bf215546Sopenharmony_ci for (unsigned sa = 0; sa < info->max_sa_per_se; sa++) { 1173bf215546Sopenharmony_ci if (info->cu_mask[se][sa]) { 1174bf215546Sopenharmony_ci info->num_se++; 1175bf215546Sopenharmony_ci break; 1176bf215546Sopenharmony_ci } 1177bf215546Sopenharmony_ci } 1178bf215546Sopenharmony_ci } 1179bf215546Sopenharmony_ci } else { 1180bf215546Sopenharmony_ci /* GFX10 and older always enable all SEs because they don't support SE harvesting. */ 1181bf215546Sopenharmony_ci info->num_se = info->max_se; 1182bf215546Sopenharmony_ci } 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci /* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled, 1185bf215546Sopenharmony_ci * and max - min <= 2. 1186bf215546Sopenharmony_ci */ 1187bf215546Sopenharmony_ci unsigned cu_group = info->gfx_level >= GFX10 ? 
2 : 1; 1188bf215546Sopenharmony_ci info->max_good_cu_per_sa = 1189bf215546Sopenharmony_ci DIV_ROUND_UP(info->num_cu, (info->num_se * info->max_sa_per_se * cu_group)) * 1190bf215546Sopenharmony_ci cu_group; 1191bf215546Sopenharmony_ci info->min_good_cu_per_sa = 1192bf215546Sopenharmony_ci (info->num_cu / (info->num_se * info->max_sa_per_se * cu_group)) * cu_group; 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci memcpy(info->si_tile_mode_array, amdinfo.gb_tile_mode, sizeof(amdinfo.gb_tile_mode)); 1195bf215546Sopenharmony_ci info->enabled_rb_mask = amdinfo.enabled_rb_pipes_mask; 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci memcpy(info->cik_macrotile_mode_array, amdinfo.gb_macro_tile_mode, 1198bf215546Sopenharmony_ci sizeof(amdinfo.gb_macro_tile_mode)); 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_ci info->pte_fragment_size = alignment_info.size_local; 1201bf215546Sopenharmony_ci info->gart_page_size = alignment_info.size_remote; 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci if (info->gfx_level == GFX6) 1204bf215546Sopenharmony_ci info->gfx_ib_pad_with_type2 = true; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci /* GFX10 and maybe GFX9 need this alignment for cache coherency. */ 1207bf215546Sopenharmony_ci if (info->gfx_level >= GFX9) 1208bf215546Sopenharmony_ci info->ib_alignment = MAX2(info->ib_alignment, info->tcc_cache_line_size); 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci if ((info->drm_minor >= 31 && (info->family == CHIP_RAVEN || info->family == CHIP_RAVEN2 || 1211bf215546Sopenharmony_ci info->family == CHIP_RENOIR)) || 1212bf215546Sopenharmony_ci info->gfx_level >= GFX10_3) { 1213bf215546Sopenharmony_ci /* GFX10+ requires retiling in all cases. 
*/ 1214bf215546Sopenharmony_ci if (info->max_render_backends == 1 && info->gfx_level == GFX9) 1215bf215546Sopenharmony_ci info->use_display_dcc_unaligned = true; 1216bf215546Sopenharmony_ci else 1217bf215546Sopenharmony_ci info->use_display_dcc_with_retile_blit = true; 1218bf215546Sopenharmony_ci } 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci info->has_stable_pstate = info->drm_minor >= 45; 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci if (info->gfx_level >= GFX11) { 1223bf215546Sopenharmony_ci info->pc_lines = 1024; 1224bf215546Sopenharmony_ci info->pbb_max_alloc_count = 255; /* minimum is 2, maximum is 256 */ 1225bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX9 && info->has_graphics) { 1226bf215546Sopenharmony_ci unsigned pc_lines = 0; 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci switch (info->family) { 1229bf215546Sopenharmony_ci case CHIP_VEGA10: 1230bf215546Sopenharmony_ci case CHIP_VEGA12: 1231bf215546Sopenharmony_ci case CHIP_VEGA20: 1232bf215546Sopenharmony_ci pc_lines = 2048; 1233bf215546Sopenharmony_ci break; 1234bf215546Sopenharmony_ci case CHIP_RAVEN: 1235bf215546Sopenharmony_ci case CHIP_RAVEN2: 1236bf215546Sopenharmony_ci case CHIP_RENOIR: 1237bf215546Sopenharmony_ci case CHIP_NAVI10: 1238bf215546Sopenharmony_ci case CHIP_NAVI12: 1239bf215546Sopenharmony_ci case CHIP_NAVI21: 1240bf215546Sopenharmony_ci case CHIP_NAVI22: 1241bf215546Sopenharmony_ci case CHIP_NAVI23: 1242bf215546Sopenharmony_ci pc_lines = 1024; 1243bf215546Sopenharmony_ci break; 1244bf215546Sopenharmony_ci case CHIP_NAVI14: 1245bf215546Sopenharmony_ci case CHIP_NAVI24: 1246bf215546Sopenharmony_ci pc_lines = 512; 1247bf215546Sopenharmony_ci break; 1248bf215546Sopenharmony_ci case CHIP_VANGOGH: 1249bf215546Sopenharmony_ci case CHIP_REMBRANDT: 1250bf215546Sopenharmony_ci case CHIP_GFX1036: 1251bf215546Sopenharmony_ci pc_lines = 256; 1252bf215546Sopenharmony_ci break; 1253bf215546Sopenharmony_ci default: 1254bf215546Sopenharmony_ci assert(0); 
1255bf215546Sopenharmony_ci } 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci info->pc_lines = pc_lines; 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 1260bf215546Sopenharmony_ci info->pbb_max_alloc_count = pc_lines / 3; 1261bf215546Sopenharmony_ci } else { 1262bf215546Sopenharmony_ci info->pbb_max_alloc_count = MIN2(128, pc_lines / (4 * info->max_se)); 1263bf215546Sopenharmony_ci } 1264bf215546Sopenharmony_ci } 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci if (info->gfx_level >= GFX10_3) 1267bf215546Sopenharmony_ci info->max_wave64_per_simd = 16; 1268bf215546Sopenharmony_ci else if (info->gfx_level == GFX10) 1269bf215546Sopenharmony_ci info->max_wave64_per_simd = 20; 1270bf215546Sopenharmony_ci else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM) 1271bf215546Sopenharmony_ci info->max_wave64_per_simd = 8; 1272bf215546Sopenharmony_ci else 1273bf215546Sopenharmony_ci info->max_wave64_per_simd = 10; 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 1276bf215546Sopenharmony_ci info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd; 1277bf215546Sopenharmony_ci info->min_sgpr_alloc = 128; 1278bf215546Sopenharmony_ci info->sgpr_alloc_granularity = 128; 1279bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX8) { 1280bf215546Sopenharmony_ci info->num_physical_sgprs_per_simd = 800; 1281bf215546Sopenharmony_ci info->min_sgpr_alloc = 16; 1282bf215546Sopenharmony_ci info->sgpr_alloc_granularity = 16; 1283bf215546Sopenharmony_ci } else { 1284bf215546Sopenharmony_ci info->num_physical_sgprs_per_simd = 512; 1285bf215546Sopenharmony_ci info->min_sgpr_alloc = 8; 1286bf215546Sopenharmony_ci info->sgpr_alloc_granularity = 8; 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS; 1290bf215546Sopenharmony_ci 
info->never_stop_sq_perf_counters = info->gfx_level == GFX10 || 1291bf215546Sopenharmony_ci info->gfx_level == GFX10_3; 1292bf215546Sopenharmony_ci info->never_send_perfcounter_stop = info->gfx_level == GFX11; 1293bf215546Sopenharmony_ci info->has_sqtt_rb_harvest_bug = (info->family == CHIP_NAVI23 || 1294bf215546Sopenharmony_ci info->family == CHIP_NAVI24 || 1295bf215546Sopenharmony_ci info->family == CHIP_REMBRANDT || 1296bf215546Sopenharmony_ci info->family == CHIP_VANGOGH) && 1297bf215546Sopenharmony_ci util_bitcount(info->enabled_rb_mask) != 1298bf215546Sopenharmony_ci info->max_render_backends; 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_ci /* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */ 1301bf215546Sopenharmony_ci info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3; 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104; 1304bf215546Sopenharmony_ci 1305bf215546Sopenharmony_ci if (!info->has_graphics && info->family >= CHIP_ALDEBARAN) { 1306bf215546Sopenharmony_ci info->min_wave64_vgpr_alloc = 8; 1307bf215546Sopenharmony_ci info->max_vgpr_alloc = 512; 1308bf215546Sopenharmony_ci info->wave64_vgpr_alloc_granularity = 8; 1309bf215546Sopenharmony_ci } else { 1310bf215546Sopenharmony_ci info->min_wave64_vgpr_alloc = 4; 1311bf215546Sopenharmony_ci info->max_vgpr_alloc = 256; 1312bf215546Sopenharmony_ci info->wave64_vgpr_alloc_granularity = 4; 1313bf215546Sopenharmony_ci } 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci info->num_physical_wave64_vgprs_per_simd = info->gfx_level >= GFX10 ? 512 : 256; 1316bf215546Sopenharmony_ci info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4; 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_ci /* BIG_PAGE is supported since gfx10.3 and requires VRAM. VRAM is only guaranteed 1319bf215546Sopenharmony_ci * with AMDGPU_GEM_CREATE_DISCARDABLE. DISCARDABLE was added in DRM 3.47.0. 
1320bf215546Sopenharmony_ci */ 1321bf215546Sopenharmony_ci info->discardable_allows_big_page = info->gfx_level >= GFX10_3 && 1322bf215546Sopenharmony_ci info->has_dedicated_vram && 1323bf215546Sopenharmony_ci info->drm_minor >= 47; 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci /* The maximum number of scratch waves. The number is only a function of the number of CUs. 1326bf215546Sopenharmony_ci * It should be large enough to hold at least 1 threadgroup. Use the minimum per-SA CU count. 1327bf215546Sopenharmony_ci * 1328bf215546Sopenharmony_ci * We can decrease the number to make it fit into the infinity cache. 1329bf215546Sopenharmony_ci */ 1330bf215546Sopenharmony_ci const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */ 1331bf215546Sopenharmony_ci info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se, 1332bf215546Sopenharmony_ci max_waves_per_tg); 1333bf215546Sopenharmony_ci info->num_rb = util_bitcount(info->enabled_rb_mask); 1334bf215546Sopenharmony_ci info->max_gflops = info->num_cu * 128 * info->max_gpu_freq_mhz / 1000; 1335bf215546Sopenharmony_ci info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000); 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_ci if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) { 1338bf215546Sopenharmony_ci info->l3_cache_size_mb = info->num_tcc_blocks * 1339bf215546Sopenharmony_ci (info->family == CHIP_NAVI21 || 1340bf215546Sopenharmony_ci info->family == CHIP_NAVI22 ? 
8 : 4); 1341bf215546Sopenharmony_ci } 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci set_custom_cu_en_mask(info); 1344bf215546Sopenharmony_ci 1345bf215546Sopenharmony_ci const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL); 1346bf215546Sopenharmony_ci if (ib_filename) { 1347bf215546Sopenharmony_ci FILE *f = fopen(ib_filename, "r"); 1348bf215546Sopenharmony_ci if (f) { 1349bf215546Sopenharmony_ci fseek(f, 0, SEEK_END); 1350bf215546Sopenharmony_ci size_t size = ftell(f); 1351bf215546Sopenharmony_ci uint32_t *ib = (uint32_t *)malloc(size); 1352bf215546Sopenharmony_ci fseek(f, 0, SEEK_SET); 1353bf215546Sopenharmony_ci size_t n_read = fread(ib, 1, size, f); 1354bf215546Sopenharmony_ci fclose(f); 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci if (n_read != size) { 1357bf215546Sopenharmony_ci fprintf(stderr, "failed to read %zu bytes from '%s'\n", size, ib_filename); 1358bf215546Sopenharmony_ci exit(1); 1359bf215546Sopenharmony_ci } 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_ci ac_parse_ib(stdout, ib, size / 4, NULL, 0, "IB", info->gfx_level, NULL, NULL); 1362bf215546Sopenharmony_ci free(ib); 1363bf215546Sopenharmony_ci exit(0); 1364bf215546Sopenharmony_ci } 1365bf215546Sopenharmony_ci } 1366bf215546Sopenharmony_ci return true; 1367bf215546Sopenharmony_ci} 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_civoid ac_compute_driver_uuid(char *uuid, size_t size) 1370bf215546Sopenharmony_ci{ 1371bf215546Sopenharmony_ci char amd_uuid[] = "AMD-MESA-DRV"; 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci assert(size >= sizeof(amd_uuid)); 1374bf215546Sopenharmony_ci 1375bf215546Sopenharmony_ci memset(uuid, 0, size); 1376bf215546Sopenharmony_ci strncpy(uuid, amd_uuid, size); 1377bf215546Sopenharmony_ci} 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_civoid ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size) 1380bf215546Sopenharmony_ci{ 1381bf215546Sopenharmony_ci uint32_t *uint_uuid = (uint32_t *)uuid; 
1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci assert(size >= sizeof(uint32_t) * 4); 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci /** 1386bf215546Sopenharmony_ci * Use the device info directly instead of using a sha1. GL/VK UUIDs 1387bf215546Sopenharmony_ci * are 16 byte vs 20 byte for sha1, and the truncation that would be 1388bf215546Sopenharmony_ci * required would get rid of part of the little entropy we have. 1389bf215546Sopenharmony_ci * */ 1390bf215546Sopenharmony_ci memset(uuid, 0, size); 1391bf215546Sopenharmony_ci uint_uuid[0] = info->pci_domain; 1392bf215546Sopenharmony_ci uint_uuid[1] = info->pci_bus; 1393bf215546Sopenharmony_ci uint_uuid[2] = info->pci_dev; 1394bf215546Sopenharmony_ci uint_uuid[3] = info->pci_func; 1395bf215546Sopenharmony_ci} 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_civoid ac_print_gpu_info(struct radeon_info *info, FILE *f) 1398bf215546Sopenharmony_ci{ 1399bf215546Sopenharmony_ci fprintf(f, "Device info:\n"); 1400bf215546Sopenharmony_ci fprintf(f, " name = %s\n", info->name); 1401bf215546Sopenharmony_ci fprintf(f, " marketing_name = %s\n", info->marketing_name); 1402bf215546Sopenharmony_ci fprintf(f, " num_se = %i\n", info->num_se); 1403bf215546Sopenharmony_ci fprintf(f, " num_rb = %i\n", info->num_rb); 1404bf215546Sopenharmony_ci fprintf(f, " num_cu = %i\n", info->num_cu); 1405bf215546Sopenharmony_ci fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz); 1406bf215546Sopenharmony_ci fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 1409bf215546Sopenharmony_ci fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); 1410bf215546Sopenharmony_ci fprintf(f, " l1_cache_size = %i KB\n", 128); 1411bf215546Sopenharmony_ci } else { 1412bf215546Sopenharmony_ci fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024)); 1413bf215546Sopenharmony_ci } 
1414bf215546Sopenharmony_ci 1415bf215546Sopenharmony_ci fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024)); 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci if (info->l3_cache_size_mb) 1418bf215546Sopenharmony_ci fprintf(f, " l3_cache_size = %i MB\n", info->l3_cache_size_mb); 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci fprintf(f, " memory_channels = %u (TCC blocks)\n", info->num_tcc_blocks); 1421bf215546Sopenharmony_ci fprintf(f, " memory_size = %u GB (%u MB)\n", 1422bf215546Sopenharmony_ci DIV_ROUND_UP(info->vram_size_kb, (1024 * 1024)), 1423bf215546Sopenharmony_ci DIV_ROUND_UP(info->vram_size_kb, 1024)); 1424bf215546Sopenharmony_ci fprintf(f, " memory_freq = %u GHz\n", DIV_ROUND_UP(info->memory_freq_mhz_effective, 1000)); 1425bf215546Sopenharmony_ci fprintf(f, " memory_bus_width = %u bits\n", info->memory_bus_width); 1426bf215546Sopenharmony_ci fprintf(f, " memory_bandwidth = %u GB/s\n", info->memory_bandwidth_gbps); 1427bf215546Sopenharmony_ci fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq); 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_ci const char *ip_string[] = { 1430bf215546Sopenharmony_ci [AMD_IP_GFX] = "GFX", 1431bf215546Sopenharmony_ci [AMD_IP_COMPUTE] = "COMP", 1432bf215546Sopenharmony_ci [AMD_IP_SDMA] = "SDMA", 1433bf215546Sopenharmony_ci [AMD_IP_UVD] = "UVD", 1434bf215546Sopenharmony_ci [AMD_IP_VCE] = "VCE", 1435bf215546Sopenharmony_ci [AMD_IP_UVD_ENC] = "UVD_ENC", 1436bf215546Sopenharmony_ci [AMD_IP_VCN_DEC] = "VCN_DEC", 1437bf215546Sopenharmony_ci [AMD_IP_VCN_ENC] = info->family >= CHIP_GFX1100 ? 
"VCN" : "VCN_ENC", 1438bf215546Sopenharmony_ci [AMD_IP_VCN_JPEG] = "VCN_JPG", 1439bf215546Sopenharmony_ci }; 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) { 1442bf215546Sopenharmony_ci if (info->ip[i].num_queues) { 1443bf215546Sopenharmony_ci fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i], 1444bf215546Sopenharmony_ci info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues); 1445bf215546Sopenharmony_ci } 1446bf215546Sopenharmony_ci } 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci fprintf(f, "Identification:\n"); 1449bf215546Sopenharmony_ci fprintf(f, " pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus, 1450bf215546Sopenharmony_ci info->pci_dev, info->pci_func); 1451bf215546Sopenharmony_ci fprintf(f, " pci_id = 0x%x\n", info->pci_id); 1452bf215546Sopenharmony_ci fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id); 1453bf215546Sopenharmony_ci fprintf(f, " family = %i\n", info->family); 1454bf215546Sopenharmony_ci fprintf(f, " gfx_level = %i\n", info->gfx_level); 1455bf215546Sopenharmony_ci fprintf(f, " family_id = %i\n", info->family_id); 1456bf215546Sopenharmony_ci fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev); 1457bf215546Sopenharmony_ci fprintf(f, " chip_rev = %i\n", info->chip_rev); 1458bf215546Sopenharmony_ci 1459bf215546Sopenharmony_ci fprintf(f, "Flags:\n"); 1460bf215546Sopenharmony_ci fprintf(f, " is_pro_graphics = %u\n", info->is_pro_graphics); 1461bf215546Sopenharmony_ci fprintf(f, " has_graphics = %i\n", info->has_graphics); 1462bf215546Sopenharmony_ci fprintf(f, " has_clear_state = %u\n", info->has_clear_state); 1463bf215546Sopenharmony_ci fprintf(f, " has_distributed_tess = %u\n", info->has_distributed_tess); 1464bf215546Sopenharmony_ci fprintf(f, " has_dcc_constant_encode = %u\n", info->has_dcc_constant_encode); 1465bf215546Sopenharmony_ci fprintf(f, " has_rbplus = %u\n", info->has_rbplus); 
1466bf215546Sopenharmony_ci fprintf(f, " rbplus_allowed = %u\n", info->rbplus_allowed); 1467bf215546Sopenharmony_ci fprintf(f, " has_load_ctx_reg_pkt = %u\n", info->has_load_ctx_reg_pkt); 1468bf215546Sopenharmony_ci fprintf(f, " has_out_of_order_rast = %u\n", info->has_out_of_order_rast); 1469bf215546Sopenharmony_ci fprintf(f, " cpdma_prefetch_writes_memory = %u\n", info->cpdma_prefetch_writes_memory); 1470bf215546Sopenharmony_ci fprintf(f, " has_gfx9_scissor_bug = %i\n", info->has_gfx9_scissor_bug); 1471bf215546Sopenharmony_ci fprintf(f, " has_tc_compat_zrange_bug = %i\n", info->has_tc_compat_zrange_bug); 1472bf215546Sopenharmony_ci fprintf(f, " has_msaa_sample_loc_bug = %i\n", info->has_msaa_sample_loc_bug); 1473bf215546Sopenharmony_ci fprintf(f, " has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug); 1474bf215546Sopenharmony_ci fprintf(f, " has_32bit_predication = %i\n", info->has_32bit_predication); 1475bf215546Sopenharmony_ci fprintf(f, " has_3d_cube_border_color_mipmap = %i\n", info->has_3d_cube_border_color_mipmap); 1476bf215546Sopenharmony_ci fprintf(f, " never_stop_sq_perf_counters = %i\n", info->never_stop_sq_perf_counters); 1477bf215546Sopenharmony_ci fprintf(f, " has_sqtt_rb_harvest_bug = %i\n", info->has_sqtt_rb_harvest_bug); 1478bf215546Sopenharmony_ci fprintf(f, " has_sqtt_auto_flush_mode_bug = %i\n", info->has_sqtt_auto_flush_mode_bug); 1479bf215546Sopenharmony_ci fprintf(f, " never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop); 1480bf215546Sopenharmony_ci fprintf(f, " discardable_allows_big_page = %i\n", info->discardable_allows_big_page); 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci fprintf(f, "Display features:\n"); 1483bf215546Sopenharmony_ci fprintf(f, " use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); 1484bf215546Sopenharmony_ci fprintf(f, " use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit); 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci fprintf(f, 
"Memory info:\n"); 1487bf215546Sopenharmony_ci fprintf(f, " pte_fragment_size = %u\n", info->pte_fragment_size); 1488bf215546Sopenharmony_ci fprintf(f, " gart_page_size = %u\n", info->gart_page_size); 1489bf215546Sopenharmony_ci fprintf(f, " gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size_kb, 1024)); 1490bf215546Sopenharmony_ci fprintf(f, " vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size_kb, 1024)); 1491bf215546Sopenharmony_ci fprintf(f, " vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size_kb, 1024)); 1492bf215546Sopenharmony_ci fprintf(f, " vram_type = %i\n", info->vram_type); 1493bf215546Sopenharmony_ci fprintf(f, " max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024)); 1494bf215546Sopenharmony_ci fprintf(f, " min_alloc_size = %u\n", info->min_alloc_size); 1495bf215546Sopenharmony_ci fprintf(f, " address32_hi = 0x%x\n", info->address32_hi); 1496bf215546Sopenharmony_ci fprintf(f, " has_dedicated_vram = %u\n", info->has_dedicated_vram); 1497bf215546Sopenharmony_ci fprintf(f, " all_vram_visible = %u\n", info->all_vram_visible); 1498bf215546Sopenharmony_ci fprintf(f, " smart_access_memory = %u\n", info->smart_access_memory); 1499bf215546Sopenharmony_ci fprintf(f, " max_tcc_blocks = %i\n", info->max_tcc_blocks); 1500bf215546Sopenharmony_ci fprintf(f, " tcc_cache_line_size = %u\n", info->tcc_cache_line_size); 1501bf215546Sopenharmony_ci fprintf(f, " tcc_rb_non_coherent = %u\n", info->tcc_rb_non_coherent); 1502bf215546Sopenharmony_ci fprintf(f, " pc_lines = %u\n", info->pc_lines); 1503bf215546Sopenharmony_ci fprintf(f, " lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup); 1504bf215546Sopenharmony_ci fprintf(f, " lds_alloc_granularity = %i\n", info->lds_alloc_granularity); 1505bf215546Sopenharmony_ci fprintf(f, " lds_encode_granularity = %i\n", info->lds_encode_granularity); 1506bf215546Sopenharmony_ci fprintf(f, " max_memory_clock = %i MHz\n", info->memory_freq_mhz); 1507bf215546Sopenharmony_ci 
1508bf215546Sopenharmony_ci fprintf(f, "CP info:\n"); 1509bf215546Sopenharmony_ci fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); 1510bf215546Sopenharmony_ci fprintf(f, " ib_alignment = %u\n", info->ib_alignment); 1511bf215546Sopenharmony_ci fprintf(f, " me_fw_version = %i\n", info->me_fw_version); 1512bf215546Sopenharmony_ci fprintf(f, " me_fw_feature = %i\n", info->me_fw_feature); 1513bf215546Sopenharmony_ci fprintf(f, " mec_fw_version = %i\n", info->mec_fw_version); 1514bf215546Sopenharmony_ci fprintf(f, " mec_fw_feature = %i\n", info->mec_fw_feature); 1515bf215546Sopenharmony_ci fprintf(f, " pfp_fw_version = %i\n", info->pfp_fw_version); 1516bf215546Sopenharmony_ci fprintf(f, " pfp_fw_feature = %i\n", info->pfp_fw_feature); 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci fprintf(f, "Multimedia info:\n"); 1519bf215546Sopenharmony_ci fprintf(f, " vce_encode = %u\n", info->ip[AMD_IP_VCE].num_queues); 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (info->family >= CHIP_GFX1100) 1522bf215546Sopenharmony_ci fprintf(f, " vcn_unified = %u\n", info->has_video_hw.vcn_decode); 1523bf215546Sopenharmony_ci else { 1524bf215546Sopenharmony_ci fprintf(f, " vcn_decode = %u\n", info->has_video_hw.vcn_decode); 1525bf215546Sopenharmony_ci fprintf(f, " vcn_encode = %u\n", info->ip[AMD_IP_VCN_ENC].num_queues); 1526bf215546Sopenharmony_ci } 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci fprintf(f, " uvd_fw_version = %u\n", info->uvd_fw_version); 1529bf215546Sopenharmony_ci fprintf(f, " vce_fw_version = %u\n", info->vce_fw_version); 1530bf215546Sopenharmony_ci fprintf(f, " vce_harvest_config = %i\n", info->vce_harvest_config); 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci fprintf(f, "Kernel & winsys capabilities:\n"); 1533bf215546Sopenharmony_ci fprintf(f, " drm = %i.%i.%i\n", info->drm_major, info->drm_minor, info->drm_patchlevel); 1534bf215546Sopenharmony_ci fprintf(f, " has_userptr = %i\n", info->has_userptr); 
1535bf215546Sopenharmony_ci fprintf(f, " has_syncobj = %u\n", info->has_syncobj); 1536bf215546Sopenharmony_ci fprintf(f, " has_timeline_syncobj = %u\n", info->has_timeline_syncobj); 1537bf215546Sopenharmony_ci fprintf(f, " has_fence_to_handle = %u\n", info->has_fence_to_handle); 1538bf215546Sopenharmony_ci fprintf(f, " has_local_buffers = %u\n", info->has_local_buffers); 1539bf215546Sopenharmony_ci fprintf(f, " has_bo_metadata = %u\n", info->has_bo_metadata); 1540bf215546Sopenharmony_ci fprintf(f, " has_eqaa_surface_allocator = %u\n", info->has_eqaa_surface_allocator); 1541bf215546Sopenharmony_ci fprintf(f, " has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings); 1542bf215546Sopenharmony_ci fprintf(f, " has_stable_pstate = %u\n", info->has_stable_pstate); 1543bf215546Sopenharmony_ci fprintf(f, " has_scheduled_fence_dependency = %u\n", info->has_scheduled_fence_dependency); 1544bf215546Sopenharmony_ci fprintf(f, " mid_command_buffer_preemption_enabled = %u\n", 1545bf215546Sopenharmony_ci info->mid_command_buffer_preemption_enabled); 1546bf215546Sopenharmony_ci fprintf(f, " has_tmz_support = %u\n", info->has_tmz_support); 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci fprintf(f, "Shader core info:\n"); 1549bf215546Sopenharmony_ci for (unsigned i = 0; i < info->max_se; i++) { 1550bf215546Sopenharmony_ci for (unsigned j = 0; j < info->max_sa_per_se; j++) { 1551bf215546Sopenharmony_ci fprintf(f, " cu_mask[SE%u][SA%u] = 0x%x \t(%u)\tCU_EN = 0x%x\n", i, j, 1552bf215546Sopenharmony_ci info->cu_mask[i][j], util_bitcount(info->cu_mask[i][j]), 1553bf215546Sopenharmony_ci info->spi_cu_en & BITFIELD_MASK(util_bitcount(info->cu_mask[i][j]))); 1554bf215546Sopenharmony_ci } 1555bf215546Sopenharmony_ci } 1556bf215546Sopenharmony_ci fprintf(f, " spi_cu_en_has_effect = %i\n", info->spi_cu_en_has_effect); 1557bf215546Sopenharmony_ci fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa); 1558bf215546Sopenharmony_ci fprintf(f, " min_good_cu_per_sa = %i\n", 
info->min_good_cu_per_sa); 1559bf215546Sopenharmony_ci fprintf(f, " max_se = %i\n", info->max_se); 1560bf215546Sopenharmony_ci fprintf(f, " max_sa_per_se = %i\n", info->max_sa_per_se); 1561bf215546Sopenharmony_ci fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd); 1562bf215546Sopenharmony_ci fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); 1563bf215546Sopenharmony_ci fprintf(f, " num_physical_wave64_vgprs_per_simd = %i\n", 1564bf215546Sopenharmony_ci info->num_physical_wave64_vgprs_per_simd); 1565bf215546Sopenharmony_ci fprintf(f, " num_simd_per_compute_unit = %i\n", info->num_simd_per_compute_unit); 1566bf215546Sopenharmony_ci fprintf(f, " min_sgpr_alloc = %i\n", info->min_sgpr_alloc); 1567bf215546Sopenharmony_ci fprintf(f, " max_sgpr_alloc = %i\n", info->max_sgpr_alloc); 1568bf215546Sopenharmony_ci fprintf(f, " sgpr_alloc_granularity = %i\n", info->sgpr_alloc_granularity); 1569bf215546Sopenharmony_ci fprintf(f, " min_wave64_vgpr_alloc = %i\n", info->min_wave64_vgpr_alloc); 1570bf215546Sopenharmony_ci fprintf(f, " max_vgpr_alloc = %i\n", info->max_vgpr_alloc); 1571bf215546Sopenharmony_ci fprintf(f, " wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity); 1572bf215546Sopenharmony_ci fprintf(f, " max_scratch_waves = %i\n", info->max_scratch_waves); 1573bf215546Sopenharmony_ci 1574bf215546Sopenharmony_ci fprintf(f, "Render backend info:\n"); 1575bf215546Sopenharmony_ci fprintf(f, " pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override); 1576bf215546Sopenharmony_ci fprintf(f, " max_render_backends = %i\n", info->max_render_backends); 1577bf215546Sopenharmony_ci fprintf(f, " num_tile_pipes = %i\n", info->num_tile_pipes); 1578bf215546Sopenharmony_ci fprintf(f, " pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes); 1579bf215546Sopenharmony_ci fprintf(f, " enabled_rb_mask = 0x%x\n", info->enabled_rb_mask); 1580bf215546Sopenharmony_ci fprintf(f, " max_alignment = 
%u\n", (unsigned)info->max_alignment); 1581bf215546Sopenharmony_ci fprintf(f, " pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci fprintf(f, "GB_ADDR_CONFIG: 0x%08x\n", info->gb_addr_config); 1584bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 1585bf215546Sopenharmony_ci fprintf(f, " num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); 1586bf215546Sopenharmony_ci fprintf(f, " pipe_interleave_size = %u\n", 1587bf215546Sopenharmony_ci 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)); 1588bf215546Sopenharmony_ci fprintf(f, " max_compressed_frags = %u\n", 1589bf215546Sopenharmony_ci 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config)); 1590bf215546Sopenharmony_ci if (info->gfx_level >= GFX10_3) 1591bf215546Sopenharmony_ci fprintf(f, " num_pkrs = %u\n", 1 << G_0098F8_NUM_PKRS(info->gb_addr_config)); 1592bf215546Sopenharmony_ci } else if (info->gfx_level == GFX9) { 1593bf215546Sopenharmony_ci fprintf(f, " num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); 1594bf215546Sopenharmony_ci fprintf(f, " pipe_interleave_size = %u\n", 1595bf215546Sopenharmony_ci 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)); 1596bf215546Sopenharmony_ci fprintf(f, " max_compressed_frags = %u\n", 1597bf215546Sopenharmony_ci 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config)); 1598bf215546Sopenharmony_ci fprintf(f, " bank_interleave_size = %u\n", 1599bf215546Sopenharmony_ci 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config)); 1600bf215546Sopenharmony_ci fprintf(f, " num_banks = %u\n", 1 << G_0098F8_NUM_BANKS(info->gb_addr_config)); 1601bf215546Sopenharmony_ci fprintf(f, " shader_engine_tile_size = %u\n", 1602bf215546Sopenharmony_ci 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config)); 1603bf215546Sopenharmony_ci fprintf(f, " num_shader_engines = %u\n", 1604bf215546Sopenharmony_ci 1 << G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config)); 
1605bf215546Sopenharmony_ci fprintf(f, " num_gpus = %u (raw)\n", G_0098F8_NUM_GPUS_GFX9(info->gb_addr_config)); 1606bf215546Sopenharmony_ci fprintf(f, " multi_gpu_tile_size = %u (raw)\n", 1607bf215546Sopenharmony_ci G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config)); 1608bf215546Sopenharmony_ci fprintf(f, " num_rb_per_se = %u\n", 1 << G_0098F8_NUM_RB_PER_SE(info->gb_addr_config)); 1609bf215546Sopenharmony_ci fprintf(f, " row_size = %u\n", 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config)); 1610bf215546Sopenharmony_ci fprintf(f, " num_lower_pipes = %u (raw)\n", G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config)); 1611bf215546Sopenharmony_ci fprintf(f, " se_enable = %u (raw)\n", G_0098F8_SE_ENABLE(info->gb_addr_config)); 1612bf215546Sopenharmony_ci } else { 1613bf215546Sopenharmony_ci fprintf(f, " num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config)); 1614bf215546Sopenharmony_ci fprintf(f, " pipe_interleave_size = %u\n", 1615bf215546Sopenharmony_ci 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config)); 1616bf215546Sopenharmony_ci fprintf(f, " bank_interleave_size = %u\n", 1617bf215546Sopenharmony_ci 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config)); 1618bf215546Sopenharmony_ci fprintf(f, " num_shader_engines = %u\n", 1619bf215546Sopenharmony_ci 1 << G_0098F8_NUM_SHADER_ENGINES_GFX6(info->gb_addr_config)); 1620bf215546Sopenharmony_ci fprintf(f, " shader_engine_tile_size = %u\n", 1621bf215546Sopenharmony_ci 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config)); 1622bf215546Sopenharmony_ci fprintf(f, " num_gpus = %u (raw)\n", G_0098F8_NUM_GPUS_GFX6(info->gb_addr_config)); 1623bf215546Sopenharmony_ci fprintf(f, " multi_gpu_tile_size = %u (raw)\n", 1624bf215546Sopenharmony_ci G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config)); 1625bf215546Sopenharmony_ci fprintf(f, " row_size = %u\n", 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config)); 1626bf215546Sopenharmony_ci fprintf(f, " num_lower_pipes = %u (raw)\n", 
G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config)); 1627bf215546Sopenharmony_ci } 1628bf215546Sopenharmony_ci} 1629bf215546Sopenharmony_ci 1630bf215546Sopenharmony_ciint ac_get_gs_table_depth(enum amd_gfx_level gfx_level, enum radeon_family family) 1631bf215546Sopenharmony_ci{ 1632bf215546Sopenharmony_ci if (gfx_level >= GFX9) 1633bf215546Sopenharmony_ci return -1; 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci switch (family) { 1636bf215546Sopenharmony_ci case CHIP_OLAND: 1637bf215546Sopenharmony_ci case CHIP_HAINAN: 1638bf215546Sopenharmony_ci case CHIP_KAVERI: 1639bf215546Sopenharmony_ci case CHIP_KABINI: 1640bf215546Sopenharmony_ci case CHIP_ICELAND: 1641bf215546Sopenharmony_ci case CHIP_CARRIZO: 1642bf215546Sopenharmony_ci case CHIP_STONEY: 1643bf215546Sopenharmony_ci return 16; 1644bf215546Sopenharmony_ci case CHIP_TAHITI: 1645bf215546Sopenharmony_ci case CHIP_PITCAIRN: 1646bf215546Sopenharmony_ci case CHIP_VERDE: 1647bf215546Sopenharmony_ci case CHIP_BONAIRE: 1648bf215546Sopenharmony_ci case CHIP_HAWAII: 1649bf215546Sopenharmony_ci case CHIP_TONGA: 1650bf215546Sopenharmony_ci case CHIP_FIJI: 1651bf215546Sopenharmony_ci case CHIP_POLARIS10: 1652bf215546Sopenharmony_ci case CHIP_POLARIS11: 1653bf215546Sopenharmony_ci case CHIP_POLARIS12: 1654bf215546Sopenharmony_ci case CHIP_VEGAM: 1655bf215546Sopenharmony_ci return 32; 1656bf215546Sopenharmony_ci default: 1657bf215546Sopenharmony_ci unreachable("Unknown GPU"); 1658bf215546Sopenharmony_ci } 1659bf215546Sopenharmony_ci} 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_civoid ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p, 1662bf215546Sopenharmony_ci uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p) 1663bf215546Sopenharmony_ci{ 1664bf215546Sopenharmony_ci unsigned raster_config, raster_config_1, se_tile_repeat; 1665bf215546Sopenharmony_ci 1666bf215546Sopenharmony_ci switch (info->family) { 1667bf215546Sopenharmony_ci /* 1 SE / 1 RB */ 1668bf215546Sopenharmony_ci case 
CHIP_HAINAN: 1669bf215546Sopenharmony_ci case CHIP_KABINI: 1670bf215546Sopenharmony_ci case CHIP_STONEY: 1671bf215546Sopenharmony_ci raster_config = 0x00000000; 1672bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1673bf215546Sopenharmony_ci break; 1674bf215546Sopenharmony_ci /* 1 SE / 4 RBs */ 1675bf215546Sopenharmony_ci case CHIP_VERDE: 1676bf215546Sopenharmony_ci raster_config = 0x0000124a; 1677bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1678bf215546Sopenharmony_ci break; 1679bf215546Sopenharmony_ci /* 1 SE / 2 RBs (Oland is special) */ 1680bf215546Sopenharmony_ci case CHIP_OLAND: 1681bf215546Sopenharmony_ci raster_config = 0x00000082; 1682bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1683bf215546Sopenharmony_ci break; 1684bf215546Sopenharmony_ci /* 1 SE / 2 RBs */ 1685bf215546Sopenharmony_ci case CHIP_KAVERI: 1686bf215546Sopenharmony_ci case CHIP_ICELAND: 1687bf215546Sopenharmony_ci case CHIP_CARRIZO: 1688bf215546Sopenharmony_ci raster_config = 0x00000002; 1689bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1690bf215546Sopenharmony_ci break; 1691bf215546Sopenharmony_ci /* 2 SEs / 4 RBs */ 1692bf215546Sopenharmony_ci case CHIP_BONAIRE: 1693bf215546Sopenharmony_ci case CHIP_POLARIS11: 1694bf215546Sopenharmony_ci case CHIP_POLARIS12: 1695bf215546Sopenharmony_ci raster_config = 0x16000012; 1696bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1697bf215546Sopenharmony_ci break; 1698bf215546Sopenharmony_ci /* 2 SEs / 8 RBs */ 1699bf215546Sopenharmony_ci case CHIP_TAHITI: 1700bf215546Sopenharmony_ci case CHIP_PITCAIRN: 1701bf215546Sopenharmony_ci raster_config = 0x2a00126a; 1702bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1703bf215546Sopenharmony_ci break; 1704bf215546Sopenharmony_ci /* 4 SEs / 8 RBs */ 1705bf215546Sopenharmony_ci case CHIP_TONGA: 1706bf215546Sopenharmony_ci case CHIP_POLARIS10: 1707bf215546Sopenharmony_ci raster_config = 0x16000012; 1708bf215546Sopenharmony_ci raster_config_1 = 0x0000002a; 
1709bf215546Sopenharmony_ci break; 1710bf215546Sopenharmony_ci /* 4 SEs / 16 RBs */ 1711bf215546Sopenharmony_ci case CHIP_HAWAII: 1712bf215546Sopenharmony_ci case CHIP_FIJI: 1713bf215546Sopenharmony_ci case CHIP_VEGAM: 1714bf215546Sopenharmony_ci raster_config = 0x3a00161a; 1715bf215546Sopenharmony_ci raster_config_1 = 0x0000002e; 1716bf215546Sopenharmony_ci break; 1717bf215546Sopenharmony_ci default: 1718bf215546Sopenharmony_ci fprintf(stderr, "ac: Unknown GPU, using 0 for raster_config\n"); 1719bf215546Sopenharmony_ci raster_config = 0x00000000; 1720bf215546Sopenharmony_ci raster_config_1 = 0x00000000; 1721bf215546Sopenharmony_ci break; 1722bf215546Sopenharmony_ci } 1723bf215546Sopenharmony_ci 1724bf215546Sopenharmony_ci /* drm/radeon on Kaveri is buggy, so disable 1 RB to work around it. 1725bf215546Sopenharmony_ci * This decreases performance by up to 50% when the RB is the bottleneck. 1726bf215546Sopenharmony_ci */ 1727bf215546Sopenharmony_ci if (info->family == CHIP_KAVERI && !info->is_amdgpu) 1728bf215546Sopenharmony_ci raster_config = 0x00000000; 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci /* Fiji: Old kernels have incorrect tiling config. This decreases 1731bf215546Sopenharmony_ci * RB performance by 25%. (it disables 1 RB in the second packer) 1732bf215546Sopenharmony_ci */ 1733bf215546Sopenharmony_ci if (info->family == CHIP_FIJI && info->cik_macrotile_mode_array[0] == 0x000000e8) { 1734bf215546Sopenharmony_ci raster_config = 0x16000012; 1735bf215546Sopenharmony_ci raster_config_1 = 0x0000002a; 1736bf215546Sopenharmony_ci } 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_ci unsigned se_width = 8 << G_028350_SE_XSEL_GFX6(raster_config); 1739bf215546Sopenharmony_ci unsigned se_height = 8 << G_028350_SE_YSEL_GFX6(raster_config); 1740bf215546Sopenharmony_ci 1741bf215546Sopenharmony_ci /* I don't know how to calculate this, though this is probably a good guess. 
*/ 1742bf215546Sopenharmony_ci se_tile_repeat = MAX2(se_width, se_height) * info->max_se; 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci *raster_config_p = raster_config; 1745bf215546Sopenharmony_ci *raster_config_1_p = raster_config_1; 1746bf215546Sopenharmony_ci if (se_tile_repeat_p) 1747bf215546Sopenharmony_ci *se_tile_repeat_p = se_tile_repeat; 1748bf215546Sopenharmony_ci} 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_civoid ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config, 1751bf215546Sopenharmony_ci unsigned *cik_raster_config_1_p, unsigned *raster_config_se) 1752bf215546Sopenharmony_ci{ 1753bf215546Sopenharmony_ci unsigned sh_per_se = MAX2(info->max_sa_per_se, 1); 1754bf215546Sopenharmony_ci unsigned num_se = MAX2(info->max_se, 1); 1755bf215546Sopenharmony_ci unsigned rb_mask = info->enabled_rb_mask; 1756bf215546Sopenharmony_ci unsigned num_rb = MIN2(info->max_render_backends, 16); 1757bf215546Sopenharmony_ci unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 1758bf215546Sopenharmony_ci unsigned rb_per_se = num_rb / num_se; 1759bf215546Sopenharmony_ci unsigned se_mask[4]; 1760bf215546Sopenharmony_ci unsigned se; 1761bf215546Sopenharmony_ci 1762bf215546Sopenharmony_ci se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 1763bf215546Sopenharmony_ci se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 1764bf215546Sopenharmony_ci se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 1765bf215546Sopenharmony_ci se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci assert(num_se == 1 || num_se == 2 || num_se == 4); 1768bf215546Sopenharmony_ci assert(sh_per_se == 1 || sh_per_se == 2); 1769bf215546Sopenharmony_ci assert(rb_per_pkr == 1 || rb_per_pkr == 2); 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci if (info->gfx_level >= GFX7) { 1772bf215546Sopenharmony_ci unsigned raster_config_1 = *cik_raster_config_1_p; 1773bf215546Sopenharmony_ci if ((num_se > 2) && 
((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) { 1774bf215546Sopenharmony_ci raster_config_1 &= C_028354_SE_PAIR_MAP; 1775bf215546Sopenharmony_ci 1776bf215546Sopenharmony_ci if (!se_mask[0] && !se_mask[1]) { 1777bf215546Sopenharmony_ci raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 1778bf215546Sopenharmony_ci } else { 1779bf215546Sopenharmony_ci raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci *cik_raster_config_1_p = raster_config_1; 1782bf215546Sopenharmony_ci } 1783bf215546Sopenharmony_ci } 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci for (se = 0; se < num_se; se++) { 1786bf215546Sopenharmony_ci unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 1787bf215546Sopenharmony_ci unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 1788bf215546Sopenharmony_ci int idx = (se / 2) * 2; 1789bf215546Sopenharmony_ci 1790bf215546Sopenharmony_ci raster_config_se[se] = raster_config; 1791bf215546Sopenharmony_ci if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 1792bf215546Sopenharmony_ci raster_config_se[se] &= C_028350_SE_MAP; 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci if (!se_mask[idx]) { 1795bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 1796bf215546Sopenharmony_ci } else { 1797bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 1798bf215546Sopenharmony_ci } 1799bf215546Sopenharmony_ci } 1800bf215546Sopenharmony_ci 1801bf215546Sopenharmony_ci pkr0_mask &= rb_mask; 1802bf215546Sopenharmony_ci pkr1_mask &= rb_mask; 1803bf215546Sopenharmony_ci if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 1804bf215546Sopenharmony_ci raster_config_se[se] &= C_028350_PKR_MAP; 1805bf215546Sopenharmony_ci 1806bf215546Sopenharmony_ci if (!pkr0_mask) { 1807bf215546Sopenharmony_ci raster_config_se[se] |= 
S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 1808bf215546Sopenharmony_ci } else { 1809bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 1810bf215546Sopenharmony_ci } 1811bf215546Sopenharmony_ci } 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci if (rb_per_se >= 2) { 1814bf215546Sopenharmony_ci unsigned rb0_mask = 1 << (se * rb_per_se); 1815bf215546Sopenharmony_ci unsigned rb1_mask = rb0_mask << 1; 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci rb0_mask &= rb_mask; 1818bf215546Sopenharmony_ci rb1_mask &= rb_mask; 1819bf215546Sopenharmony_ci if (!rb0_mask || !rb1_mask) { 1820bf215546Sopenharmony_ci raster_config_se[se] &= C_028350_RB_MAP_PKR0; 1821bf215546Sopenharmony_ci 1822bf215546Sopenharmony_ci if (!rb0_mask) { 1823bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 1824bf215546Sopenharmony_ci } else { 1825bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 1826bf215546Sopenharmony_ci } 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci 1829bf215546Sopenharmony_ci if (rb_per_se > 2) { 1830bf215546Sopenharmony_ci rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 1831bf215546Sopenharmony_ci rb1_mask = rb0_mask << 1; 1832bf215546Sopenharmony_ci rb0_mask &= rb_mask; 1833bf215546Sopenharmony_ci rb1_mask &= rb_mask; 1834bf215546Sopenharmony_ci if (!rb0_mask || !rb1_mask) { 1835bf215546Sopenharmony_ci raster_config_se[se] &= C_028350_RB_MAP_PKR1; 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci if (!rb0_mask) { 1838bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 1839bf215546Sopenharmony_ci } else { 1840bf215546Sopenharmony_ci raster_config_se[se] |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 1841bf215546Sopenharmony_ci } 1842bf215546Sopenharmony_ci } 1843bf215546Sopenharmony_ci } 1844bf215546Sopenharmony_ci } 
1845bf215546Sopenharmony_ci } 1846bf215546Sopenharmony_ci} 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ciunsigned 1849bf215546Sopenharmony_ciac_get_compute_resource_limits(const struct radeon_info *info, unsigned waves_per_threadgroup, 1850bf215546Sopenharmony_ci unsigned max_waves_per_sh, unsigned threadgroups_per_cu) 1851bf215546Sopenharmony_ci{ 1852bf215546Sopenharmony_ci unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_ci if (info->gfx_level >= GFX7) { 1855bf215546Sopenharmony_ci unsigned num_cu_per_se = info->num_cu / info->num_se; 1856bf215546Sopenharmony_ci 1857bf215546Sopenharmony_ci /* Gfx9 should set the limit to max instead of 0 to fix high priority compute. */ 1858bf215546Sopenharmony_ci if (info->gfx_level == GFX9 && !max_waves_per_sh) { 1859bf215546Sopenharmony_ci max_waves_per_sh = info->max_good_cu_per_sa * info->num_simd_per_compute_unit * 1860bf215546Sopenharmony_ci info->max_wave64_per_simd; 1861bf215546Sopenharmony_ci } 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_ci /* Force even distribution on all SIMDs in CU if the workgroup 1864bf215546Sopenharmony_ci * size is 64. This has shown some good improvements if # of CUs 1865bf215546Sopenharmony_ci * per SE is not a multiple of 4. 
1866bf215546Sopenharmony_ci */ 1867bf215546Sopenharmony_ci if (num_cu_per_se % 4 && waves_per_threadgroup == 1) 1868bf215546Sopenharmony_ci compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1); 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8); 1871bf215546Sopenharmony_ci compute_resource_limits |= 1872bf215546Sopenharmony_ci S_00B854_WAVES_PER_SH(max_waves_per_sh) | S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1); 1873bf215546Sopenharmony_ci } else { 1874bf215546Sopenharmony_ci /* GFX6 */ 1875bf215546Sopenharmony_ci if (max_waves_per_sh) { 1876bf215546Sopenharmony_ci unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16); 1877bf215546Sopenharmony_ci compute_resource_limits |= S_00B854_WAVES_PER_SH_GFX6(limit_div16); 1878bf215546Sopenharmony_ci } 1879bf215546Sopenharmony_ci } 1880bf215546Sopenharmony_ci return compute_resource_limits; 1881bf215546Sopenharmony_ci} 1882bf215546Sopenharmony_ci 1883bf215546Sopenharmony_civoid ac_get_hs_info(struct radeon_info *info, 1884bf215546Sopenharmony_ci struct ac_hs_info *hs) 1885bf215546Sopenharmony_ci{ 1886bf215546Sopenharmony_ci bool double_offchip_buffers = info->gfx_level >= GFX7 && 1887bf215546Sopenharmony_ci info->family != CHIP_CARRIZO && 1888bf215546Sopenharmony_ci info->family != CHIP_STONEY; 1889bf215546Sopenharmony_ci unsigned max_offchip_buffers_per_se; 1890bf215546Sopenharmony_ci unsigned max_offchip_buffers; 1891bf215546Sopenharmony_ci unsigned offchip_granularity; 1892bf215546Sopenharmony_ci unsigned hs_offchip_param; 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_ci hs->tess_offchip_block_dw_size = 1895bf215546Sopenharmony_ci info->family == CHIP_HAWAII ? 4096 : 8192; 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_ci /* 1898bf215546Sopenharmony_ci * Per RadeonSI: 1899bf215546Sopenharmony_ci * This must be one less than the maximum number due to a hw limitation. 1900bf215546Sopenharmony_ci * Various hardware bugs need this. 
1901bf215546Sopenharmony_ci * 1902bf215546Sopenharmony_ci * Per AMDVLK: 1903bf215546Sopenharmony_ci * Vega10 should limit max_offchip_buffers to 508 (4 * 127). 1904bf215546Sopenharmony_ci * Gfx7 should limit max_offchip_buffers to 508 1905bf215546Sopenharmony_ci * Gfx6 should limit max_offchip_buffers to 126 (2 * 63) 1906bf215546Sopenharmony_ci * 1907bf215546Sopenharmony_ci * Follow AMDVLK here. 1908bf215546Sopenharmony_ci */ 1909bf215546Sopenharmony_ci if (info->gfx_level >= GFX11) { 1910bf215546Sopenharmony_ci max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */ 1911bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX10) { 1912bf215546Sopenharmony_ci max_offchip_buffers_per_se = 128; 1913bf215546Sopenharmony_ci } else if (info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20) { 1914bf215546Sopenharmony_ci /* Only certain chips can use the maximum value. */ 1915bf215546Sopenharmony_ci max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; 1916bf215546Sopenharmony_ci } else { 1917bf215546Sopenharmony_ci max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; 1918bf215546Sopenharmony_ci } 1919bf215546Sopenharmony_ci 1920bf215546Sopenharmony_ci max_offchip_buffers = max_offchip_buffers_per_se * info->max_se; 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci /* Hawaii has a bug with offchip buffers > 256 that can be worked 1923bf215546Sopenharmony_ci * around by setting 4K granularity. 
1924bf215546Sopenharmony_ci */ 1925bf215546Sopenharmony_ci if (hs->tess_offchip_block_dw_size == 4096) { 1926bf215546Sopenharmony_ci assert(info->family == CHIP_HAWAII); 1927bf215546Sopenharmony_ci offchip_granularity = V_03093C_X_4K_DWORDS; 1928bf215546Sopenharmony_ci } else { 1929bf215546Sopenharmony_ci assert(hs->tess_offchip_block_dw_size == 8192); 1930bf215546Sopenharmony_ci offchip_granularity = V_03093C_X_8K_DWORDS; 1931bf215546Sopenharmony_ci } 1932bf215546Sopenharmony_ci 1933bf215546Sopenharmony_ci switch (info->gfx_level) { 1934bf215546Sopenharmony_ci case GFX6: 1935bf215546Sopenharmony_ci max_offchip_buffers = MIN2(max_offchip_buffers, 126); 1936bf215546Sopenharmony_ci break; 1937bf215546Sopenharmony_ci case GFX7: 1938bf215546Sopenharmony_ci case GFX8: 1939bf215546Sopenharmony_ci case GFX9: 1940bf215546Sopenharmony_ci max_offchip_buffers = MIN2(max_offchip_buffers, 508); 1941bf215546Sopenharmony_ci break; 1942bf215546Sopenharmony_ci case GFX10: 1943bf215546Sopenharmony_ci break; 1944bf215546Sopenharmony_ci default: 1945bf215546Sopenharmony_ci break; 1946bf215546Sopenharmony_ci } 1947bf215546Sopenharmony_ci 1948bf215546Sopenharmony_ci hs->max_offchip_buffers = max_offchip_buffers; 1949bf215546Sopenharmony_ci 1950bf215546Sopenharmony_ci if (info->gfx_level >= GFX11) { 1951bf215546Sopenharmony_ci /* OFFCHIP_BUFFERING is per SE. 
*/ 1952bf215546Sopenharmony_ci hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) | 1953bf215546Sopenharmony_ci S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); 1954bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX10_3) { 1955bf215546Sopenharmony_ci hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) | 1956bf215546Sopenharmony_ci S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); 1957bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX7) { 1958bf215546Sopenharmony_ci if (info->gfx_level >= GFX8) 1959bf215546Sopenharmony_ci --max_offchip_buffers; 1960bf215546Sopenharmony_ci hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) | 1961bf215546Sopenharmony_ci S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity); 1962bf215546Sopenharmony_ci } else { 1963bf215546Sopenharmony_ci hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); 1964bf215546Sopenharmony_ci } 1965bf215546Sopenharmony_ci 1966bf215546Sopenharmony_ci hs->hs_offchip_param = hs_offchip_param; 1967bf215546Sopenharmony_ci 1968bf215546Sopenharmony_ci hs->tess_factor_ring_size = 48 * 1024 * info->max_se; 1969bf215546Sopenharmony_ci hs->tess_offchip_ring_offset = align(hs->tess_factor_ring_size, 64 * 1024); 1970bf215546Sopenharmony_ci hs->tess_offchip_ring_size = hs->max_offchip_buffers * hs->tess_offchip_block_dw_size * 4; 1971bf215546Sopenharmony_ci} 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_cistatic uint16_t get_task_num_entries(enum radeon_family fam) 1974bf215546Sopenharmony_ci{ 1975bf215546Sopenharmony_ci /* Number of task shader ring entries. Needs to be a power of two. 1976bf215546Sopenharmony_ci * Use a low number on smaller chips so we don't waste space, 1977bf215546Sopenharmony_ci * but keep it high on bigger chips so it doesn't inhibit parallelism. 1978bf215546Sopenharmony_ci * 1979bf215546Sopenharmony_ci * This number is compiled into task/mesh shaders as a constant. 
1980bf215546Sopenharmony_ci * In order to ensure this works fine with the shader cache, we must 1981bf215546Sopenharmony_ci * base this decision on the chip family, not the number of CUs in 1982bf215546Sopenharmony_ci * the current GPU. (So, the cache remains consistent for all 1983bf215546Sopenharmony_ci * chips in the same family.) 1984bf215546Sopenharmony_ci */ 1985bf215546Sopenharmony_ci switch (fam) { 1986bf215546Sopenharmony_ci case CHIP_VANGOGH: 1987bf215546Sopenharmony_ci case CHIP_NAVI24: 1988bf215546Sopenharmony_ci case CHIP_REMBRANDT: 1989bf215546Sopenharmony_ci return 256; 1990bf215546Sopenharmony_ci case CHIP_NAVI21: 1991bf215546Sopenharmony_ci case CHIP_NAVI22: 1992bf215546Sopenharmony_ci case CHIP_NAVI23: 1993bf215546Sopenharmony_ci default: 1994bf215546Sopenharmony_ci return 1024; 1995bf215546Sopenharmony_ci } 1996bf215546Sopenharmony_ci} 1997bf215546Sopenharmony_ci 1998bf215546Sopenharmony_civoid ac_get_task_info(struct radeon_info *info, 1999bf215546Sopenharmony_ci struct ac_task_info *task_info) 2000bf215546Sopenharmony_ci{ 2001bf215546Sopenharmony_ci const uint16_t num_entries = get_task_num_entries(info->family); 2002bf215546Sopenharmony_ci const uint32_t draw_ring_bytes = num_entries * AC_TASK_DRAW_ENTRY_BYTES; 2003bf215546Sopenharmony_ci const uint32_t payload_ring_bytes = num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES; 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci /* Ensure that the addresses of each ring are 256 byte aligned. */ 2006bf215546Sopenharmony_ci task_info->num_entries = num_entries; 2007bf215546Sopenharmony_ci task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256); 2008bf215546Sopenharmony_ci task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256); 2009bf215546Sopenharmony_ci task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes; 2010bf215546Sopenharmony_ci} 2011