162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2015-2022 Advanced Micro Devices, Inc. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 662306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 762306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 862306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 962306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 1062306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 1362306a36Sopenharmony_ci * all copies or substantial portions of the Software. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1662306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1762306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1862306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 1962306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2062306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2162306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include <linux/pci.h> 2562306a36Sopenharmony_ci#include <linux/acpi.h> 2662306a36Sopenharmony_ci#include "kfd_crat.h" 2762306a36Sopenharmony_ci#include "kfd_priv.h" 2862306a36Sopenharmony_ci#include "kfd_topology.h" 2962306a36Sopenharmony_ci#include "amdgpu.h" 3062306a36Sopenharmony_ci#include "amdgpu_amdkfd.h" 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. 3362306a36Sopenharmony_ci * GPU processor ID are expressed with Bit[31]=1. 3462306a36Sopenharmony_ci * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs 3562306a36Sopenharmony_ci * used in the CRAT. 3662306a36Sopenharmony_ci */ 3762306a36Sopenharmony_cistatic uint32_t gpu_processor_id_low = 0x80001000; 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/* Return the next available gpu_processor_id and increment it for next GPU 4062306a36Sopenharmony_ci * @total_cu_count - Total CUs present in the GPU including ones 4162306a36Sopenharmony_ci * masked off 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_cistatic inline unsigned int get_and_inc_gpu_processor_id( 4462306a36Sopenharmony_ci unsigned int total_cu_count) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci int current_id = gpu_processor_id_low; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci gpu_processor_id_low += total_cu_count; 4962306a36Sopenharmony_ci return current_id; 5062306a36Sopenharmony_ci} 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic struct kfd_gpu_cache_info kaveri_cache_info[] = { 5462306a36Sopenharmony_ci { 5562306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 5662306a36Sopenharmony_ci .cache_size = 16, 5762306a36Sopenharmony_ci .cache_level = 1, 5862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 5962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 6062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 6162306a36Sopenharmony_ci .num_cu_shared = 1, 6262306a36Sopenharmony_ci }, 6362306a36Sopenharmony_ci { 6462306a36Sopenharmony_ci /* Scalar L1 Instruction Cache (in SQC module) per bank */ 6562306a36Sopenharmony_ci .cache_size = 16, 6662306a36Sopenharmony_ci .cache_level = 1, 6762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 6862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 6962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 7062306a36Sopenharmony_ci .num_cu_shared = 2, 7162306a36Sopenharmony_ci }, 7262306a36Sopenharmony_ci { 7362306a36Sopenharmony_ci /* Scalar L1 Data Cache (in SQC module) per bank */ 7462306a36Sopenharmony_ci .cache_size = 8, 7562306a36Sopenharmony_ci .cache_level = 1, 7662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 7762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 7862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 7962306a36Sopenharmony_ci .num_cu_shared = 2, 8062306a36Sopenharmony_ci }, 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci /* TODO: Add L2 Cache information */ 8362306a36Sopenharmony_ci}; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_cistatic struct kfd_gpu_cache_info carrizo_cache_info[] = { 8762306a36Sopenharmony_ci { 8862306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 8962306a36Sopenharmony_ci .cache_size = 16, 9062306a36Sopenharmony_ci .cache_level = 1, 9162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 9262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 9362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 9462306a36Sopenharmony_ci .num_cu_shared = 1, 9562306a36Sopenharmony_ci }, 9662306a36Sopenharmony_ci { 9762306a36Sopenharmony_ci /* Scalar L1 Instruction Cache (in SQC module) per bank */ 9862306a36Sopenharmony_ci .cache_size = 8, 9962306a36Sopenharmony_ci .cache_level = 1, 10062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 10162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 10262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 10362306a36Sopenharmony_ci .num_cu_shared = 4, 10462306a36Sopenharmony_ci }, 10562306a36Sopenharmony_ci { 10662306a36Sopenharmony_ci /* Scalar L1 Data Cache (in SQC module) per bank. */ 10762306a36Sopenharmony_ci .cache_size = 4, 10862306a36Sopenharmony_ci .cache_level = 1, 10962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 11062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 11162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 11262306a36Sopenharmony_ci .num_cu_shared = 4, 11362306a36Sopenharmony_ci }, 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci /* TODO: Add L2 Cache information */ 11662306a36Sopenharmony_ci}; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci#define hawaii_cache_info kaveri_cache_info 11962306a36Sopenharmony_ci#define tonga_cache_info carrizo_cache_info 12062306a36Sopenharmony_ci#define fiji_cache_info carrizo_cache_info 12162306a36Sopenharmony_ci#define polaris10_cache_info carrizo_cache_info 12262306a36Sopenharmony_ci#define polaris11_cache_info carrizo_cache_info 12362306a36Sopenharmony_ci#define polaris12_cache_info carrizo_cache_info 12462306a36Sopenharmony_ci#define vegam_cache_info carrizo_cache_info 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci/* NOTE: L1 cache information has been updated and L2/L3 12762306a36Sopenharmony_ci * cache information has been added for Vega10 and 12862306a36Sopenharmony_ci * newer ASICs. The unit for cache_size is KiB. 12962306a36Sopenharmony_ci * In future, check & update cache details 13062306a36Sopenharmony_ci * for every new ASIC is required. 13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic struct kfd_gpu_cache_info vega10_cache_info[] = { 13462306a36Sopenharmony_ci { 13562306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 13662306a36Sopenharmony_ci .cache_size = 16, 13762306a36Sopenharmony_ci .cache_level = 1, 13862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 13962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 14062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 14162306a36Sopenharmony_ci .num_cu_shared = 1, 14262306a36Sopenharmony_ci }, 14362306a36Sopenharmony_ci { 14462306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 14562306a36Sopenharmony_ci .cache_size = 32, 14662306a36Sopenharmony_ci .cache_level = 1, 14762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 14862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 14962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 15062306a36Sopenharmony_ci .num_cu_shared = 3, 15162306a36Sopenharmony_ci }, 15262306a36Sopenharmony_ci { 15362306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 15462306a36Sopenharmony_ci .cache_size = 16, 15562306a36Sopenharmony_ci .cache_level = 1, 15662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 15762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 15862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 15962306a36Sopenharmony_ci .num_cu_shared = 3, 16062306a36Sopenharmony_ci }, 16162306a36Sopenharmony_ci { 16262306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 16362306a36Sopenharmony_ci .cache_size = 4096, 16462306a36Sopenharmony_ci .cache_level = 2, 16562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 16662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 16762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 16862306a36Sopenharmony_ci .num_cu_shared = 16, 16962306a36Sopenharmony_ci }, 17062306a36Sopenharmony_ci}; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_cistatic struct kfd_gpu_cache_info raven_cache_info[] = { 17362306a36Sopenharmony_ci { 17462306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 17562306a36Sopenharmony_ci .cache_size = 16, 17662306a36Sopenharmony_ci .cache_level = 1, 17762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 17862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 17962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 18062306a36Sopenharmony_ci .num_cu_shared = 1, 18162306a36Sopenharmony_ci }, 18262306a36Sopenharmony_ci { 18362306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 18462306a36Sopenharmony_ci .cache_size = 32, 18562306a36Sopenharmony_ci .cache_level = 1, 18662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 18762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 18862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 18962306a36Sopenharmony_ci .num_cu_shared = 3, 19062306a36Sopenharmony_ci }, 19162306a36Sopenharmony_ci { 19262306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 19362306a36Sopenharmony_ci .cache_size = 16, 19462306a36Sopenharmony_ci .cache_level = 1, 19562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 19662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 19762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 19862306a36Sopenharmony_ci .num_cu_shared = 3, 19962306a36Sopenharmony_ci }, 20062306a36Sopenharmony_ci { 20162306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 20262306a36Sopenharmony_ci .cache_size = 1024, 20362306a36Sopenharmony_ci .cache_level = 2, 20462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 20562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 20662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 20762306a36Sopenharmony_ci .num_cu_shared = 11, 20862306a36Sopenharmony_ci }, 20962306a36Sopenharmony_ci}; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic struct kfd_gpu_cache_info renoir_cache_info[] = { 21262306a36Sopenharmony_ci { 21362306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 21462306a36Sopenharmony_ci .cache_size = 16, 21562306a36Sopenharmony_ci .cache_level = 1, 21662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 21762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 21862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 21962306a36Sopenharmony_ci .num_cu_shared = 1, 22062306a36Sopenharmony_ci }, 22162306a36Sopenharmony_ci { 22262306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 22362306a36Sopenharmony_ci .cache_size = 32, 22462306a36Sopenharmony_ci .cache_level = 1, 22562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 22662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 22762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 22862306a36Sopenharmony_ci .num_cu_shared = 3, 22962306a36Sopenharmony_ci }, 23062306a36Sopenharmony_ci { 23162306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 23262306a36Sopenharmony_ci .cache_size = 16, 23362306a36Sopenharmony_ci .cache_level = 1, 23462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 23562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 23662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 23762306a36Sopenharmony_ci .num_cu_shared = 3, 23862306a36Sopenharmony_ci }, 23962306a36Sopenharmony_ci { 24062306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 24162306a36Sopenharmony_ci .cache_size = 1024, 24262306a36Sopenharmony_ci .cache_level = 2, 24362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 24462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 24562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 24662306a36Sopenharmony_ci .num_cu_shared = 8, 24762306a36Sopenharmony_ci }, 24862306a36Sopenharmony_ci}; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_cistatic struct kfd_gpu_cache_info vega12_cache_info[] = { 25162306a36Sopenharmony_ci { 25262306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 25362306a36Sopenharmony_ci .cache_size = 16, 25462306a36Sopenharmony_ci .cache_level = 1, 25562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 25662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 25762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 25862306a36Sopenharmony_ci .num_cu_shared = 1, 25962306a36Sopenharmony_ci }, 26062306a36Sopenharmony_ci { 26162306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 26262306a36Sopenharmony_ci .cache_size = 32, 26362306a36Sopenharmony_ci .cache_level = 1, 26462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 26562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 26662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 26762306a36Sopenharmony_ci .num_cu_shared = 3, 26862306a36Sopenharmony_ci }, 26962306a36Sopenharmony_ci { 27062306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 27162306a36Sopenharmony_ci .cache_size = 16, 27262306a36Sopenharmony_ci .cache_level = 1, 27362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 27462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 27562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 27662306a36Sopenharmony_ci .num_cu_shared = 3, 27762306a36Sopenharmony_ci }, 27862306a36Sopenharmony_ci { 27962306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 28062306a36Sopenharmony_ci .cache_size = 2048, 28162306a36Sopenharmony_ci .cache_level = 2, 28262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 28362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 28462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 28562306a36Sopenharmony_ci .num_cu_shared = 5, 28662306a36Sopenharmony_ci }, 28762306a36Sopenharmony_ci}; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_cistatic struct kfd_gpu_cache_info vega20_cache_info[] = { 29062306a36Sopenharmony_ci { 29162306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 29262306a36Sopenharmony_ci .cache_size = 16, 29362306a36Sopenharmony_ci .cache_level = 1, 29462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 29562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 29662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 29762306a36Sopenharmony_ci .num_cu_shared = 1, 29862306a36Sopenharmony_ci }, 29962306a36Sopenharmony_ci { 30062306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 30162306a36Sopenharmony_ci .cache_size = 32, 30262306a36Sopenharmony_ci .cache_level = 1, 30362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 30462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 30562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 30662306a36Sopenharmony_ci .num_cu_shared = 3, 30762306a36Sopenharmony_ci }, 30862306a36Sopenharmony_ci { 30962306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 31062306a36Sopenharmony_ci .cache_size = 16, 31162306a36Sopenharmony_ci .cache_level = 1, 31262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 31362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 31462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 31562306a36Sopenharmony_ci .num_cu_shared = 3, 31662306a36Sopenharmony_ci }, 31762306a36Sopenharmony_ci { 31862306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 31962306a36Sopenharmony_ci .cache_size = 8192, 32062306a36Sopenharmony_ci .cache_level = 2, 32162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 32262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 32362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 32462306a36Sopenharmony_ci .num_cu_shared = 16, 32562306a36Sopenharmony_ci }, 32662306a36Sopenharmony_ci}; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_cistatic struct kfd_gpu_cache_info aldebaran_cache_info[] = { 32962306a36Sopenharmony_ci { 33062306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 33162306a36Sopenharmony_ci .cache_size = 16, 33262306a36Sopenharmony_ci .cache_level = 1, 33362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 33462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 33562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 33662306a36Sopenharmony_ci .num_cu_shared = 1, 33762306a36Sopenharmony_ci }, 33862306a36Sopenharmony_ci { 33962306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 34062306a36Sopenharmony_ci .cache_size = 32, 34162306a36Sopenharmony_ci .cache_level = 1, 34262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 34362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 34462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 34562306a36Sopenharmony_ci .num_cu_shared = 2, 34662306a36Sopenharmony_ci }, 34762306a36Sopenharmony_ci { 34862306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 34962306a36Sopenharmony_ci .cache_size = 16, 35062306a36Sopenharmony_ci .cache_level = 1, 35162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 35262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 35362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 35462306a36Sopenharmony_ci .num_cu_shared = 2, 35562306a36Sopenharmony_ci }, 35662306a36Sopenharmony_ci { 35762306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 35862306a36Sopenharmony_ci .cache_size = 8192, 35962306a36Sopenharmony_ci .cache_level = 2, 36062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 36162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 36262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 36362306a36Sopenharmony_ci .num_cu_shared = 14, 36462306a36Sopenharmony_ci }, 36562306a36Sopenharmony_ci}; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_cistatic struct kfd_gpu_cache_info navi10_cache_info[] = { 36862306a36Sopenharmony_ci { 36962306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 37062306a36Sopenharmony_ci .cache_size = 16, 37162306a36Sopenharmony_ci .cache_level = 1, 37262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 37362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 37462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 37562306a36Sopenharmony_ci .num_cu_shared = 1, 37662306a36Sopenharmony_ci }, 37762306a36Sopenharmony_ci { 37862306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 37962306a36Sopenharmony_ci .cache_size = 32, 38062306a36Sopenharmony_ci .cache_level = 1, 38162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 38262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 38362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 38462306a36Sopenharmony_ci .num_cu_shared = 2, 38562306a36Sopenharmony_ci }, 38662306a36Sopenharmony_ci { 38762306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 38862306a36Sopenharmony_ci .cache_size = 16, 38962306a36Sopenharmony_ci .cache_level = 1, 39062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 39162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 39262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 39362306a36Sopenharmony_ci .num_cu_shared = 2, 39462306a36Sopenharmony_ci }, 39562306a36Sopenharmony_ci { 39662306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 39762306a36Sopenharmony_ci .cache_size = 128, 39862306a36Sopenharmony_ci .cache_level = 1, 39962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 40062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 40162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 40262306a36Sopenharmony_ci .num_cu_shared = 10, 40362306a36Sopenharmony_ci }, 40462306a36Sopenharmony_ci { 40562306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 40662306a36Sopenharmony_ci .cache_size = 4096, 40762306a36Sopenharmony_ci .cache_level = 2, 40862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 40962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 41062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 41162306a36Sopenharmony_ci .num_cu_shared = 10, 41262306a36Sopenharmony_ci }, 41362306a36Sopenharmony_ci}; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_cistatic struct kfd_gpu_cache_info vangogh_cache_info[] = { 41662306a36Sopenharmony_ci { 41762306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 41862306a36Sopenharmony_ci .cache_size = 16, 41962306a36Sopenharmony_ci .cache_level = 1, 42062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 42162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 42262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 42362306a36Sopenharmony_ci .num_cu_shared = 1, 42462306a36Sopenharmony_ci }, 42562306a36Sopenharmony_ci { 42662306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 42762306a36Sopenharmony_ci .cache_size = 32, 42862306a36Sopenharmony_ci .cache_level = 1, 42962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 43062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 43162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 43262306a36Sopenharmony_ci .num_cu_shared = 2, 43362306a36Sopenharmony_ci }, 43462306a36Sopenharmony_ci { 43562306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 43662306a36Sopenharmony_ci .cache_size = 16, 43762306a36Sopenharmony_ci .cache_level = 1, 43862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 43962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 44062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 44162306a36Sopenharmony_ci .num_cu_shared = 2, 44262306a36Sopenharmony_ci }, 44362306a36Sopenharmony_ci { 44462306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 44562306a36Sopenharmony_ci .cache_size = 128, 44662306a36Sopenharmony_ci .cache_level = 1, 44762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 44862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 44962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 45062306a36Sopenharmony_ci .num_cu_shared = 8, 45162306a36Sopenharmony_ci }, 45262306a36Sopenharmony_ci { 45362306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 45462306a36Sopenharmony_ci .cache_size = 1024, 45562306a36Sopenharmony_ci .cache_level = 2, 45662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 45762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 45862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 45962306a36Sopenharmony_ci .num_cu_shared = 8, 46062306a36Sopenharmony_ci }, 46162306a36Sopenharmony_ci}; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_cistatic struct kfd_gpu_cache_info navi14_cache_info[] = { 46462306a36Sopenharmony_ci { 46562306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 46662306a36Sopenharmony_ci .cache_size = 16, 46762306a36Sopenharmony_ci .cache_level = 1, 46862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 46962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 47062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 47162306a36Sopenharmony_ci .num_cu_shared = 1, 47262306a36Sopenharmony_ci }, 47362306a36Sopenharmony_ci { 47462306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 47562306a36Sopenharmony_ci .cache_size = 32, 47662306a36Sopenharmony_ci .cache_level = 1, 47762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 47862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 47962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 48062306a36Sopenharmony_ci .num_cu_shared = 2, 48162306a36Sopenharmony_ci }, 48262306a36Sopenharmony_ci { 48362306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 48462306a36Sopenharmony_ci .cache_size = 16, 48562306a36Sopenharmony_ci .cache_level = 1, 48662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 48762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 48862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 48962306a36Sopenharmony_ci .num_cu_shared = 2, 49062306a36Sopenharmony_ci }, 49162306a36Sopenharmony_ci { 49262306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 49362306a36Sopenharmony_ci .cache_size = 128, 49462306a36Sopenharmony_ci .cache_level = 1, 49562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 49662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 49762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 49862306a36Sopenharmony_ci .num_cu_shared = 12, 49962306a36Sopenharmony_ci }, 50062306a36Sopenharmony_ci { 50162306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 50262306a36Sopenharmony_ci .cache_size = 2048, 50362306a36Sopenharmony_ci .cache_level = 2, 50462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 50562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 50662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 50762306a36Sopenharmony_ci .num_cu_shared = 12, 50862306a36Sopenharmony_ci }, 50962306a36Sopenharmony_ci}; 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_cistatic struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { 51262306a36Sopenharmony_ci { 51362306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 51462306a36Sopenharmony_ci .cache_size = 16, 51562306a36Sopenharmony_ci .cache_level = 1, 51662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 51762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 51862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 51962306a36Sopenharmony_ci .num_cu_shared = 1, 52062306a36Sopenharmony_ci }, 52162306a36Sopenharmony_ci { 52262306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 52362306a36Sopenharmony_ci .cache_size = 32, 52462306a36Sopenharmony_ci .cache_level = 1, 52562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 52662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 52762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 52862306a36Sopenharmony_ci .num_cu_shared = 2, 52962306a36Sopenharmony_ci }, 53062306a36Sopenharmony_ci { 53162306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 53262306a36Sopenharmony_ci .cache_size = 16, 53362306a36Sopenharmony_ci .cache_level = 1, 53462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 53562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 53662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 53762306a36Sopenharmony_ci .num_cu_shared = 2, 53862306a36Sopenharmony_ci }, 53962306a36Sopenharmony_ci { 54062306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 54162306a36Sopenharmony_ci .cache_size = 128, 54262306a36Sopenharmony_ci .cache_level = 1, 54362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 54462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 54562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 54662306a36Sopenharmony_ci .num_cu_shared = 10, 54762306a36Sopenharmony_ci }, 54862306a36Sopenharmony_ci { 54962306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 55062306a36Sopenharmony_ci .cache_size = 4096, 55162306a36Sopenharmony_ci .cache_level = 2, 55262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 55362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 55462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 55562306a36Sopenharmony_ci .num_cu_shared = 10, 55662306a36Sopenharmony_ci }, 55762306a36Sopenharmony_ci { 55862306a36Sopenharmony_ci /* L3 Data Cache per GPU */ 55962306a36Sopenharmony_ci .cache_size = 128*1024, 56062306a36Sopenharmony_ci .cache_level = 3, 56162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 56262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 56362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 56462306a36Sopenharmony_ci .num_cu_shared = 10, 56562306a36Sopenharmony_ci }, 56662306a36Sopenharmony_ci}; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_cistatic struct kfd_gpu_cache_info navy_flounder_cache_info[] = { 56962306a36Sopenharmony_ci { 57062306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 57162306a36Sopenharmony_ci .cache_size = 16, 57262306a36Sopenharmony_ci .cache_level = 1, 57362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 57462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 57562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 57662306a36Sopenharmony_ci .num_cu_shared = 1, 57762306a36Sopenharmony_ci }, 57862306a36Sopenharmony_ci { 57962306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 58062306a36Sopenharmony_ci .cache_size = 32, 58162306a36Sopenharmony_ci .cache_level = 1, 58262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 58362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 58462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 58562306a36Sopenharmony_ci .num_cu_shared = 2, 58662306a36Sopenharmony_ci }, 58762306a36Sopenharmony_ci { 58862306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 58962306a36Sopenharmony_ci .cache_size = 16, 59062306a36Sopenharmony_ci .cache_level = 1, 59162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 59262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 59362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 59462306a36Sopenharmony_ci .num_cu_shared = 2, 59562306a36Sopenharmony_ci }, 59662306a36Sopenharmony_ci { 59762306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 59862306a36Sopenharmony_ci .cache_size = 128, 59962306a36Sopenharmony_ci .cache_level = 1, 60062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 60162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 60262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 60362306a36Sopenharmony_ci .num_cu_shared = 10, 60462306a36Sopenharmony_ci }, 60562306a36Sopenharmony_ci { 60662306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 60762306a36Sopenharmony_ci .cache_size = 3072, 60862306a36Sopenharmony_ci .cache_level = 2, 60962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 61062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 61162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 61262306a36Sopenharmony_ci .num_cu_shared = 10, 61362306a36Sopenharmony_ci }, 61462306a36Sopenharmony_ci { 61562306a36Sopenharmony_ci /* L3 Data Cache per GPU */ 61662306a36Sopenharmony_ci .cache_size = 96*1024, 61762306a36Sopenharmony_ci .cache_level = 3, 61862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 61962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 62062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 62162306a36Sopenharmony_ci .num_cu_shared = 10, 62262306a36Sopenharmony_ci }, 62362306a36Sopenharmony_ci}; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_cistatic struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { 62662306a36Sopenharmony_ci { 62762306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 62862306a36Sopenharmony_ci .cache_size = 16, 62962306a36Sopenharmony_ci .cache_level = 1, 63062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 63162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 63262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 63362306a36Sopenharmony_ci .num_cu_shared = 1, 63462306a36Sopenharmony_ci }, 63562306a36Sopenharmony_ci { 63662306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 63762306a36Sopenharmony_ci .cache_size = 32, 63862306a36Sopenharmony_ci .cache_level = 1, 63962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 64062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 64162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 64262306a36Sopenharmony_ci .num_cu_shared = 2, 64362306a36Sopenharmony_ci }, 64462306a36Sopenharmony_ci { 64562306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 64662306a36Sopenharmony_ci .cache_size = 16, 64762306a36Sopenharmony_ci .cache_level = 1, 64862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 64962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 65062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 65162306a36Sopenharmony_ci .num_cu_shared = 2, 65262306a36Sopenharmony_ci }, 65362306a36Sopenharmony_ci { 65462306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 65562306a36Sopenharmony_ci .cache_size = 128, 65662306a36Sopenharmony_ci .cache_level = 1, 65762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 65862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 65962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 66062306a36Sopenharmony_ci .num_cu_shared = 8, 66162306a36Sopenharmony_ci }, 66262306a36Sopenharmony_ci { 66362306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 66462306a36Sopenharmony_ci .cache_size = 2048, 66562306a36Sopenharmony_ci .cache_level = 2, 66662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 66762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 66862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 66962306a36Sopenharmony_ci .num_cu_shared = 8, 67062306a36Sopenharmony_ci }, 67162306a36Sopenharmony_ci { 67262306a36Sopenharmony_ci /* L3 Data Cache per GPU */ 67362306a36Sopenharmony_ci .cache_size = 32*1024, 67462306a36Sopenharmony_ci .cache_level = 3, 67562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 67662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 67762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 67862306a36Sopenharmony_ci .num_cu_shared = 8, 67962306a36Sopenharmony_ci }, 68062306a36Sopenharmony_ci}; 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_cistatic struct kfd_gpu_cache_info beige_goby_cache_info[] = { 68362306a36Sopenharmony_ci { 68462306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 68562306a36Sopenharmony_ci .cache_size = 16, 68662306a36Sopenharmony_ci .cache_level = 1, 68762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 68862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 68962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 69062306a36Sopenharmony_ci .num_cu_shared = 1, 69162306a36Sopenharmony_ci }, 69262306a36Sopenharmony_ci { 69362306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 69462306a36Sopenharmony_ci .cache_size = 32, 69562306a36Sopenharmony_ci .cache_level = 1, 69662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 69762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 69862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 69962306a36Sopenharmony_ci .num_cu_shared = 2, 70062306a36Sopenharmony_ci }, 70162306a36Sopenharmony_ci { 70262306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 70362306a36Sopenharmony_ci .cache_size = 16, 70462306a36Sopenharmony_ci .cache_level = 1, 70562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 70662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 70762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 70862306a36Sopenharmony_ci .num_cu_shared = 2, 70962306a36Sopenharmony_ci }, 71062306a36Sopenharmony_ci { 71162306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 71262306a36Sopenharmony_ci .cache_size = 128, 71362306a36Sopenharmony_ci .cache_level = 1, 71462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 71562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 71662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 71762306a36Sopenharmony_ci .num_cu_shared = 8, 71862306a36Sopenharmony_ci }, 71962306a36Sopenharmony_ci { 72062306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 72162306a36Sopenharmony_ci .cache_size = 1024, 72262306a36Sopenharmony_ci .cache_level = 2, 72362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 72462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 72562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 72662306a36Sopenharmony_ci .num_cu_shared = 8, 72762306a36Sopenharmony_ci }, 72862306a36Sopenharmony_ci { 72962306a36Sopenharmony_ci /* L3 Data Cache per GPU */ 73062306a36Sopenharmony_ci .cache_size = 16*1024, 73162306a36Sopenharmony_ci .cache_level = 3, 73262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 73362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 73462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 73562306a36Sopenharmony_ci .num_cu_shared = 8, 73662306a36Sopenharmony_ci }, 73762306a36Sopenharmony_ci}; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_cistatic struct kfd_gpu_cache_info yellow_carp_cache_info[] = { 74062306a36Sopenharmony_ci { 74162306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 74262306a36Sopenharmony_ci .cache_size = 16, 74362306a36Sopenharmony_ci .cache_level = 1, 74462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 74562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 74662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 74762306a36Sopenharmony_ci .num_cu_shared = 1, 74862306a36Sopenharmony_ci }, 74962306a36Sopenharmony_ci { 75062306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 75162306a36Sopenharmony_ci .cache_size = 32, 75262306a36Sopenharmony_ci .cache_level = 1, 75362306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 75462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 75562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 75662306a36Sopenharmony_ci .num_cu_shared = 2, 75762306a36Sopenharmony_ci }, 75862306a36Sopenharmony_ci { 75962306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 76062306a36Sopenharmony_ci .cache_size = 16, 76162306a36Sopenharmony_ci .cache_level = 1, 76262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 76362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 76462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 76562306a36Sopenharmony_ci .num_cu_shared = 2, 76662306a36Sopenharmony_ci }, 76762306a36Sopenharmony_ci { 76862306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 76962306a36Sopenharmony_ci .cache_size = 128, 77062306a36Sopenharmony_ci .cache_level = 1, 77162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 77262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 77362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 77462306a36Sopenharmony_ci .num_cu_shared = 6, 77562306a36Sopenharmony_ci }, 77662306a36Sopenharmony_ci { 77762306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 77862306a36Sopenharmony_ci .cache_size = 2048, 77962306a36Sopenharmony_ci .cache_level = 2, 78062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 78162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 78262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 78362306a36Sopenharmony_ci .num_cu_shared = 6, 78462306a36Sopenharmony_ci }, 78562306a36Sopenharmony_ci}; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_cistatic struct kfd_gpu_cache_info gfx1037_cache_info[] = { 78862306a36Sopenharmony_ci { 78962306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 79062306a36Sopenharmony_ci .cache_size = 16, 79162306a36Sopenharmony_ci .cache_level = 1, 79262306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 79362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 79462306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 79562306a36Sopenharmony_ci .num_cu_shared = 1, 79662306a36Sopenharmony_ci }, 79762306a36Sopenharmony_ci { 79862306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 79962306a36Sopenharmony_ci .cache_size = 32, 80062306a36Sopenharmony_ci .cache_level = 1, 80162306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 80262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 80362306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 80462306a36Sopenharmony_ci .num_cu_shared = 2, 80562306a36Sopenharmony_ci }, 80662306a36Sopenharmony_ci { 80762306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 80862306a36Sopenharmony_ci .cache_size = 16, 80962306a36Sopenharmony_ci .cache_level = 1, 81062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 81162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 81262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 81362306a36Sopenharmony_ci .num_cu_shared = 2, 81462306a36Sopenharmony_ci }, 81562306a36Sopenharmony_ci { 81662306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 81762306a36Sopenharmony_ci .cache_size = 128, 81862306a36Sopenharmony_ci .cache_level = 1, 81962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 82062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 82162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 82262306a36Sopenharmony_ci .num_cu_shared = 2, 82362306a36Sopenharmony_ci }, 82462306a36Sopenharmony_ci { 82562306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 82662306a36Sopenharmony_ci .cache_size = 256, 82762306a36Sopenharmony_ci .cache_level = 2, 82862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 82962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 83062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 83162306a36Sopenharmony_ci .num_cu_shared = 2, 83262306a36Sopenharmony_ci }, 83362306a36Sopenharmony_ci}; 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_cistatic struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { 83662306a36Sopenharmony_ci { 83762306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 83862306a36Sopenharmony_ci .cache_size = 16, 83962306a36Sopenharmony_ci .cache_level = 1, 84062306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 84162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 84262306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 84362306a36Sopenharmony_ci .num_cu_shared = 1, 84462306a36Sopenharmony_ci }, 84562306a36Sopenharmony_ci { 84662306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 84762306a36Sopenharmony_ci .cache_size = 32, 84862306a36Sopenharmony_ci .cache_level = 1, 84962306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 85062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 85162306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 85262306a36Sopenharmony_ci .num_cu_shared = 2, 85362306a36Sopenharmony_ci }, 85462306a36Sopenharmony_ci { 85562306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 85662306a36Sopenharmony_ci .cache_size = 16, 85762306a36Sopenharmony_ci .cache_level = 1, 85862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 85962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 86062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 86162306a36Sopenharmony_ci .num_cu_shared = 2, 86262306a36Sopenharmony_ci }, 86362306a36Sopenharmony_ci { 86462306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 86562306a36Sopenharmony_ci .cache_size = 128, 86662306a36Sopenharmony_ci .cache_level = 1, 86762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 86862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 86962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 87062306a36Sopenharmony_ci .num_cu_shared = 2, 87162306a36Sopenharmony_ci }, 87262306a36Sopenharmony_ci { 87362306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 87462306a36Sopenharmony_ci .cache_size = 256, 87562306a36Sopenharmony_ci .cache_level = 2, 87662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 87762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 87862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 87962306a36Sopenharmony_ci .num_cu_shared = 2, 88062306a36Sopenharmony_ci }, 88162306a36Sopenharmony_ci}; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_cistatic struct kfd_gpu_cache_info dummy_cache_info[] = { 88462306a36Sopenharmony_ci { 88562306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 88662306a36Sopenharmony_ci .cache_size = 16, 88762306a36Sopenharmony_ci .cache_level = 1, 88862306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 88962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 89062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 89162306a36Sopenharmony_ci .num_cu_shared = 1, 89262306a36Sopenharmony_ci }, 89362306a36Sopenharmony_ci { 89462306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 89562306a36Sopenharmony_ci .cache_size = 32, 89662306a36Sopenharmony_ci .cache_level = 1, 89762306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 89862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 89962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 90062306a36Sopenharmony_ci .num_cu_shared = 2, 90162306a36Sopenharmony_ci }, 90262306a36Sopenharmony_ci { 90362306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 90462306a36Sopenharmony_ci .cache_size = 16, 90562306a36Sopenharmony_ci .cache_level = 1, 90662306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 90762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 90862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 90962306a36Sopenharmony_ci .num_cu_shared = 2, 91062306a36Sopenharmony_ci }, 91162306a36Sopenharmony_ci { 91262306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 91362306a36Sopenharmony_ci .cache_size = 128, 91462306a36Sopenharmony_ci .cache_level = 1, 91562306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 91662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 91762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 91862306a36Sopenharmony_ci .num_cu_shared = 6, 91962306a36Sopenharmony_ci }, 92062306a36Sopenharmony_ci { 92162306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 92262306a36Sopenharmony_ci .cache_size = 2048, 92362306a36Sopenharmony_ci .cache_level = 2, 92462306a36Sopenharmony_ci .flags = (CRAT_CACHE_FLAGS_ENABLED | 92562306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 92662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE), 92762306a36Sopenharmony_ci .num_cu_shared = 6, 92862306a36Sopenharmony_ci }, 92962306a36Sopenharmony_ci}; 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_cistatic void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, 93262306a36Sopenharmony_ci struct crat_subtype_computeunit *cu) 93362306a36Sopenharmony_ci{ 93462306a36Sopenharmony_ci dev->node_props.cpu_cores_count = cu->num_cpu_cores; 93562306a36Sopenharmony_ci dev->node_props.cpu_core_id_base = cu->processor_id_low; 93662306a36Sopenharmony_ci if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) 93762306a36Sopenharmony_ci dev->node_props.capability |= HSA_CAP_ATS_PRESENT; 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, 94062306a36Sopenharmony_ci cu->processor_id_low); 94162306a36Sopenharmony_ci} 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_cistatic void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, 94462306a36Sopenharmony_ci struct crat_subtype_computeunit *cu) 94562306a36Sopenharmony_ci{ 94662306a36Sopenharmony_ci dev->node_props.simd_id_base = cu->processor_id_low; 94762306a36Sopenharmony_ci dev->node_props.simd_count = cu->num_simd_cores; 94862306a36Sopenharmony_ci dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; 94962306a36Sopenharmony_ci dev->node_props.max_waves_per_simd = cu->max_waves_simd; 95062306a36Sopenharmony_ci dev->node_props.wave_front_size = cu->wave_front_size; 95162306a36Sopenharmony_ci dev->node_props.array_count = cu->array_count; 95262306a36Sopenharmony_ci dev->node_props.cu_per_simd_array = cu->num_cu_per_array; 95362306a36Sopenharmony_ci dev->node_props.simd_per_cu = cu->num_simd_per_cu; 95462306a36Sopenharmony_ci dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; 95562306a36Sopenharmony_ci if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) 95662306a36Sopenharmony_ci dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; 95762306a36Sopenharmony_ci pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); 95862306a36Sopenharmony_ci} 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct 96162306a36Sopenharmony_ci * topology device present in the device_list 96262306a36Sopenharmony_ci */ 96362306a36Sopenharmony_cistatic int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, 96462306a36Sopenharmony_ci struct list_head *device_list) 96562306a36Sopenharmony_ci{ 96662306a36Sopenharmony_ci struct kfd_topology_device *dev; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", 96962306a36Sopenharmony_ci cu->proximity_domain, cu->hsa_capability); 97062306a36Sopenharmony_ci list_for_each_entry(dev, device_list, list) { 97162306a36Sopenharmony_ci if (cu->proximity_domain == dev->proximity_domain) { 97262306a36Sopenharmony_ci if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) 97362306a36Sopenharmony_ci kfd_populated_cu_info_cpu(dev, cu); 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) 97662306a36Sopenharmony_ci kfd_populated_cu_info_gpu(dev, cu); 97762306a36Sopenharmony_ci break; 97862306a36Sopenharmony_ci } 97962306a36Sopenharmony_ci } 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci return 0; 98262306a36Sopenharmony_ci} 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_cistatic struct kfd_mem_properties * 98562306a36Sopenharmony_cifind_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, 98662306a36Sopenharmony_ci struct kfd_topology_device *dev) 98762306a36Sopenharmony_ci{ 98862306a36Sopenharmony_ci struct kfd_mem_properties *props; 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci list_for_each_entry(props, &dev->mem_props, list) { 99162306a36Sopenharmony_ci if (props->heap_type == heap_type 99262306a36Sopenharmony_ci && props->flags == flags 99362306a36Sopenharmony_ci && props->width == width) 99462306a36Sopenharmony_ci return props; 99562306a36Sopenharmony_ci } 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci return NULL; 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct 100062306a36Sopenharmony_ci * topology device present in the device_list 100162306a36Sopenharmony_ci */ 100262306a36Sopenharmony_cistatic int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, 100362306a36Sopenharmony_ci struct list_head *device_list) 100462306a36Sopenharmony_ci{ 100562306a36Sopenharmony_ci struct kfd_mem_properties *props; 100662306a36Sopenharmony_ci struct kfd_topology_device *dev; 100762306a36Sopenharmony_ci uint32_t heap_type; 100862306a36Sopenharmony_ci uint64_t size_in_bytes; 100962306a36Sopenharmony_ci uint32_t flags = 0; 101062306a36Sopenharmony_ci uint32_t width; 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", 101362306a36Sopenharmony_ci mem->proximity_domain); 101462306a36Sopenharmony_ci list_for_each_entry(dev, device_list, list) { 101562306a36Sopenharmony_ci if (mem->proximity_domain == dev->proximity_domain) { 101662306a36Sopenharmony_ci /* We're on GPU node */ 101762306a36Sopenharmony_ci if (dev->node_props.cpu_cores_count == 0) { 101862306a36Sopenharmony_ci /* APU */ 101962306a36Sopenharmony_ci if (mem->visibility_type == 0) 102062306a36Sopenharmony_ci heap_type = 102162306a36Sopenharmony_ci HSA_MEM_HEAP_TYPE_FB_PRIVATE; 102262306a36Sopenharmony_ci /* dGPU */ 102362306a36Sopenharmony_ci else 102462306a36Sopenharmony_ci heap_type = mem->visibility_type; 102562306a36Sopenharmony_ci } else 102662306a36Sopenharmony_ci heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) 102962306a36Sopenharmony_ci flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; 103062306a36Sopenharmony_ci if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) 103162306a36Sopenharmony_ci flags |= HSA_MEM_FLAGS_NON_VOLATILE; 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci size_in_bytes = 103462306a36Sopenharmony_ci ((uint64_t)mem->length_high << 32) + 103562306a36Sopenharmony_ci mem->length_low; 103662306a36Sopenharmony_ci width = mem->width; 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci /* Multiple banks of the same type are aggregated into 103962306a36Sopenharmony_ci * one. User mode doesn't care about multiple physical 104062306a36Sopenharmony_ci * memory segments. It's managed as a single virtual 104162306a36Sopenharmony_ci * heap for user mode. 104262306a36Sopenharmony_ci */ 104362306a36Sopenharmony_ci props = find_subtype_mem(heap_type, flags, width, dev); 104462306a36Sopenharmony_ci if (props) { 104562306a36Sopenharmony_ci props->size_in_bytes += size_in_bytes; 104662306a36Sopenharmony_ci break; 104762306a36Sopenharmony_ci } 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci props = kfd_alloc_struct(props); 105062306a36Sopenharmony_ci if (!props) 105162306a36Sopenharmony_ci return -ENOMEM; 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci props->heap_type = heap_type; 105462306a36Sopenharmony_ci props->flags = flags; 105562306a36Sopenharmony_ci props->size_in_bytes = size_in_bytes; 105662306a36Sopenharmony_ci props->width = width; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci dev->node_props.mem_banks_count++; 105962306a36Sopenharmony_ci list_add_tail(&props->list, &dev->mem_props); 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci break; 106262306a36Sopenharmony_ci } 106362306a36Sopenharmony_ci } 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci return 0; 106662306a36Sopenharmony_ci} 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct 106962306a36Sopenharmony_ci * topology device present in the device_list 107062306a36Sopenharmony_ci */ 107162306a36Sopenharmony_cistatic int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, 107262306a36Sopenharmony_ci struct list_head *device_list) 107362306a36Sopenharmony_ci{ 107462306a36Sopenharmony_ci struct kfd_cache_properties *props; 107562306a36Sopenharmony_ci struct kfd_topology_device *dev; 107662306a36Sopenharmony_ci uint32_t id; 107762306a36Sopenharmony_ci uint32_t total_num_of_cu; 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci id = cache->processor_id_low; 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); 108262306a36Sopenharmony_ci list_for_each_entry(dev, device_list, list) { 108362306a36Sopenharmony_ci total_num_of_cu = (dev->node_props.array_count * 108462306a36Sopenharmony_ci dev->node_props.cu_per_simd_array); 108562306a36Sopenharmony_ci 108662306a36Sopenharmony_ci /* Cache infomration in CRAT doesn't have proximity_domain 108762306a36Sopenharmony_ci * information as it is associated with a CPU core or GPU 108862306a36Sopenharmony_ci * Compute Unit. So map the cache using CPU core Id or SIMD 108962306a36Sopenharmony_ci * (GPU) ID. 109062306a36Sopenharmony_ci * TODO: This works because currently we can safely assume that 109162306a36Sopenharmony_ci * Compute Units are parsed before caches are parsed. In 109262306a36Sopenharmony_ci * future, remove this dependency 109362306a36Sopenharmony_ci */ 109462306a36Sopenharmony_ci if ((id >= dev->node_props.cpu_core_id_base && 109562306a36Sopenharmony_ci id <= dev->node_props.cpu_core_id_base + 109662306a36Sopenharmony_ci dev->node_props.cpu_cores_count) || 109762306a36Sopenharmony_ci (id >= dev->node_props.simd_id_base && 109862306a36Sopenharmony_ci id < dev->node_props.simd_id_base + 109962306a36Sopenharmony_ci total_num_of_cu)) { 110062306a36Sopenharmony_ci props = kfd_alloc_struct(props); 110162306a36Sopenharmony_ci if (!props) 110262306a36Sopenharmony_ci return -ENOMEM; 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci props->processor_id_low = id; 110562306a36Sopenharmony_ci props->cache_level = cache->cache_level; 110662306a36Sopenharmony_ci props->cache_size = cache->cache_size; 110762306a36Sopenharmony_ci props->cacheline_size = cache->cache_line_size; 110862306a36Sopenharmony_ci props->cachelines_per_tag = cache->lines_per_tag; 110962306a36Sopenharmony_ci props->cache_assoc = cache->associativity; 111062306a36Sopenharmony_ci props->cache_latency = cache->cache_latency; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci memcpy(props->sibling_map, cache->sibling_map, 111362306a36Sopenharmony_ci CRAT_SIBLINGMAP_SIZE); 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci /* set the sibling_map_size as 32 for CRAT from ACPI */ 111662306a36Sopenharmony_ci props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) 111962306a36Sopenharmony_ci props->cache_type |= HSA_CACHE_TYPE_DATA; 112062306a36Sopenharmony_ci if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) 112162306a36Sopenharmony_ci props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; 112262306a36Sopenharmony_ci if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) 112362306a36Sopenharmony_ci props->cache_type |= HSA_CACHE_TYPE_CPU; 112462306a36Sopenharmony_ci if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) 112562306a36Sopenharmony_ci props->cache_type |= HSA_CACHE_TYPE_HSACU; 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci dev->node_props.caches_count++; 112862306a36Sopenharmony_ci list_add_tail(&props->list, &dev->cache_props); 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci break; 113162306a36Sopenharmony_ci } 113262306a36Sopenharmony_ci } 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci return 0; 113562306a36Sopenharmony_ci} 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct 113862306a36Sopenharmony_ci * topology device present in the device_list 113962306a36Sopenharmony_ci */ 114062306a36Sopenharmony_cistatic int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, 114162306a36Sopenharmony_ci struct list_head *device_list) 114262306a36Sopenharmony_ci{ 114362306a36Sopenharmony_ci struct kfd_iolink_properties *props = NULL, *props2; 114462306a36Sopenharmony_ci struct kfd_topology_device *dev, *to_dev; 114562306a36Sopenharmony_ci uint32_t id_from; 114662306a36Sopenharmony_ci uint32_t id_to; 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci id_from = iolink->proximity_domain_from; 114962306a36Sopenharmony_ci id_to = iolink->proximity_domain_to; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n", 115262306a36Sopenharmony_ci id_from, id_to); 115362306a36Sopenharmony_ci list_for_each_entry(dev, device_list, list) { 115462306a36Sopenharmony_ci if (id_from == dev->proximity_domain) { 115562306a36Sopenharmony_ci props = kfd_alloc_struct(props); 115662306a36Sopenharmony_ci if (!props) 115762306a36Sopenharmony_ci return -ENOMEM; 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci props->node_from = id_from; 116062306a36Sopenharmony_ci props->node_to = id_to; 116162306a36Sopenharmony_ci props->ver_maj = iolink->version_major; 116262306a36Sopenharmony_ci props->ver_min = iolink->version_minor; 116362306a36Sopenharmony_ci props->iolink_type = iolink->io_interface_type; 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) 116662306a36Sopenharmony_ci props->weight = 20; 116762306a36Sopenharmony_ci else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) 116862306a36Sopenharmony_ci props->weight = iolink->weight_xgmi; 116962306a36Sopenharmony_ci else 117062306a36Sopenharmony_ci props->weight = node_distance(id_from, id_to); 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci props->min_latency = iolink->minimum_latency; 117362306a36Sopenharmony_ci props->max_latency = iolink->maximum_latency; 117462306a36Sopenharmony_ci props->min_bandwidth = iolink->minimum_bandwidth_mbs; 117562306a36Sopenharmony_ci props->max_bandwidth = iolink->maximum_bandwidth_mbs; 117662306a36Sopenharmony_ci props->rec_transfer_size = 117762306a36Sopenharmony_ci iolink->recommended_transfer_size; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci dev->node_props.io_links_count++; 118062306a36Sopenharmony_ci list_add_tail(&props->list, &dev->io_link_props); 118162306a36Sopenharmony_ci break; 118262306a36Sopenharmony_ci } 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci /* CPU topology is created before GPUs are detected, so CPU->GPU 118662306a36Sopenharmony_ci * links are not built at that time. If a PCIe type is discovered, it 118762306a36Sopenharmony_ci * means a GPU is detected and we are adding GPU->CPU to the topology. 118862306a36Sopenharmony_ci * At this time, also add the corresponded CPU->GPU link if GPU 118962306a36Sopenharmony_ci * is large bar. 119062306a36Sopenharmony_ci * For xGMI, we only added the link with one direction in the crat 119162306a36Sopenharmony_ci * table, add corresponded reversed direction link now. 119262306a36Sopenharmony_ci */ 119362306a36Sopenharmony_ci if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { 119462306a36Sopenharmony_ci to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to); 119562306a36Sopenharmony_ci if (!to_dev) 119662306a36Sopenharmony_ci return -ENODEV; 119762306a36Sopenharmony_ci /* same everything but the other direction */ 119862306a36Sopenharmony_ci props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); 119962306a36Sopenharmony_ci if (!props2) 120062306a36Sopenharmony_ci return -ENOMEM; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci props2->node_from = id_to; 120362306a36Sopenharmony_ci props2->node_to = id_from; 120462306a36Sopenharmony_ci props2->kobj = NULL; 120562306a36Sopenharmony_ci to_dev->node_props.io_links_count++; 120662306a36Sopenharmony_ci list_add_tail(&props2->list, &to_dev->io_link_props); 120762306a36Sopenharmony_ci } 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci return 0; 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci/* kfd_parse_subtype - parse subtypes and attach it to correct topology device 121362306a36Sopenharmony_ci * present in the device_list 121462306a36Sopenharmony_ci * @sub_type_hdr - subtype section of crat_image 121562306a36Sopenharmony_ci * @device_list - list of topology devices present in this crat_image 121662306a36Sopenharmony_ci */ 121762306a36Sopenharmony_cistatic int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, 121862306a36Sopenharmony_ci struct list_head *device_list) 121962306a36Sopenharmony_ci{ 122062306a36Sopenharmony_ci struct crat_subtype_computeunit *cu; 122162306a36Sopenharmony_ci struct crat_subtype_memory *mem; 122262306a36Sopenharmony_ci struct crat_subtype_cache *cache; 122362306a36Sopenharmony_ci struct crat_subtype_iolink *iolink; 122462306a36Sopenharmony_ci int ret = 0; 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci switch (sub_type_hdr->type) { 122762306a36Sopenharmony_ci case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: 122862306a36Sopenharmony_ci cu = (struct crat_subtype_computeunit *)sub_type_hdr; 122962306a36Sopenharmony_ci ret = kfd_parse_subtype_cu(cu, device_list); 123062306a36Sopenharmony_ci break; 123162306a36Sopenharmony_ci case CRAT_SUBTYPE_MEMORY_AFFINITY: 123262306a36Sopenharmony_ci mem = (struct crat_subtype_memory *)sub_type_hdr; 123362306a36Sopenharmony_ci ret = kfd_parse_subtype_mem(mem, device_list); 123462306a36Sopenharmony_ci break; 123562306a36Sopenharmony_ci case CRAT_SUBTYPE_CACHE_AFFINITY: 123662306a36Sopenharmony_ci cache = (struct crat_subtype_cache *)sub_type_hdr; 123762306a36Sopenharmony_ci ret = kfd_parse_subtype_cache(cache, device_list); 123862306a36Sopenharmony_ci break; 123962306a36Sopenharmony_ci case CRAT_SUBTYPE_TLB_AFFINITY: 124062306a36Sopenharmony_ci /* 124162306a36Sopenharmony_ci * For now, nothing to do here 124262306a36Sopenharmony_ci */ 124362306a36Sopenharmony_ci pr_debug("Found TLB entry in CRAT table (not processing)\n"); 124462306a36Sopenharmony_ci break; 124562306a36Sopenharmony_ci case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: 124662306a36Sopenharmony_ci /* 124762306a36Sopenharmony_ci * For now, nothing to do here 124862306a36Sopenharmony_ci */ 124962306a36Sopenharmony_ci pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); 125062306a36Sopenharmony_ci break; 125162306a36Sopenharmony_ci case CRAT_SUBTYPE_IOLINK_AFFINITY: 125262306a36Sopenharmony_ci iolink = (struct crat_subtype_iolink *)sub_type_hdr; 125362306a36Sopenharmony_ci ret = kfd_parse_subtype_iolink(iolink, device_list); 125462306a36Sopenharmony_ci break; 125562306a36Sopenharmony_ci default: 125662306a36Sopenharmony_ci pr_warn("Unknown subtype %d in CRAT\n", 125762306a36Sopenharmony_ci sub_type_hdr->type); 125862306a36Sopenharmony_ci } 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci return ret; 126162306a36Sopenharmony_ci} 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT 126462306a36Sopenharmony_ci * create a kfd_topology_device and add in to device_list. Also parse 126562306a36Sopenharmony_ci * CRAT subtypes and attach it to appropriate kfd_topology_device 126662306a36Sopenharmony_ci * @crat_image - input image containing CRAT 126762306a36Sopenharmony_ci * @device_list - [OUT] list of kfd_topology_device generated after 126862306a36Sopenharmony_ci * parsing crat_image 126962306a36Sopenharmony_ci * @proximity_domain - Proximity domain of the first device in the table 127062306a36Sopenharmony_ci * 127162306a36Sopenharmony_ci * Return - 0 if successful else -ve value 127262306a36Sopenharmony_ci */ 127362306a36Sopenharmony_ciint kfd_parse_crat_table(void *crat_image, struct list_head *device_list, 127462306a36Sopenharmony_ci uint32_t proximity_domain) 127562306a36Sopenharmony_ci{ 127662306a36Sopenharmony_ci struct kfd_topology_device *top_dev = NULL; 127762306a36Sopenharmony_ci struct crat_subtype_generic *sub_type_hdr; 127862306a36Sopenharmony_ci uint16_t node_id; 127962306a36Sopenharmony_ci int ret = 0; 128062306a36Sopenharmony_ci struct crat_header *crat_table = (struct crat_header *)crat_image; 128162306a36Sopenharmony_ci uint16_t num_nodes; 128262306a36Sopenharmony_ci uint32_t image_len; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci if (!crat_image) 128562306a36Sopenharmony_ci return -EINVAL; 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_ci if (!list_empty(device_list)) { 128862306a36Sopenharmony_ci pr_warn("Error device list should be empty\n"); 128962306a36Sopenharmony_ci return -EINVAL; 129062306a36Sopenharmony_ci } 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci num_nodes = crat_table->num_domains; 129362306a36Sopenharmony_ci image_len = crat_table->length; 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci pr_debug("Parsing CRAT table with %d nodes\n", num_nodes); 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci for (node_id = 0; node_id < num_nodes; node_id++) { 129862306a36Sopenharmony_ci top_dev = kfd_create_topology_device(device_list); 129962306a36Sopenharmony_ci if (!top_dev) 130062306a36Sopenharmony_ci break; 130162306a36Sopenharmony_ci top_dev->proximity_domain = proximity_domain++; 130262306a36Sopenharmony_ci } 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci if (!top_dev) { 130562306a36Sopenharmony_ci ret = -ENOMEM; 130662306a36Sopenharmony_ci goto err; 130762306a36Sopenharmony_ci } 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); 131062306a36Sopenharmony_ci memcpy(top_dev->oem_table_id, crat_table->oem_table_id, 131162306a36Sopenharmony_ci CRAT_OEMTABLEID_LENGTH); 131262306a36Sopenharmony_ci top_dev->oem_revision = crat_table->oem_revision; 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 131562306a36Sopenharmony_ci while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < 131662306a36Sopenharmony_ci ((char *)crat_image) + image_len) { 131762306a36Sopenharmony_ci if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { 131862306a36Sopenharmony_ci ret = kfd_parse_subtype(sub_type_hdr, device_list); 131962306a36Sopenharmony_ci if (ret) 132062306a36Sopenharmony_ci break; 132162306a36Sopenharmony_ci } 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 132462306a36Sopenharmony_ci sub_type_hdr->length); 132562306a36Sopenharmony_ci } 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_cierr: 132862306a36Sopenharmony_ci if (ret) 132962306a36Sopenharmony_ci kfd_release_topology_device_list(device_list); 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci return ret; 133262306a36Sopenharmony_ci} 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_cistatic int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, 133662306a36Sopenharmony_ci struct kfd_gpu_cache_info *pcache_info) 133762306a36Sopenharmony_ci{ 133862306a36Sopenharmony_ci struct amdgpu_device *adev = kdev->adev; 133962306a36Sopenharmony_ci int i = 0; 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci /* TCP L1 Cache per CU */ 134262306a36Sopenharmony_ci if (adev->gfx.config.gc_tcp_l1_size) { 134362306a36Sopenharmony_ci pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; 134462306a36Sopenharmony_ci pcache_info[i].cache_level = 1; 134562306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 134662306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 134762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 134862306a36Sopenharmony_ci pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; 134962306a36Sopenharmony_ci i++; 135062306a36Sopenharmony_ci } 135162306a36Sopenharmony_ci /* Scalar L1 Instruction Cache per SQC */ 135262306a36Sopenharmony_ci if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { 135362306a36Sopenharmony_ci pcache_info[i].cache_size = 135462306a36Sopenharmony_ci adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; 135562306a36Sopenharmony_ci pcache_info[i].cache_level = 1; 135662306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 135762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_INST_CACHE | 135862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 135962306a36Sopenharmony_ci pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 136062306a36Sopenharmony_ci i++; 136162306a36Sopenharmony_ci } 136262306a36Sopenharmony_ci /* Scalar L1 Data Cache per SQC */ 136362306a36Sopenharmony_ci if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { 136462306a36Sopenharmony_ci pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; 136562306a36Sopenharmony_ci pcache_info[i].cache_level = 1; 136662306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 136762306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 136862306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 136962306a36Sopenharmony_ci pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 137062306a36Sopenharmony_ci i++; 137162306a36Sopenharmony_ci } 137262306a36Sopenharmony_ci /* GL1 Data Cache per SA */ 137362306a36Sopenharmony_ci if (adev->gfx.config.gc_gl1c_per_sa && 137462306a36Sopenharmony_ci adev->gfx.config.gc_gl1c_size_per_instance) { 137562306a36Sopenharmony_ci pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * 137662306a36Sopenharmony_ci adev->gfx.config.gc_gl1c_size_per_instance; 137762306a36Sopenharmony_ci pcache_info[i].cache_level = 1; 137862306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 137962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 138062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 138162306a36Sopenharmony_ci pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 138262306a36Sopenharmony_ci i++; 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci /* L2 Data Cache per GPU (Total Tex Cache) */ 138562306a36Sopenharmony_ci if (adev->gfx.config.gc_gl2c_per_gpu) { 138662306a36Sopenharmony_ci pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; 138762306a36Sopenharmony_ci pcache_info[i].cache_level = 2; 138862306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 138962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 139062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 139162306a36Sopenharmony_ci pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 139262306a36Sopenharmony_ci i++; 139362306a36Sopenharmony_ci } 139462306a36Sopenharmony_ci /* L3 Data Cache per GPU */ 139562306a36Sopenharmony_ci if (adev->gmc.mall_size) { 139662306a36Sopenharmony_ci pcache_info[i].cache_size = adev->gmc.mall_size / 1024; 139762306a36Sopenharmony_ci pcache_info[i].cache_level = 3; 139862306a36Sopenharmony_ci pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 139962306a36Sopenharmony_ci CRAT_CACHE_FLAGS_DATA_CACHE | 140062306a36Sopenharmony_ci CRAT_CACHE_FLAGS_SIMD_CACHE); 140162306a36Sopenharmony_ci pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 140262306a36Sopenharmony_ci i++; 140362306a36Sopenharmony_ci } 140462306a36Sopenharmony_ci return i; 140562306a36Sopenharmony_ci} 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ciint kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci int num_of_cache_types = 0; 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci switch (kdev->adev->asic_type) { 141262306a36Sopenharmony_ci case CHIP_KAVERI: 141362306a36Sopenharmony_ci *pcache_info = kaveri_cache_info; 141462306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); 141562306a36Sopenharmony_ci break; 141662306a36Sopenharmony_ci case CHIP_HAWAII: 141762306a36Sopenharmony_ci *pcache_info = hawaii_cache_info; 141862306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); 141962306a36Sopenharmony_ci break; 142062306a36Sopenharmony_ci case CHIP_CARRIZO: 142162306a36Sopenharmony_ci *pcache_info = carrizo_cache_info; 142262306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); 142362306a36Sopenharmony_ci break; 142462306a36Sopenharmony_ci case CHIP_TONGA: 142562306a36Sopenharmony_ci *pcache_info = tonga_cache_info; 142662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(tonga_cache_info); 142762306a36Sopenharmony_ci break; 142862306a36Sopenharmony_ci case CHIP_FIJI: 142962306a36Sopenharmony_ci *pcache_info = fiji_cache_info; 143062306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(fiji_cache_info); 143162306a36Sopenharmony_ci break; 143262306a36Sopenharmony_ci case CHIP_POLARIS10: 143362306a36Sopenharmony_ci *pcache_info = polaris10_cache_info; 143462306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); 143562306a36Sopenharmony_ci break; 143662306a36Sopenharmony_ci case CHIP_POLARIS11: 143762306a36Sopenharmony_ci *pcache_info = polaris11_cache_info; 143862306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); 143962306a36Sopenharmony_ci break; 144062306a36Sopenharmony_ci case CHIP_POLARIS12: 144162306a36Sopenharmony_ci *pcache_info = polaris12_cache_info; 144262306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); 144362306a36Sopenharmony_ci break; 144462306a36Sopenharmony_ci case CHIP_VEGAM: 144562306a36Sopenharmony_ci *pcache_info = vegam_cache_info; 144662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(vegam_cache_info); 144762306a36Sopenharmony_ci break; 144862306a36Sopenharmony_ci default: 144962306a36Sopenharmony_ci switch (KFD_GC_VERSION(kdev)) { 145062306a36Sopenharmony_ci case IP_VERSION(9, 0, 1): 145162306a36Sopenharmony_ci *pcache_info = vega10_cache_info; 145262306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(vega10_cache_info); 145362306a36Sopenharmony_ci break; 145462306a36Sopenharmony_ci case IP_VERSION(9, 2, 1): 145562306a36Sopenharmony_ci *pcache_info = vega12_cache_info; 145662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(vega12_cache_info); 145762306a36Sopenharmony_ci break; 145862306a36Sopenharmony_ci case IP_VERSION(9, 4, 0): 145962306a36Sopenharmony_ci case IP_VERSION(9, 4, 1): 146062306a36Sopenharmony_ci *pcache_info = vega20_cache_info; 146162306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(vega20_cache_info); 146262306a36Sopenharmony_ci break; 146362306a36Sopenharmony_ci case IP_VERSION(9, 4, 2): 146462306a36Sopenharmony_ci case IP_VERSION(9, 4, 3): 146562306a36Sopenharmony_ci *pcache_info = aldebaran_cache_info; 146662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); 146762306a36Sopenharmony_ci break; 146862306a36Sopenharmony_ci case IP_VERSION(9, 1, 0): 146962306a36Sopenharmony_ci case IP_VERSION(9, 2, 2): 147062306a36Sopenharmony_ci *pcache_info = raven_cache_info; 147162306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(raven_cache_info); 147262306a36Sopenharmony_ci break; 147362306a36Sopenharmony_ci case IP_VERSION(9, 3, 0): 147462306a36Sopenharmony_ci *pcache_info = renoir_cache_info; 147562306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(renoir_cache_info); 147662306a36Sopenharmony_ci break; 147762306a36Sopenharmony_ci case IP_VERSION(10, 1, 10): 147862306a36Sopenharmony_ci case IP_VERSION(10, 1, 2): 147962306a36Sopenharmony_ci case IP_VERSION(10, 1, 3): 148062306a36Sopenharmony_ci case IP_VERSION(10, 1, 4): 148162306a36Sopenharmony_ci *pcache_info = navi10_cache_info; 148262306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(navi10_cache_info); 148362306a36Sopenharmony_ci break; 148462306a36Sopenharmony_ci case IP_VERSION(10, 1, 1): 148562306a36Sopenharmony_ci *pcache_info = navi14_cache_info; 148662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(navi14_cache_info); 148762306a36Sopenharmony_ci break; 148862306a36Sopenharmony_ci case IP_VERSION(10, 3, 0): 148962306a36Sopenharmony_ci *pcache_info = sienna_cichlid_cache_info; 149062306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); 149162306a36Sopenharmony_ci break; 149262306a36Sopenharmony_ci case IP_VERSION(10, 3, 2): 149362306a36Sopenharmony_ci *pcache_info = navy_flounder_cache_info; 149462306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); 149562306a36Sopenharmony_ci break; 149662306a36Sopenharmony_ci case IP_VERSION(10, 3, 4): 149762306a36Sopenharmony_ci *pcache_info = dimgrey_cavefish_cache_info; 149862306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); 149962306a36Sopenharmony_ci break; 150062306a36Sopenharmony_ci case IP_VERSION(10, 3, 1): 150162306a36Sopenharmony_ci *pcache_info = vangogh_cache_info; 150262306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); 150362306a36Sopenharmony_ci break; 150462306a36Sopenharmony_ci case IP_VERSION(10, 3, 5): 150562306a36Sopenharmony_ci *pcache_info = beige_goby_cache_info; 150662306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); 150762306a36Sopenharmony_ci break; 150862306a36Sopenharmony_ci case IP_VERSION(10, 3, 3): 150962306a36Sopenharmony_ci *pcache_info = yellow_carp_cache_info; 151062306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); 151162306a36Sopenharmony_ci break; 151262306a36Sopenharmony_ci case IP_VERSION(10, 3, 6): 151362306a36Sopenharmony_ci *pcache_info = gc_10_3_6_cache_info; 151462306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); 151562306a36Sopenharmony_ci break; 151662306a36Sopenharmony_ci case IP_VERSION(10, 3, 7): 151762306a36Sopenharmony_ci *pcache_info = gfx1037_cache_info; 151862306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); 151962306a36Sopenharmony_ci break; 152062306a36Sopenharmony_ci case IP_VERSION(11, 0, 0): 152162306a36Sopenharmony_ci case IP_VERSION(11, 0, 1): 152262306a36Sopenharmony_ci case IP_VERSION(11, 0, 2): 152362306a36Sopenharmony_ci case IP_VERSION(11, 0, 3): 152462306a36Sopenharmony_ci case IP_VERSION(11, 0, 4): 152562306a36Sopenharmony_ci num_of_cache_types = 152662306a36Sopenharmony_ci kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); 152762306a36Sopenharmony_ci break; 152862306a36Sopenharmony_ci default: 152962306a36Sopenharmony_ci *pcache_info = dummy_cache_info; 153062306a36Sopenharmony_ci num_of_cache_types = ARRAY_SIZE(dummy_cache_info); 153162306a36Sopenharmony_ci pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); 153262306a36Sopenharmony_ci break; 153362306a36Sopenharmony_ci } 153462306a36Sopenharmony_ci } 153562306a36Sopenharmony_ci return num_of_cache_types; 153662306a36Sopenharmony_ci} 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci/* Memory required to create Virtual CRAT. 153962306a36Sopenharmony_ci * Since there is no easy way to predict the amount of memory required, the 154062306a36Sopenharmony_ci * following amount is allocated for GPU Virtual CRAT. This is 154162306a36Sopenharmony_ci * expected to cover all known conditions. But to be safe additional check 154262306a36Sopenharmony_ci * is put in the code to ensure we don't overwrite. 154362306a36Sopenharmony_ci */ 154462306a36Sopenharmony_ci#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node 154762306a36Sopenharmony_ci * 154862306a36Sopenharmony_ci * @numa_node_id: CPU NUMA node id 154962306a36Sopenharmony_ci * @avail_size: Available size in the memory 155062306a36Sopenharmony_ci * @sub_type_hdr: Memory into which compute info will be filled in 155162306a36Sopenharmony_ci * 155262306a36Sopenharmony_ci * Return 0 if successful else return -ve value 155362306a36Sopenharmony_ci */ 155462306a36Sopenharmony_cistatic int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, 155562306a36Sopenharmony_ci int proximity_domain, 155662306a36Sopenharmony_ci struct crat_subtype_computeunit *sub_type_hdr) 155762306a36Sopenharmony_ci{ 155862306a36Sopenharmony_ci const struct cpumask *cpumask; 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_computeunit); 156162306a36Sopenharmony_ci if (*avail_size < 0) 156262306a36Sopenharmony_ci return -ENOMEM; 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci /* Fill in subtype header data */ 156762306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; 156862306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); 156962306a36Sopenharmony_ci sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci cpumask = cpumask_of_node(numa_node_id); 157262306a36Sopenharmony_ci 157362306a36Sopenharmony_ci /* Fill in CU data */ 157462306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; 157562306a36Sopenharmony_ci sub_type_hdr->proximity_domain = proximity_domain; 157662306a36Sopenharmony_ci sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); 157762306a36Sopenharmony_ci if (sub_type_hdr->processor_id_low == -1) 157862306a36Sopenharmony_ci return -EINVAL; 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci return 0; 158362306a36Sopenharmony_ci} 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node 158662306a36Sopenharmony_ci * 158762306a36Sopenharmony_ci * @numa_node_id: CPU NUMA node id 158862306a36Sopenharmony_ci * @avail_size: Available size in the memory 158962306a36Sopenharmony_ci * @sub_type_hdr: Memory into which compute info will be filled in 159062306a36Sopenharmony_ci * 159162306a36Sopenharmony_ci * Return 0 if successful else return -ve value 159262306a36Sopenharmony_ci */ 159362306a36Sopenharmony_cistatic int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, 159462306a36Sopenharmony_ci int proximity_domain, 159562306a36Sopenharmony_ci struct crat_subtype_memory *sub_type_hdr) 159662306a36Sopenharmony_ci{ 159762306a36Sopenharmony_ci uint64_t mem_in_bytes = 0; 159862306a36Sopenharmony_ci pg_data_t *pgdat; 159962306a36Sopenharmony_ci int zone_type; 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_memory); 160262306a36Sopenharmony_ci if (*avail_size < 0) 160362306a36Sopenharmony_ci return -ENOMEM; 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci /* Fill in subtype header data */ 160862306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; 160962306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_memory); 161062306a36Sopenharmony_ci sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci /* Fill in Memory Subunit data */ 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci /* Unlike si_meminfo, si_meminfo_node is not exported. So 161562306a36Sopenharmony_ci * the following lines are duplicated from si_meminfo_node 161662306a36Sopenharmony_ci * function 161762306a36Sopenharmony_ci */ 161862306a36Sopenharmony_ci pgdat = NODE_DATA(numa_node_id); 161962306a36Sopenharmony_ci for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) 162062306a36Sopenharmony_ci mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); 162162306a36Sopenharmony_ci mem_in_bytes <<= PAGE_SHIFT; 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); 162462306a36Sopenharmony_ci sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); 162562306a36Sopenharmony_ci sub_type_hdr->proximity_domain = proximity_domain; 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci return 0; 162862306a36Sopenharmony_ci} 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci#ifdef CONFIG_X86_64 163162306a36Sopenharmony_cistatic int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, 163262306a36Sopenharmony_ci uint32_t *num_entries, 163362306a36Sopenharmony_ci struct crat_subtype_iolink *sub_type_hdr) 163462306a36Sopenharmony_ci{ 163562306a36Sopenharmony_ci int nid; 163662306a36Sopenharmony_ci struct cpuinfo_x86 *c = &cpu_data(0); 163762306a36Sopenharmony_ci uint8_t link_type; 163862306a36Sopenharmony_ci 163962306a36Sopenharmony_ci if (c->x86_vendor == X86_VENDOR_AMD) 164062306a36Sopenharmony_ci link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; 164162306a36Sopenharmony_ci else 164262306a36Sopenharmony_ci link_type = CRAT_IOLINK_TYPE_QPI_1_1; 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci *num_entries = 0; 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci /* Create IO links from this node to other CPU nodes */ 164762306a36Sopenharmony_ci for_each_online_node(nid) { 164862306a36Sopenharmony_ci if (nid == numa_node_id) /* node itself */ 164962306a36Sopenharmony_ci continue; 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_iolink); 165262306a36Sopenharmony_ci if (*avail_size < 0) 165362306a36Sopenharmony_ci return -ENOMEM; 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci /* Fill in subtype header data */ 165862306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 165962306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 166062306a36Sopenharmony_ci sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci /* Fill in IO link data */ 166362306a36Sopenharmony_ci sub_type_hdr->proximity_domain_from = numa_node_id; 166462306a36Sopenharmony_ci sub_type_hdr->proximity_domain_to = nid; 166562306a36Sopenharmony_ci sub_type_hdr->io_interface_type = link_type; 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci (*num_entries)++; 166862306a36Sopenharmony_ci sub_type_hdr++; 166962306a36Sopenharmony_ci } 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci return 0; 167262306a36Sopenharmony_ci} 167362306a36Sopenharmony_ci#endif 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU 167662306a36Sopenharmony_ci * 167762306a36Sopenharmony_ci * @pcrat_image: Fill in VCRAT for CPU 167862306a36Sopenharmony_ci * @size: [IN] allocated size of crat_image. 167962306a36Sopenharmony_ci * [OUT] actual size of data filled in crat_image 168062306a36Sopenharmony_ci */ 168162306a36Sopenharmony_cistatic int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) 168262306a36Sopenharmony_ci{ 168362306a36Sopenharmony_ci struct crat_header *crat_table = (struct crat_header *)pcrat_image; 168462306a36Sopenharmony_ci struct acpi_table_header *acpi_table; 168562306a36Sopenharmony_ci acpi_status status; 168662306a36Sopenharmony_ci struct crat_subtype_generic *sub_type_hdr; 168762306a36Sopenharmony_ci int avail_size = *size; 168862306a36Sopenharmony_ci int numa_node_id; 168962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 169062306a36Sopenharmony_ci uint32_t entries = 0; 169162306a36Sopenharmony_ci#endif 169262306a36Sopenharmony_ci int ret = 0; 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_ci if (!pcrat_image) 169562306a36Sopenharmony_ci return -EINVAL; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci /* Fill in CRAT Header. 169862306a36Sopenharmony_ci * Modify length and total_entries as subunits are added. 169962306a36Sopenharmony_ci */ 170062306a36Sopenharmony_ci avail_size -= sizeof(struct crat_header); 170162306a36Sopenharmony_ci if (avail_size < 0) 170262306a36Sopenharmony_ci return -ENOMEM; 170362306a36Sopenharmony_ci 170462306a36Sopenharmony_ci memset(crat_table, 0, sizeof(struct crat_header)); 170562306a36Sopenharmony_ci memcpy(&crat_table->signature, CRAT_SIGNATURE, 170662306a36Sopenharmony_ci sizeof(crat_table->signature)); 170762306a36Sopenharmony_ci crat_table->length = sizeof(struct crat_header); 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci status = acpi_get_table("DSDT", 0, &acpi_table); 171062306a36Sopenharmony_ci if (status != AE_OK) 171162306a36Sopenharmony_ci pr_warn("DSDT table not found for OEM information\n"); 171262306a36Sopenharmony_ci else { 171362306a36Sopenharmony_ci crat_table->oem_revision = acpi_table->revision; 171462306a36Sopenharmony_ci memcpy(crat_table->oem_id, acpi_table->oem_id, 171562306a36Sopenharmony_ci CRAT_OEMID_LENGTH); 171662306a36Sopenharmony_ci memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, 171762306a36Sopenharmony_ci CRAT_OEMTABLEID_LENGTH); 171862306a36Sopenharmony_ci acpi_put_table(acpi_table); 171962306a36Sopenharmony_ci } 172062306a36Sopenharmony_ci crat_table->total_entries = 0; 172162306a36Sopenharmony_ci crat_table->num_domains = 0; 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 172462306a36Sopenharmony_ci 172562306a36Sopenharmony_ci for_each_online_node(numa_node_id) { 172662306a36Sopenharmony_ci if (kfd_numa_node_to_apic_id(numa_node_id) == -1) 172762306a36Sopenharmony_ci continue; 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci /* Fill in Subtype: Compute Unit */ 173062306a36Sopenharmony_ci ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, 173162306a36Sopenharmony_ci crat_table->num_domains, 173262306a36Sopenharmony_ci (struct crat_subtype_computeunit *)sub_type_hdr); 173362306a36Sopenharmony_ci if (ret < 0) 173462306a36Sopenharmony_ci return ret; 173562306a36Sopenharmony_ci crat_table->length += sub_type_hdr->length; 173662306a36Sopenharmony_ci crat_table->total_entries++; 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 173962306a36Sopenharmony_ci sub_type_hdr->length); 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci /* Fill in Subtype: Memory */ 174262306a36Sopenharmony_ci ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, 174362306a36Sopenharmony_ci crat_table->num_domains, 174462306a36Sopenharmony_ci (struct crat_subtype_memory *)sub_type_hdr); 174562306a36Sopenharmony_ci if (ret < 0) 174662306a36Sopenharmony_ci return ret; 174762306a36Sopenharmony_ci crat_table->length += sub_type_hdr->length; 174862306a36Sopenharmony_ci crat_table->total_entries++; 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 175162306a36Sopenharmony_ci sub_type_hdr->length); 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci /* Fill in Subtype: IO Link */ 175462306a36Sopenharmony_ci#ifdef CONFIG_X86_64 175562306a36Sopenharmony_ci ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, 175662306a36Sopenharmony_ci &entries, 175762306a36Sopenharmony_ci (struct crat_subtype_iolink *)sub_type_hdr); 175862306a36Sopenharmony_ci if (ret < 0) 175962306a36Sopenharmony_ci return ret; 176062306a36Sopenharmony_ci 176162306a36Sopenharmony_ci if (entries) { 176262306a36Sopenharmony_ci crat_table->length += (sub_type_hdr->length * entries); 176362306a36Sopenharmony_ci crat_table->total_entries += entries; 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 176662306a36Sopenharmony_ci sub_type_hdr->length * entries); 176762306a36Sopenharmony_ci } 176862306a36Sopenharmony_ci#else 176962306a36Sopenharmony_ci pr_info("IO link not available for non x86 platforms\n"); 177062306a36Sopenharmony_ci#endif 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci crat_table->num_domains++; 177362306a36Sopenharmony_ci } 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci /* TODO: Add cache Subtype for CPU. 177662306a36Sopenharmony_ci * Currently, CPU cache information is available in function 177762306a36Sopenharmony_ci * detect_cache_attributes(cpu) defined in the file 177862306a36Sopenharmony_ci * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not 177962306a36Sopenharmony_ci * exported and to get the same information the code needs to be 178062306a36Sopenharmony_ci * duplicated. 178162306a36Sopenharmony_ci */ 178262306a36Sopenharmony_ci 178362306a36Sopenharmony_ci *size = crat_table->length; 178462306a36Sopenharmony_ci pr_info("Virtual CRAT table created for CPU\n"); 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci return 0; 178762306a36Sopenharmony_ci} 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_cistatic int kfd_fill_gpu_memory_affinity(int *avail_size, 179062306a36Sopenharmony_ci struct kfd_node *kdev, uint8_t type, uint64_t size, 179162306a36Sopenharmony_ci struct crat_subtype_memory *sub_type_hdr, 179262306a36Sopenharmony_ci uint32_t proximity_domain, 179362306a36Sopenharmony_ci const struct kfd_local_mem_info *local_mem_info) 179462306a36Sopenharmony_ci{ 179562306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_memory); 179662306a36Sopenharmony_ci if (*avail_size < 0) 179762306a36Sopenharmony_ci return -ENOMEM; 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_ci memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); 180062306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; 180162306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_memory); 180262306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci sub_type_hdr->proximity_domain = proximity_domain; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", 180762306a36Sopenharmony_ci type, size); 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci sub_type_hdr->length_low = lower_32_bits(size); 181062306a36Sopenharmony_ci sub_type_hdr->length_high = upper_32_bits(size); 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci sub_type_hdr->width = local_mem_info->vram_width; 181362306a36Sopenharmony_ci sub_type_hdr->visibility_type = type; 181462306a36Sopenharmony_ci 181562306a36Sopenharmony_ci return 0; 181662306a36Sopenharmony_ci} 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci#ifdef CONFIG_ACPI_NUMA 181962306a36Sopenharmony_cistatic void kfd_find_numa_node_in_srat(struct kfd_node *kdev) 182062306a36Sopenharmony_ci{ 182162306a36Sopenharmony_ci struct acpi_table_header *table_header = NULL; 182262306a36Sopenharmony_ci struct acpi_subtable_header *sub_header = NULL; 182362306a36Sopenharmony_ci unsigned long table_end, subtable_len; 182462306a36Sopenharmony_ci u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | 182562306a36Sopenharmony_ci pci_dev_id(kdev->adev->pdev); 182662306a36Sopenharmony_ci u32 bdf; 182762306a36Sopenharmony_ci acpi_status status; 182862306a36Sopenharmony_ci struct acpi_srat_cpu_affinity *cpu; 182962306a36Sopenharmony_ci struct acpi_srat_generic_affinity *gpu; 183062306a36Sopenharmony_ci int pxm = 0, max_pxm = 0; 183162306a36Sopenharmony_ci int numa_node = NUMA_NO_NODE; 183262306a36Sopenharmony_ci bool found = false; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci /* Fetch the SRAT table from ACPI */ 183562306a36Sopenharmony_ci status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header); 183662306a36Sopenharmony_ci if (status == AE_NOT_FOUND) { 183762306a36Sopenharmony_ci pr_warn("SRAT table not found\n"); 183862306a36Sopenharmony_ci return; 183962306a36Sopenharmony_ci } else if (ACPI_FAILURE(status)) { 184062306a36Sopenharmony_ci const char *err = acpi_format_exception(status); 184162306a36Sopenharmony_ci pr_err("SRAT table error: %s\n", err); 184262306a36Sopenharmony_ci return; 184362306a36Sopenharmony_ci } 184462306a36Sopenharmony_ci 184562306a36Sopenharmony_ci table_end = (unsigned long)table_header + table_header->length; 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci /* Parse all entries looking for a match. */ 184862306a36Sopenharmony_ci sub_header = (struct acpi_subtable_header *) 184962306a36Sopenharmony_ci ((unsigned long)table_header + 185062306a36Sopenharmony_ci sizeof(struct acpi_table_srat)); 185162306a36Sopenharmony_ci subtable_len = sub_header->length; 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_ci while (((unsigned long)sub_header) + subtable_len < table_end) { 185462306a36Sopenharmony_ci /* 185562306a36Sopenharmony_ci * If length is 0, break from this loop to avoid 185662306a36Sopenharmony_ci * infinite loop. 185762306a36Sopenharmony_ci */ 185862306a36Sopenharmony_ci if (subtable_len == 0) { 185962306a36Sopenharmony_ci pr_err("SRAT invalid zero length\n"); 186062306a36Sopenharmony_ci break; 186162306a36Sopenharmony_ci } 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci switch (sub_header->type) { 186462306a36Sopenharmony_ci case ACPI_SRAT_TYPE_CPU_AFFINITY: 186562306a36Sopenharmony_ci cpu = (struct acpi_srat_cpu_affinity *)sub_header; 186662306a36Sopenharmony_ci pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | 186762306a36Sopenharmony_ci cpu->proximity_domain_lo; 186862306a36Sopenharmony_ci if (pxm > max_pxm) 186962306a36Sopenharmony_ci max_pxm = pxm; 187062306a36Sopenharmony_ci break; 187162306a36Sopenharmony_ci case ACPI_SRAT_TYPE_GENERIC_AFFINITY: 187262306a36Sopenharmony_ci gpu = (struct acpi_srat_generic_affinity *)sub_header; 187362306a36Sopenharmony_ci bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | 187462306a36Sopenharmony_ci *((u16 *)(&gpu->device_handle[2])); 187562306a36Sopenharmony_ci if (bdf == pci_id) { 187662306a36Sopenharmony_ci found = true; 187762306a36Sopenharmony_ci numa_node = pxm_to_node(gpu->proximity_domain); 187862306a36Sopenharmony_ci } 187962306a36Sopenharmony_ci break; 188062306a36Sopenharmony_ci default: 188162306a36Sopenharmony_ci break; 188262306a36Sopenharmony_ci } 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci if (found) 188562306a36Sopenharmony_ci break; 188662306a36Sopenharmony_ci 188762306a36Sopenharmony_ci sub_header = (struct acpi_subtable_header *) 188862306a36Sopenharmony_ci ((unsigned long)sub_header + subtable_len); 188962306a36Sopenharmony_ci subtable_len = sub_header->length; 189062306a36Sopenharmony_ci } 189162306a36Sopenharmony_ci 189262306a36Sopenharmony_ci acpi_put_table(table_header); 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci /* Workaround bad cpu-gpu binding case */ 189562306a36Sopenharmony_ci if (found && (numa_node < 0 || 189662306a36Sopenharmony_ci numa_node > pxm_to_node(max_pxm))) 189762306a36Sopenharmony_ci numa_node = 0; 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci if (numa_node != NUMA_NO_NODE) 190062306a36Sopenharmony_ci set_dev_node(&kdev->adev->pdev->dev, numa_node); 190162306a36Sopenharmony_ci} 190262306a36Sopenharmony_ci#endif 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13 190562306a36Sopenharmony_ci#define KFD_CRAT_XGMI_WEIGHT 15 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU 190862306a36Sopenharmony_ci * to its NUMA node 190962306a36Sopenharmony_ci * @avail_size: Available size in the memory 191062306a36Sopenharmony_ci * @kdev - [IN] GPU device 191162306a36Sopenharmony_ci * @sub_type_hdr: Memory into which io link info will be filled in 191262306a36Sopenharmony_ci * @proximity_domain - proximity domain of the GPU node 191362306a36Sopenharmony_ci * 191462306a36Sopenharmony_ci * Return 0 if successful else return -ve value 191562306a36Sopenharmony_ci */ 191662306a36Sopenharmony_cistatic int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, 191762306a36Sopenharmony_ci struct kfd_node *kdev, 191862306a36Sopenharmony_ci struct crat_subtype_iolink *sub_type_hdr, 191962306a36Sopenharmony_ci uint32_t proximity_domain) 192062306a36Sopenharmony_ci{ 192162306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_iolink); 192262306a36Sopenharmony_ci if (*avail_size < 0) 192362306a36Sopenharmony_ci return -ENOMEM; 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_ci memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci /* Fill in subtype header data */ 192862306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 192962306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 193062306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; 193162306a36Sopenharmony_ci if (kfd_dev_is_large_bar(kdev)) 193262306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci /* Fill in IOLINK subtype. 193562306a36Sopenharmony_ci * TODO: Fill-in other fields of iolink subtype 193662306a36Sopenharmony_ci */ 193762306a36Sopenharmony_ci if (kdev->adev->gmc.xgmi.connected_to_cpu || 193862306a36Sopenharmony_ci (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) && 193962306a36Sopenharmony_ci kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == 194062306a36Sopenharmony_ci AMDGPU_PKG_TYPE_APU)) { 194162306a36Sopenharmony_ci bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); 194262306a36Sopenharmony_ci int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT : 194362306a36Sopenharmony_ci KFD_CRAT_INTRA_SOCKET_WEIGHT; 194462306a36Sopenharmony_ci uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( 194562306a36Sopenharmony_ci kdev->adev, NULL, true) : mem_bw; 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci /* 194862306a36Sopenharmony_ci * with host gpu xgmi link, host can access gpu memory whether 194962306a36Sopenharmony_ci * or not pcie bar type is large, so always create bidirectional 195062306a36Sopenharmony_ci * io link. 195162306a36Sopenharmony_ci */ 195262306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 195362306a36Sopenharmony_ci sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; 195462306a36Sopenharmony_ci sub_type_hdr->weight_xgmi = weight; 195562306a36Sopenharmony_ci sub_type_hdr->minimum_bandwidth_mbs = bandwidth; 195662306a36Sopenharmony_ci sub_type_hdr->maximum_bandwidth_mbs = bandwidth; 195762306a36Sopenharmony_ci } else { 195862306a36Sopenharmony_ci sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; 195962306a36Sopenharmony_ci sub_type_hdr->minimum_bandwidth_mbs = 196062306a36Sopenharmony_ci amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); 196162306a36Sopenharmony_ci sub_type_hdr->maximum_bandwidth_mbs = 196262306a36Sopenharmony_ci amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); 196362306a36Sopenharmony_ci } 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_ci sub_type_hdr->proximity_domain_from = proximity_domain; 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_ci#ifdef CONFIG_ACPI_NUMA 196862306a36Sopenharmony_ci if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && 196962306a36Sopenharmony_ci num_possible_nodes() > 1) 197062306a36Sopenharmony_ci kfd_find_numa_node_in_srat(kdev); 197162306a36Sopenharmony_ci#endif 197262306a36Sopenharmony_ci#ifdef CONFIG_NUMA 197362306a36Sopenharmony_ci if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) 197462306a36Sopenharmony_ci sub_type_hdr->proximity_domain_to = 0; 197562306a36Sopenharmony_ci else 197662306a36Sopenharmony_ci sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; 197762306a36Sopenharmony_ci#else 197862306a36Sopenharmony_ci sub_type_hdr->proximity_domain_to = 0; 197962306a36Sopenharmony_ci#endif 198062306a36Sopenharmony_ci return 0; 198162306a36Sopenharmony_ci} 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_cistatic int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, 198462306a36Sopenharmony_ci struct kfd_node *kdev, 198562306a36Sopenharmony_ci struct kfd_node *peer_kdev, 198662306a36Sopenharmony_ci struct crat_subtype_iolink *sub_type_hdr, 198762306a36Sopenharmony_ci uint32_t proximity_domain_from, 198862306a36Sopenharmony_ci uint32_t proximity_domain_to) 198962306a36Sopenharmony_ci{ 199062306a36Sopenharmony_ci bool use_ta_info = kdev->kfd->num_nodes == 1; 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci *avail_size -= sizeof(struct crat_subtype_iolink); 199362306a36Sopenharmony_ci if (*avail_size < 0) 199462306a36Sopenharmony_ci return -ENOMEM; 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 199962306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 200062306a36Sopenharmony_ci sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | 200162306a36Sopenharmony_ci CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_ci sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; 200462306a36Sopenharmony_ci sub_type_hdr->proximity_domain_from = proximity_domain_from; 200562306a36Sopenharmony_ci sub_type_hdr->proximity_domain_to = proximity_domain_to; 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci if (use_ta_info) { 200862306a36Sopenharmony_ci sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * 200962306a36Sopenharmony_ci amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); 201062306a36Sopenharmony_ci sub_type_hdr->maximum_bandwidth_mbs = 201162306a36Sopenharmony_ci amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, 201262306a36Sopenharmony_ci peer_kdev->adev, false); 201362306a36Sopenharmony_ci sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? 201462306a36Sopenharmony_ci amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; 201562306a36Sopenharmony_ci } else { 201662306a36Sopenharmony_ci bool is_single_hop = kdev->kfd == peer_kdev->kfd; 201762306a36Sopenharmony_ci int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT : 201862306a36Sopenharmony_ci (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT; 201962306a36Sopenharmony_ci int mem_bw = 819200; 202062306a36Sopenharmony_ci 202162306a36Sopenharmony_ci sub_type_hdr->weight_xgmi = weight; 202262306a36Sopenharmony_ci sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; 202362306a36Sopenharmony_ci sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; 202462306a36Sopenharmony_ci } 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci return 0; 202762306a36Sopenharmony_ci} 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU 203062306a36Sopenharmony_ci * 203162306a36Sopenharmony_ci * @pcrat_image: Fill in VCRAT for GPU 203262306a36Sopenharmony_ci * @size: [IN] allocated size of crat_image. 203362306a36Sopenharmony_ci * [OUT] actual size of data filled in crat_image 203462306a36Sopenharmony_ci */ 203562306a36Sopenharmony_cistatic int kfd_create_vcrat_image_gpu(void *pcrat_image, 203662306a36Sopenharmony_ci size_t *size, struct kfd_node *kdev, 203762306a36Sopenharmony_ci uint32_t proximity_domain) 203862306a36Sopenharmony_ci{ 203962306a36Sopenharmony_ci struct crat_header *crat_table = (struct crat_header *)pcrat_image; 204062306a36Sopenharmony_ci struct crat_subtype_generic *sub_type_hdr; 204162306a36Sopenharmony_ci struct kfd_local_mem_info local_mem_info; 204262306a36Sopenharmony_ci struct kfd_topology_device *peer_dev; 204362306a36Sopenharmony_ci struct crat_subtype_computeunit *cu; 204462306a36Sopenharmony_ci struct kfd_cu_info cu_info; 204562306a36Sopenharmony_ci int avail_size = *size; 204662306a36Sopenharmony_ci uint32_t total_num_of_cu; 204762306a36Sopenharmony_ci uint32_t nid = 0; 204862306a36Sopenharmony_ci int ret = 0; 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) 205162306a36Sopenharmony_ci return -EINVAL; 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci /* Fill the CRAT Header. 205462306a36Sopenharmony_ci * Modify length and total_entries as subunits are added. 205562306a36Sopenharmony_ci */ 205662306a36Sopenharmony_ci avail_size -= sizeof(struct crat_header); 205762306a36Sopenharmony_ci if (avail_size < 0) 205862306a36Sopenharmony_ci return -ENOMEM; 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci memset(crat_table, 0, sizeof(struct crat_header)); 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci memcpy(&crat_table->signature, CRAT_SIGNATURE, 206362306a36Sopenharmony_ci sizeof(crat_table->signature)); 206462306a36Sopenharmony_ci /* Change length as we add more subtypes*/ 206562306a36Sopenharmony_ci crat_table->length = sizeof(struct crat_header); 206662306a36Sopenharmony_ci crat_table->num_domains = 1; 206762306a36Sopenharmony_ci crat_table->total_entries = 0; 206862306a36Sopenharmony_ci 206962306a36Sopenharmony_ci /* Fill in Subtype: Compute Unit 207062306a36Sopenharmony_ci * First fill in the sub type header and then sub type data 207162306a36Sopenharmony_ci */ 207262306a36Sopenharmony_ci avail_size -= sizeof(struct crat_subtype_computeunit); 207362306a36Sopenharmony_ci if (avail_size < 0) 207462306a36Sopenharmony_ci return -ENOMEM; 207562306a36Sopenharmony_ci 207662306a36Sopenharmony_ci sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); 207762306a36Sopenharmony_ci memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_ci sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; 208062306a36Sopenharmony_ci sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); 208162306a36Sopenharmony_ci sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci /* Fill CU subtype data */ 208462306a36Sopenharmony_ci cu = (struct crat_subtype_computeunit *)sub_type_hdr; 208562306a36Sopenharmony_ci cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; 208662306a36Sopenharmony_ci cu->proximity_domain = proximity_domain; 208762306a36Sopenharmony_ci 208862306a36Sopenharmony_ci amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); 208962306a36Sopenharmony_ci cu->num_simd_per_cu = cu_info.simd_per_cu; 209062306a36Sopenharmony_ci cu->num_simd_cores = cu_info.simd_per_cu * 209162306a36Sopenharmony_ci (cu_info.cu_active_number / kdev->kfd->num_nodes); 209262306a36Sopenharmony_ci cu->max_waves_simd = cu_info.max_waves_per_simd; 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci cu->wave_front_size = cu_info.wave_front_size; 209562306a36Sopenharmony_ci cu->array_count = cu_info.num_shader_arrays_per_engine * 209662306a36Sopenharmony_ci cu_info.num_shader_engines; 209762306a36Sopenharmony_ci total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); 209862306a36Sopenharmony_ci cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); 209962306a36Sopenharmony_ci cu->num_cu_per_array = cu_info.num_cu_per_sh; 210062306a36Sopenharmony_ci cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; 210162306a36Sopenharmony_ci cu->num_banks = cu_info.num_shader_engines; 210262306a36Sopenharmony_ci cu->lds_size_in_kb = cu_info.lds_size; 210362306a36Sopenharmony_ci 210462306a36Sopenharmony_ci cu->hsa_capability = 0; 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci crat_table->length += sub_type_hdr->length; 210762306a36Sopenharmony_ci crat_table->total_entries++; 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci /* Fill in Subtype: Memory. Only on systems with large BAR (no 211062306a36Sopenharmony_ci * private FB), report memory as public. On other systems 211162306a36Sopenharmony_ci * report the total FB size (public+private) as a single 211262306a36Sopenharmony_ci * private heap. 211362306a36Sopenharmony_ci */ 211462306a36Sopenharmony_ci local_mem_info = kdev->local_mem_info; 211562306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 211662306a36Sopenharmony_ci sub_type_hdr->length); 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci if (debug_largebar) 211962306a36Sopenharmony_ci local_mem_info.local_mem_size_private = 0; 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_ci if (local_mem_info.local_mem_size_private == 0) 212262306a36Sopenharmony_ci ret = kfd_fill_gpu_memory_affinity(&avail_size, 212362306a36Sopenharmony_ci kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, 212462306a36Sopenharmony_ci local_mem_info.local_mem_size_public, 212562306a36Sopenharmony_ci (struct crat_subtype_memory *)sub_type_hdr, 212662306a36Sopenharmony_ci proximity_domain, 212762306a36Sopenharmony_ci &local_mem_info); 212862306a36Sopenharmony_ci else 212962306a36Sopenharmony_ci ret = kfd_fill_gpu_memory_affinity(&avail_size, 213062306a36Sopenharmony_ci kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, 213162306a36Sopenharmony_ci local_mem_info.local_mem_size_public + 213262306a36Sopenharmony_ci local_mem_info.local_mem_size_private, 213362306a36Sopenharmony_ci (struct crat_subtype_memory *)sub_type_hdr, 213462306a36Sopenharmony_ci proximity_domain, 213562306a36Sopenharmony_ci &local_mem_info); 213662306a36Sopenharmony_ci if (ret < 0) 213762306a36Sopenharmony_ci return ret; 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci crat_table->length += sizeof(struct crat_subtype_memory); 214062306a36Sopenharmony_ci crat_table->total_entries++; 214162306a36Sopenharmony_ci 214262306a36Sopenharmony_ci /* Fill in Subtype: IO_LINKS 214362306a36Sopenharmony_ci * Only direct links are added here which is Link from GPU to 214462306a36Sopenharmony_ci * its NUMA node. Indirect links are added by userspace. 214562306a36Sopenharmony_ci */ 214662306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 214762306a36Sopenharmony_ci sub_type_hdr->length); 214862306a36Sopenharmony_ci ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, 214962306a36Sopenharmony_ci (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ci if (ret < 0) 215262306a36Sopenharmony_ci return ret; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci crat_table->length += sub_type_hdr->length; 215562306a36Sopenharmony_ci crat_table->total_entries++; 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci /* Fill in Subtype: IO_LINKS 215962306a36Sopenharmony_ci * Direct links from GPU to other GPUs through xGMI. 216062306a36Sopenharmony_ci * We will loop GPUs that already be processed (with lower value 216162306a36Sopenharmony_ci * of proximity_domain), add the link for the GPUs with same 216262306a36Sopenharmony_ci * hive id (from this GPU to other GPU) . The reversed iolink 216362306a36Sopenharmony_ci * (from other GPU to this GPU) will be added 216462306a36Sopenharmony_ci * in kfd_parse_subtype_iolink. 216562306a36Sopenharmony_ci */ 216662306a36Sopenharmony_ci if (kdev->kfd->hive_id) { 216762306a36Sopenharmony_ci for (nid = 0; nid < proximity_domain; ++nid) { 216862306a36Sopenharmony_ci peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); 216962306a36Sopenharmony_ci if (!peer_dev->gpu) 217062306a36Sopenharmony_ci continue; 217162306a36Sopenharmony_ci if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) 217262306a36Sopenharmony_ci continue; 217362306a36Sopenharmony_ci sub_type_hdr = (typeof(sub_type_hdr))( 217462306a36Sopenharmony_ci (char *)sub_type_hdr + 217562306a36Sopenharmony_ci sizeof(struct crat_subtype_iolink)); 217662306a36Sopenharmony_ci ret = kfd_fill_gpu_xgmi_link_to_gpu( 217762306a36Sopenharmony_ci &avail_size, kdev, peer_dev->gpu, 217862306a36Sopenharmony_ci (struct crat_subtype_iolink *)sub_type_hdr, 217962306a36Sopenharmony_ci proximity_domain, nid); 218062306a36Sopenharmony_ci if (ret < 0) 218162306a36Sopenharmony_ci return ret; 218262306a36Sopenharmony_ci crat_table->length += sub_type_hdr->length; 218362306a36Sopenharmony_ci crat_table->total_entries++; 218462306a36Sopenharmony_ci } 218562306a36Sopenharmony_ci } 218662306a36Sopenharmony_ci *size = crat_table->length; 218762306a36Sopenharmony_ci pr_info("Virtual CRAT table created for GPU\n"); 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci return ret; 219062306a36Sopenharmony_ci} 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_ci/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and 219362306a36Sopenharmony_ci * creates a Virtual CRAT (VCRAT) image 219462306a36Sopenharmony_ci * 219562306a36Sopenharmony_ci * NOTE: Call kfd_destroy_crat_image to free CRAT image memory 219662306a36Sopenharmony_ci * 219762306a36Sopenharmony_ci * @crat_image: VCRAT image created because ACPI does not have a 219862306a36Sopenharmony_ci * CRAT for this device 219962306a36Sopenharmony_ci * @size: [OUT] size of virtual crat_image 220062306a36Sopenharmony_ci * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device 220162306a36Sopenharmony_ci * COMPUTE_UNIT_GPU - Create VCRAT for GPU 220262306a36Sopenharmony_ci * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU 220362306a36Sopenharmony_ci * -- this option is not currently implemented. 220462306a36Sopenharmony_ci * The assumption is that all AMD APUs will have CRAT 220562306a36Sopenharmony_ci * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU 220662306a36Sopenharmony_ci * 220762306a36Sopenharmony_ci * Return 0 if successful else return -ve value 220862306a36Sopenharmony_ci */ 220962306a36Sopenharmony_ciint kfd_create_crat_image_virtual(void **crat_image, size_t *size, 221062306a36Sopenharmony_ci int flags, struct kfd_node *kdev, 221162306a36Sopenharmony_ci uint32_t proximity_domain) 221262306a36Sopenharmony_ci{ 221362306a36Sopenharmony_ci void *pcrat_image = NULL; 221462306a36Sopenharmony_ci int ret = 0, num_nodes; 221562306a36Sopenharmony_ci size_t dyn_size; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci if (!crat_image) 221862306a36Sopenharmony_ci return -EINVAL; 221962306a36Sopenharmony_ci 222062306a36Sopenharmony_ci *crat_image = NULL; 222162306a36Sopenharmony_ci 222262306a36Sopenharmony_ci /* Allocate the CPU Virtual CRAT size based on the number of online 222362306a36Sopenharmony_ci * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. 222462306a36Sopenharmony_ci * This should cover all the current conditions. A check is put not 222562306a36Sopenharmony_ci * to overwrite beyond allocated size for GPUs 222662306a36Sopenharmony_ci */ 222762306a36Sopenharmony_ci switch (flags) { 222862306a36Sopenharmony_ci case COMPUTE_UNIT_CPU: 222962306a36Sopenharmony_ci num_nodes = num_online_nodes(); 223062306a36Sopenharmony_ci dyn_size = sizeof(struct crat_header) + 223162306a36Sopenharmony_ci num_nodes * (sizeof(struct crat_subtype_computeunit) + 223262306a36Sopenharmony_ci sizeof(struct crat_subtype_memory) + 223362306a36Sopenharmony_ci (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); 223462306a36Sopenharmony_ci pcrat_image = kvmalloc(dyn_size, GFP_KERNEL); 223562306a36Sopenharmony_ci if (!pcrat_image) 223662306a36Sopenharmony_ci return -ENOMEM; 223762306a36Sopenharmony_ci *size = dyn_size; 223862306a36Sopenharmony_ci pr_debug("CRAT size is %ld", dyn_size); 223962306a36Sopenharmony_ci ret = kfd_create_vcrat_image_cpu(pcrat_image, size); 224062306a36Sopenharmony_ci break; 224162306a36Sopenharmony_ci case COMPUTE_UNIT_GPU: 224262306a36Sopenharmony_ci if (!kdev) 224362306a36Sopenharmony_ci return -EINVAL; 224462306a36Sopenharmony_ci pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); 224562306a36Sopenharmony_ci if (!pcrat_image) 224662306a36Sopenharmony_ci return -ENOMEM; 224762306a36Sopenharmony_ci *size = VCRAT_SIZE_FOR_GPU; 224862306a36Sopenharmony_ci ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, 224962306a36Sopenharmony_ci proximity_domain); 225062306a36Sopenharmony_ci break; 225162306a36Sopenharmony_ci case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): 225262306a36Sopenharmony_ci /* TODO: */ 225362306a36Sopenharmony_ci ret = -EINVAL; 225462306a36Sopenharmony_ci pr_err("VCRAT not implemented for APU\n"); 225562306a36Sopenharmony_ci break; 225662306a36Sopenharmony_ci default: 225762306a36Sopenharmony_ci ret = -EINVAL; 225862306a36Sopenharmony_ci } 225962306a36Sopenharmony_ci 226062306a36Sopenharmony_ci if (!ret) 226162306a36Sopenharmony_ci *crat_image = pcrat_image; 226262306a36Sopenharmony_ci else 226362306a36Sopenharmony_ci kvfree(pcrat_image); 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_ci return ret; 226662306a36Sopenharmony_ci} 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_ci 226962306a36Sopenharmony_ci/* kfd_destroy_crat_image 227062306a36Sopenharmony_ci * 227162306a36Sopenharmony_ci * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) 227262306a36Sopenharmony_ci * 227362306a36Sopenharmony_ci */ 227462306a36Sopenharmony_civoid kfd_destroy_crat_image(void *crat_image) 227562306a36Sopenharmony_ci{ 227662306a36Sopenharmony_ci kvfree(crat_image); 227762306a36Sopenharmony_ci} 2278