162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2014-2022 Advanced Micro Devices, Inc. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 662306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 762306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 862306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 962306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 1062306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 1362306a36Sopenharmony_ci * all copies or substantial portions of the Software. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1662306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1762306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1862306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 1962306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2062306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2162306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include <linux/bsearch.h> 2562306a36Sopenharmony_ci#include <linux/pci.h> 2662306a36Sopenharmony_ci#include <linux/slab.h> 2762306a36Sopenharmony_ci#include "kfd_priv.h" 2862306a36Sopenharmony_ci#include "kfd_device_queue_manager.h" 2962306a36Sopenharmony_ci#include "kfd_pm4_headers_vi.h" 3062306a36Sopenharmony_ci#include "kfd_pm4_headers_aldebaran.h" 3162306a36Sopenharmony_ci#include "cwsr_trap_handler.h" 3262306a36Sopenharmony_ci#include "amdgpu_amdkfd.h" 3362306a36Sopenharmony_ci#include "kfd_smi_events.h" 3462306a36Sopenharmony_ci#include "kfd_svm.h" 3562306a36Sopenharmony_ci#include "kfd_migrate.h" 3662306a36Sopenharmony_ci#include "amdgpu.h" 3762306a36Sopenharmony_ci#include "amdgpu_xcp.h" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci#define MQD_SIZE_ALIGNED 768 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci/* 4262306a36Sopenharmony_ci * kfd_locked is used to lock the kfd driver during suspend or reset 4362306a36Sopenharmony_ci * once locked, kfd driver will stop any further GPU execution. 4462306a36Sopenharmony_ci * create process (open) will return -EAGAIN. 4562306a36Sopenharmony_ci */ 4662306a36Sopenharmony_cistatic int kfd_locked; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#ifdef CONFIG_DRM_AMDGPU_CIK 4962306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v7_kfd2kgd; 5062306a36Sopenharmony_ci#endif 5162306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v8_kfd2kgd; 5262306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v9_kfd2kgd; 5362306a36Sopenharmony_ciextern const struct kfd2kgd_calls arcturus_kfd2kgd; 5462306a36Sopenharmony_ciextern const struct kfd2kgd_calls aldebaran_kfd2kgd; 5562306a36Sopenharmony_ciextern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd; 5662306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v10_kfd2kgd; 5762306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; 5862306a36Sopenharmony_ciextern const struct kfd2kgd_calls gfx_v11_kfd2kgd; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 6162306a36Sopenharmony_ci unsigned int chunk_size); 6262306a36Sopenharmony_cistatic void kfd_gtt_sa_fini(struct kfd_dev *kfd); 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_cistatic int kfd_resume(struct kfd_node *kfd); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) 6762306a36Sopenharmony_ci{ 6862306a36Sopenharmony_ci uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci switch (sdma_version) { 7162306a36Sopenharmony_ci case IP_VERSION(4, 0, 0):/* VEGA10 */ 7262306a36Sopenharmony_ci case IP_VERSION(4, 0, 1):/* VEGA12 */ 7362306a36Sopenharmony_ci case IP_VERSION(4, 1, 0):/* RAVEN */ 7462306a36Sopenharmony_ci case IP_VERSION(4, 1, 1):/* RAVEN */ 7562306a36Sopenharmony_ci case IP_VERSION(4, 1, 2):/* RENOIR */ 7662306a36Sopenharmony_ci case IP_VERSION(5, 2, 1):/* VANGOGH */ 7762306a36Sopenharmony_ci case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ 7862306a36Sopenharmony_ci case IP_VERSION(5, 2, 6):/* GC 10.3.6 */ 7962306a36Sopenharmony_ci case IP_VERSION(5, 2, 7):/* GC 10.3.7 */ 8062306a36Sopenharmony_ci kfd->device_info.num_sdma_queues_per_engine = 2; 8162306a36Sopenharmony_ci break; 8262306a36Sopenharmony_ci case IP_VERSION(4, 2, 0):/* VEGA20 */ 8362306a36Sopenharmony_ci case IP_VERSION(4, 2, 2):/* ARCTURUS */ 8462306a36Sopenharmony_ci case IP_VERSION(4, 4, 0):/* ALDEBARAN */ 8562306a36Sopenharmony_ci case IP_VERSION(4, 4, 2): 8662306a36Sopenharmony_ci case IP_VERSION(5, 0, 0):/* NAVI10 */ 8762306a36Sopenharmony_ci case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ 8862306a36Sopenharmony_ci case IP_VERSION(5, 0, 2):/* NAVI14 */ 8962306a36Sopenharmony_ci case IP_VERSION(5, 0, 5):/* NAVI12 */ 9062306a36Sopenharmony_ci case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */ 9162306a36Sopenharmony_ci case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */ 9262306a36Sopenharmony_ci case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ 9362306a36Sopenharmony_ci case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ 9462306a36Sopenharmony_ci case IP_VERSION(6, 0, 0): 9562306a36Sopenharmony_ci case IP_VERSION(6, 0, 1): 9662306a36Sopenharmony_ci case IP_VERSION(6, 0, 2): 9762306a36Sopenharmony_ci case IP_VERSION(6, 0, 3): 9862306a36Sopenharmony_ci kfd->device_info.num_sdma_queues_per_engine = 8; 9962306a36Sopenharmony_ci break; 10062306a36Sopenharmony_ci default: 10162306a36Sopenharmony_ci dev_warn(kfd_device, 10262306a36Sopenharmony_ci "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n", 10362306a36Sopenharmony_ci sdma_version); 10462306a36Sopenharmony_ci kfd->device_info.num_sdma_queues_per_engine = 8; 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci switch (sdma_version) { 11062306a36Sopenharmony_ci case IP_VERSION(6, 0, 0): 11162306a36Sopenharmony_ci case IP_VERSION(6, 0, 1): 11262306a36Sopenharmony_ci case IP_VERSION(6, 0, 2): 11362306a36Sopenharmony_ci case IP_VERSION(6, 0, 3): 11462306a36Sopenharmony_ci /* Reserve 1 for paging and 1 for gfx */ 11562306a36Sopenharmony_ci kfd->device_info.num_reserved_sdma_queues_per_engine = 2; 11662306a36Sopenharmony_ci /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ 11762306a36Sopenharmony_ci bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0, 11862306a36Sopenharmony_ci kfd->adev->sdma.num_instances * 11962306a36Sopenharmony_ci kfd->device_info.num_reserved_sdma_queues_per_engine); 12062306a36Sopenharmony_ci break; 12162306a36Sopenharmony_ci default: 12262306a36Sopenharmony_ci break; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci uint32_t gc_version = KFD_GC_VERSION(kfd); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci switch (gc_version) { 13162306a36Sopenharmony_ci case IP_VERSION(9, 0, 1): /* VEGA10 */ 13262306a36Sopenharmony_ci case IP_VERSION(9, 1, 0): /* RAVEN */ 13362306a36Sopenharmony_ci case IP_VERSION(9, 2, 1): /* VEGA12 */ 13462306a36Sopenharmony_ci case IP_VERSION(9, 2, 2): /* RAVEN */ 13562306a36Sopenharmony_ci case IP_VERSION(9, 3, 0): /* RENOIR */ 13662306a36Sopenharmony_ci case IP_VERSION(9, 4, 0): /* VEGA20 */ 13762306a36Sopenharmony_ci case IP_VERSION(9, 4, 1): /* ARCTURUS */ 13862306a36Sopenharmony_ci case IP_VERSION(9, 4, 2): /* ALDEBARAN */ 13962306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; 14062306a36Sopenharmony_ci break; 14162306a36Sopenharmony_ci case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ 14262306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = 14362306a36Sopenharmony_ci &event_interrupt_class_v9_4_3; 14462306a36Sopenharmony_ci break; 14562306a36Sopenharmony_ci case IP_VERSION(10, 3, 1): /* VANGOGH */ 14662306a36Sopenharmony_ci case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ 14762306a36Sopenharmony_ci case IP_VERSION(10, 3, 6): /* GC 10.3.6 */ 14862306a36Sopenharmony_ci case IP_VERSION(10, 3, 7): /* GC 10.3.7 */ 14962306a36Sopenharmony_ci case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */ 15062306a36Sopenharmony_ci case IP_VERSION(10, 1, 4): 15162306a36Sopenharmony_ci case IP_VERSION(10, 1, 10): /* NAVI10 */ 15262306a36Sopenharmony_ci case IP_VERSION(10, 1, 2): /* NAVI12 */ 15362306a36Sopenharmony_ci case IP_VERSION(10, 1, 1): /* NAVI14 */ 15462306a36Sopenharmony_ci case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */ 15562306a36Sopenharmony_ci case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */ 15662306a36Sopenharmony_ci case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */ 15762306a36Sopenharmony_ci case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */ 15862306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = &event_interrupt_class_v10; 15962306a36Sopenharmony_ci break; 16062306a36Sopenharmony_ci case IP_VERSION(11, 0, 0): 16162306a36Sopenharmony_ci case IP_VERSION(11, 0, 1): 16262306a36Sopenharmony_ci case IP_VERSION(11, 0, 2): 16362306a36Sopenharmony_ci case IP_VERSION(11, 0, 3): 16462306a36Sopenharmony_ci case IP_VERSION(11, 0, 4): 16562306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; 16662306a36Sopenharmony_ci break; 16762306a36Sopenharmony_ci default: 16862306a36Sopenharmony_ci dev_warn(kfd_device, "v9 event interrupt handler is set due to " 16962306a36Sopenharmony_ci "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); 17062306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; 17162306a36Sopenharmony_ci } 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_cistatic void kfd_device_info_init(struct kfd_dev *kfd, 17562306a36Sopenharmony_ci bool vf, uint32_t gfx_target_version) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci uint32_t gc_version = KFD_GC_VERSION(kfd); 17862306a36Sopenharmony_ci uint32_t asic_type = kfd->adev->asic_type; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci kfd->device_info.max_pasid_bits = 16; 18162306a36Sopenharmony_ci kfd->device_info.max_no_of_hqd = 24; 18262306a36Sopenharmony_ci kfd->device_info.num_of_watch_points = 4; 18362306a36Sopenharmony_ci kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; 18462306a36Sopenharmony_ci kfd->device_info.gfx_target_version = gfx_target_version; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci if (KFD_IS_SOC15(kfd)) { 18762306a36Sopenharmony_ci kfd->device_info.doorbell_size = 8; 18862306a36Sopenharmony_ci kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); 18962306a36Sopenharmony_ci kfd->device_info.supports_cwsr = true; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci kfd_device_info_set_sdma_info(kfd); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci kfd_device_info_set_event_interrupt_class(kfd); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if (gc_version < IP_VERSION(11, 0, 0)) { 19662306a36Sopenharmony_ci /* Navi2x+, Navi1x+ */ 19762306a36Sopenharmony_ci if (gc_version == IP_VERSION(10, 3, 6)) 19862306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version = 14; 19962306a36Sopenharmony_ci else if (gc_version == IP_VERSION(10, 3, 7)) 20062306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version = 3; 20162306a36Sopenharmony_ci else if (gc_version >= IP_VERSION(10, 3, 0)) 20262306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version = 92; 20362306a36Sopenharmony_ci else if (gc_version >= IP_VERSION(10, 1, 1)) 20462306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version = 145; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci /* Navi1x+ */ 20762306a36Sopenharmony_ci if (gc_version >= IP_VERSION(10, 1, 1)) 20862306a36Sopenharmony_ci kfd->device_info.needs_pci_atomics = true; 20962306a36Sopenharmony_ci } else if (gc_version < IP_VERSION(12, 0, 0)) { 21062306a36Sopenharmony_ci /* 21162306a36Sopenharmony_ci * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires 21262306a36Sopenharmony_ci * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require 21362306a36Sopenharmony_ci * PCIe atomics support. 21462306a36Sopenharmony_ci */ 21562306a36Sopenharmony_ci kfd->device_info.needs_pci_atomics = true; 21662306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci } else { 21962306a36Sopenharmony_ci kfd->device_info.doorbell_size = 4; 22062306a36Sopenharmony_ci kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); 22162306a36Sopenharmony_ci kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; 22262306a36Sopenharmony_ci kfd->device_info.num_sdma_queues_per_engine = 2; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci if (asic_type != CHIP_KAVERI && 22562306a36Sopenharmony_ci asic_type != CHIP_HAWAII && 22662306a36Sopenharmony_ci asic_type != CHIP_TONGA) 22762306a36Sopenharmony_ci kfd->device_info.supports_cwsr = true; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci if (asic_type != CHIP_HAWAII && !vf) 23062306a36Sopenharmony_ci kfd->device_info.needs_pci_atomics = true; 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_cistruct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci struct kfd_dev *kfd = NULL; 23762306a36Sopenharmony_ci const struct kfd2kgd_calls *f2g = NULL; 23862306a36Sopenharmony_ci uint32_t gfx_target_version = 0; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci switch (adev->asic_type) { 24162306a36Sopenharmony_ci#ifdef CONFIG_DRM_AMDGPU_CIK 24262306a36Sopenharmony_ci case CHIP_KAVERI: 24362306a36Sopenharmony_ci gfx_target_version = 70000; 24462306a36Sopenharmony_ci if (!vf) 24562306a36Sopenharmony_ci f2g = &gfx_v7_kfd2kgd; 24662306a36Sopenharmony_ci break; 24762306a36Sopenharmony_ci#endif 24862306a36Sopenharmony_ci case CHIP_CARRIZO: 24962306a36Sopenharmony_ci gfx_target_version = 80001; 25062306a36Sopenharmony_ci if (!vf) 25162306a36Sopenharmony_ci f2g = &gfx_v8_kfd2kgd; 25262306a36Sopenharmony_ci break; 25362306a36Sopenharmony_ci#ifdef CONFIG_DRM_AMDGPU_CIK 25462306a36Sopenharmony_ci case CHIP_HAWAII: 25562306a36Sopenharmony_ci gfx_target_version = 70001; 25662306a36Sopenharmony_ci if (!amdgpu_exp_hw_support) 25762306a36Sopenharmony_ci pr_info( 25862306a36Sopenharmony_ci "KFD support on Hawaii is experimental. See modparam exp_hw_support\n" 25962306a36Sopenharmony_ci ); 26062306a36Sopenharmony_ci else if (!vf) 26162306a36Sopenharmony_ci f2g = &gfx_v7_kfd2kgd; 26262306a36Sopenharmony_ci break; 26362306a36Sopenharmony_ci#endif 26462306a36Sopenharmony_ci case CHIP_TONGA: 26562306a36Sopenharmony_ci gfx_target_version = 80002; 26662306a36Sopenharmony_ci if (!vf) 26762306a36Sopenharmony_ci f2g = &gfx_v8_kfd2kgd; 26862306a36Sopenharmony_ci break; 26962306a36Sopenharmony_ci case CHIP_FIJI: 27062306a36Sopenharmony_ci case CHIP_POLARIS10: 27162306a36Sopenharmony_ci gfx_target_version = 80003; 27262306a36Sopenharmony_ci f2g = &gfx_v8_kfd2kgd; 27362306a36Sopenharmony_ci break; 27462306a36Sopenharmony_ci case CHIP_POLARIS11: 27562306a36Sopenharmony_ci case CHIP_POLARIS12: 27662306a36Sopenharmony_ci case CHIP_VEGAM: 27762306a36Sopenharmony_ci gfx_target_version = 80003; 27862306a36Sopenharmony_ci if (!vf) 27962306a36Sopenharmony_ci f2g = &gfx_v8_kfd2kgd; 28062306a36Sopenharmony_ci break; 28162306a36Sopenharmony_ci default: 28262306a36Sopenharmony_ci switch (adev->ip_versions[GC_HWIP][0]) { 28362306a36Sopenharmony_ci /* Vega 10 */ 28462306a36Sopenharmony_ci case IP_VERSION(9, 0, 1): 28562306a36Sopenharmony_ci gfx_target_version = 90000; 28662306a36Sopenharmony_ci f2g = &gfx_v9_kfd2kgd; 28762306a36Sopenharmony_ci break; 28862306a36Sopenharmony_ci /* Raven */ 28962306a36Sopenharmony_ci case IP_VERSION(9, 1, 0): 29062306a36Sopenharmony_ci case IP_VERSION(9, 2, 2): 29162306a36Sopenharmony_ci gfx_target_version = 90002; 29262306a36Sopenharmony_ci if (!vf) 29362306a36Sopenharmony_ci f2g = &gfx_v9_kfd2kgd; 29462306a36Sopenharmony_ci break; 29562306a36Sopenharmony_ci /* Vega12 */ 29662306a36Sopenharmony_ci case IP_VERSION(9, 2, 1): 29762306a36Sopenharmony_ci gfx_target_version = 90004; 29862306a36Sopenharmony_ci if (!vf) 29962306a36Sopenharmony_ci f2g = &gfx_v9_kfd2kgd; 30062306a36Sopenharmony_ci break; 30162306a36Sopenharmony_ci /* Renoir */ 30262306a36Sopenharmony_ci case IP_VERSION(9, 3, 0): 30362306a36Sopenharmony_ci gfx_target_version = 90012; 30462306a36Sopenharmony_ci if (!vf) 30562306a36Sopenharmony_ci f2g = &gfx_v9_kfd2kgd; 30662306a36Sopenharmony_ci break; 30762306a36Sopenharmony_ci /* Vega20 */ 30862306a36Sopenharmony_ci case IP_VERSION(9, 4, 0): 30962306a36Sopenharmony_ci gfx_target_version = 90006; 31062306a36Sopenharmony_ci if (!vf) 31162306a36Sopenharmony_ci f2g = &gfx_v9_kfd2kgd; 31262306a36Sopenharmony_ci break; 31362306a36Sopenharmony_ci /* Arcturus */ 31462306a36Sopenharmony_ci case IP_VERSION(9, 4, 1): 31562306a36Sopenharmony_ci gfx_target_version = 90008; 31662306a36Sopenharmony_ci f2g = &arcturus_kfd2kgd; 31762306a36Sopenharmony_ci break; 31862306a36Sopenharmony_ci /* Aldebaran */ 31962306a36Sopenharmony_ci case IP_VERSION(9, 4, 2): 32062306a36Sopenharmony_ci gfx_target_version = 90010; 32162306a36Sopenharmony_ci f2g = &aldebaran_kfd2kgd; 32262306a36Sopenharmony_ci break; 32362306a36Sopenharmony_ci case IP_VERSION(9, 4, 3): 32462306a36Sopenharmony_ci gfx_target_version = adev->rev_id >= 1 ? 90402 32562306a36Sopenharmony_ci : adev->flags & AMD_IS_APU ? 90400 32662306a36Sopenharmony_ci : 90401; 32762306a36Sopenharmony_ci f2g = &gc_9_4_3_kfd2kgd; 32862306a36Sopenharmony_ci break; 32962306a36Sopenharmony_ci /* Navi10 */ 33062306a36Sopenharmony_ci case IP_VERSION(10, 1, 10): 33162306a36Sopenharmony_ci gfx_target_version = 100100; 33262306a36Sopenharmony_ci if (!vf) 33362306a36Sopenharmony_ci f2g = &gfx_v10_kfd2kgd; 33462306a36Sopenharmony_ci break; 33562306a36Sopenharmony_ci /* Navi12 */ 33662306a36Sopenharmony_ci case IP_VERSION(10, 1, 2): 33762306a36Sopenharmony_ci gfx_target_version = 100101; 33862306a36Sopenharmony_ci f2g = &gfx_v10_kfd2kgd; 33962306a36Sopenharmony_ci break; 34062306a36Sopenharmony_ci /* Navi14 */ 34162306a36Sopenharmony_ci case IP_VERSION(10, 1, 1): 34262306a36Sopenharmony_ci gfx_target_version = 100102; 34362306a36Sopenharmony_ci if (!vf) 34462306a36Sopenharmony_ci f2g = &gfx_v10_kfd2kgd; 34562306a36Sopenharmony_ci break; 34662306a36Sopenharmony_ci /* Cyan Skillfish */ 34762306a36Sopenharmony_ci case IP_VERSION(10, 1, 3): 34862306a36Sopenharmony_ci case IP_VERSION(10, 1, 4): 34962306a36Sopenharmony_ci gfx_target_version = 100103; 35062306a36Sopenharmony_ci if (!vf) 35162306a36Sopenharmony_ci f2g = &gfx_v10_kfd2kgd; 35262306a36Sopenharmony_ci break; 35362306a36Sopenharmony_ci /* Sienna Cichlid */ 35462306a36Sopenharmony_ci case IP_VERSION(10, 3, 0): 35562306a36Sopenharmony_ci gfx_target_version = 100300; 35662306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 35762306a36Sopenharmony_ci break; 35862306a36Sopenharmony_ci /* Navy Flounder */ 35962306a36Sopenharmony_ci case IP_VERSION(10, 3, 2): 36062306a36Sopenharmony_ci gfx_target_version = 100301; 36162306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 36262306a36Sopenharmony_ci break; 36362306a36Sopenharmony_ci /* Van Gogh */ 36462306a36Sopenharmony_ci case IP_VERSION(10, 3, 1): 36562306a36Sopenharmony_ci gfx_target_version = 100303; 36662306a36Sopenharmony_ci if (!vf) 36762306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 36862306a36Sopenharmony_ci break; 36962306a36Sopenharmony_ci /* Dimgrey Cavefish */ 37062306a36Sopenharmony_ci case IP_VERSION(10, 3, 4): 37162306a36Sopenharmony_ci gfx_target_version = 100302; 37262306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 37362306a36Sopenharmony_ci break; 37462306a36Sopenharmony_ci /* Beige Goby */ 37562306a36Sopenharmony_ci case IP_VERSION(10, 3, 5): 37662306a36Sopenharmony_ci gfx_target_version = 100304; 37762306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 37862306a36Sopenharmony_ci break; 37962306a36Sopenharmony_ci /* Yellow Carp */ 38062306a36Sopenharmony_ci case IP_VERSION(10, 3, 3): 38162306a36Sopenharmony_ci gfx_target_version = 100305; 38262306a36Sopenharmony_ci if (!vf) 38362306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 38462306a36Sopenharmony_ci break; 38562306a36Sopenharmony_ci case IP_VERSION(10, 3, 6): 38662306a36Sopenharmony_ci case IP_VERSION(10, 3, 7): 38762306a36Sopenharmony_ci gfx_target_version = 100306; 38862306a36Sopenharmony_ci if (!vf) 38962306a36Sopenharmony_ci f2g = &gfx_v10_3_kfd2kgd; 39062306a36Sopenharmony_ci break; 39162306a36Sopenharmony_ci case IP_VERSION(11, 0, 0): 39262306a36Sopenharmony_ci gfx_target_version = 110000; 39362306a36Sopenharmony_ci f2g = &gfx_v11_kfd2kgd; 39462306a36Sopenharmony_ci break; 39562306a36Sopenharmony_ci case IP_VERSION(11, 0, 1): 39662306a36Sopenharmony_ci case IP_VERSION(11, 0, 4): 39762306a36Sopenharmony_ci gfx_target_version = 110003; 39862306a36Sopenharmony_ci f2g = &gfx_v11_kfd2kgd; 39962306a36Sopenharmony_ci break; 40062306a36Sopenharmony_ci case IP_VERSION(11, 0, 2): 40162306a36Sopenharmony_ci gfx_target_version = 110002; 40262306a36Sopenharmony_ci f2g = &gfx_v11_kfd2kgd; 40362306a36Sopenharmony_ci break; 40462306a36Sopenharmony_ci case IP_VERSION(11, 0, 3): 40562306a36Sopenharmony_ci if ((adev->pdev->device == 0x7460 && 40662306a36Sopenharmony_ci adev->pdev->revision == 0x00) || 40762306a36Sopenharmony_ci (adev->pdev->device == 0x7461 && 40862306a36Sopenharmony_ci adev->pdev->revision == 0x00)) 40962306a36Sopenharmony_ci /* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */ 41062306a36Sopenharmony_ci gfx_target_version = 110005; 41162306a36Sopenharmony_ci else 41262306a36Sopenharmony_ci /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */ 41362306a36Sopenharmony_ci gfx_target_version = 110001; 41462306a36Sopenharmony_ci f2g = &gfx_v11_kfd2kgd; 41562306a36Sopenharmony_ci break; 41662306a36Sopenharmony_ci default: 41762306a36Sopenharmony_ci break; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci break; 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci if (!f2g) { 42362306a36Sopenharmony_ci if (adev->ip_versions[GC_HWIP][0]) 42462306a36Sopenharmony_ci dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n", 42562306a36Sopenharmony_ci adev->ip_versions[GC_HWIP][0], vf ? "VF" : ""); 42662306a36Sopenharmony_ci else 42762306a36Sopenharmony_ci dev_err(kfd_device, "%s %s not supported in kfd\n", 42862306a36Sopenharmony_ci amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); 42962306a36Sopenharmony_ci return NULL; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); 43362306a36Sopenharmony_ci if (!kfd) 43462306a36Sopenharmony_ci return NULL; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci kfd->adev = adev; 43762306a36Sopenharmony_ci kfd_device_info_init(kfd, vf, gfx_target_version); 43862306a36Sopenharmony_ci kfd->init_complete = false; 43962306a36Sopenharmony_ci kfd->kfd2kgd = f2g; 44062306a36Sopenharmony_ci atomic_set(&kfd->compute_profile, 0); 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci mutex_init(&kfd->doorbell_mutex); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci ida_init(&kfd->doorbell_ida); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci return kfd; 44762306a36Sopenharmony_ci} 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_cistatic void kfd_cwsr_init(struct kfd_dev *kfd) 45062306a36Sopenharmony_ci{ 45162306a36Sopenharmony_ci if (cwsr_enable && kfd->device_info.supports_cwsr) { 45262306a36Sopenharmony_ci if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) { 45362306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); 45462306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_gfx8_hex; 45562306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); 45662306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) { 45762306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); 45862306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_arcturus_hex; 45962306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); 46062306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) { 46162306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); 46262306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_aldebaran_hex; 46362306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); 46462306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) { 46562306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE); 46662306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; 46762306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); 46862306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { 46962306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); 47062306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_gfx9_hex; 47162306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); 47262306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) { 47362306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); 47462306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_nv1x_hex; 47562306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); 47662306a36Sopenharmony_ci } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { 47762306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); 47862306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_gfx10_hex; 47962306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); 48062306a36Sopenharmony_ci } else { 48162306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE); 48262306a36Sopenharmony_ci kfd->cwsr_isa = cwsr_trap_gfx11_hex; 48362306a36Sopenharmony_ci kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); 48462306a36Sopenharmony_ci } 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci kfd->cwsr_enabled = true; 48762306a36Sopenharmony_ci } 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic int kfd_gws_init(struct kfd_node *node) 49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci int ret = 0; 49362306a36Sopenharmony_ci struct kfd_dev *kfd = node->kfd; 49462306a36Sopenharmony_ci uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) 49762306a36Sopenharmony_ci return 0; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci if (hws_gws_support || (KFD_IS_SOC15(node) && 50062306a36Sopenharmony_ci ((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1) 50162306a36Sopenharmony_ci && kfd->mec2_fw_version >= 0x81b3) || 50262306a36Sopenharmony_ci (KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0) 50362306a36Sopenharmony_ci && kfd->mec2_fw_version >= 0x1b3) || 50462306a36Sopenharmony_ci (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1) 50562306a36Sopenharmony_ci && kfd->mec2_fw_version >= 0x30) || 50662306a36Sopenharmony_ci (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) 50762306a36Sopenharmony_ci && kfd->mec2_fw_version >= 0x28) || 50862306a36Sopenharmony_ci (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || 50962306a36Sopenharmony_ci (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) 51062306a36Sopenharmony_ci && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) 51162306a36Sopenharmony_ci && kfd->mec2_fw_version >= 0x6b) || 51262306a36Sopenharmony_ci (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0) 51362306a36Sopenharmony_ci && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0) 51462306a36Sopenharmony_ci && mes_rev >= 68)))) 51562306a36Sopenharmony_ci ret = amdgpu_amdkfd_alloc_gws(node->adev, 51662306a36Sopenharmony_ci node->adev->gds.gws_size, &node->gws); 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci return ret; 51962306a36Sopenharmony_ci} 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_cistatic void kfd_smi_init(struct kfd_node *dev) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci INIT_LIST_HEAD(&dev->smi_clients); 52462306a36Sopenharmony_ci spin_lock_init(&dev->smi_lock); 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_cistatic int kfd_init_node(struct kfd_node *node) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci int err = -1; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci if (kfd_interrupt_init(node)) { 53262306a36Sopenharmony_ci dev_err(kfd_device, "Error initializing interrupts\n"); 53362306a36Sopenharmony_ci goto kfd_interrupt_error; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci node->dqm = device_queue_manager_init(node); 53762306a36Sopenharmony_ci if (!node->dqm) { 53862306a36Sopenharmony_ci dev_err(kfd_device, "Error initializing queue manager\n"); 53962306a36Sopenharmony_ci goto device_queue_manager_error; 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci if (kfd_gws_init(node)) { 54362306a36Sopenharmony_ci dev_err(kfd_device, "Could not allocate %d gws\n", 54462306a36Sopenharmony_ci node->adev->gds.gws_size); 54562306a36Sopenharmony_ci goto gws_error; 54662306a36Sopenharmony_ci } 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci if (kfd_resume(node)) 54962306a36Sopenharmony_ci goto kfd_resume_error; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci if (kfd_topology_add_device(node)) { 55262306a36Sopenharmony_ci dev_err(kfd_device, "Error adding device to topology\n"); 55362306a36Sopenharmony_ci goto kfd_topology_add_device_error; 55462306a36Sopenharmony_ci } 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci kfd_smi_init(node); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci return 0; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_cikfd_topology_add_device_error: 56162306a36Sopenharmony_cikfd_resume_error: 56262306a36Sopenharmony_cigws_error: 56362306a36Sopenharmony_ci device_queue_manager_uninit(node->dqm); 56462306a36Sopenharmony_cidevice_queue_manager_error: 56562306a36Sopenharmony_ci kfd_interrupt_exit(node); 56662306a36Sopenharmony_cikfd_interrupt_error: 56762306a36Sopenharmony_ci if (node->gws) 56862306a36Sopenharmony_ci amdgpu_amdkfd_free_gws(node->adev, node->gws); 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci /* Cleanup the node memory here */ 57162306a36Sopenharmony_ci kfree(node); 57262306a36Sopenharmony_ci return err; 57362306a36Sopenharmony_ci} 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_cistatic void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci struct kfd_node *knode; 57862306a36Sopenharmony_ci unsigned int i; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci for (i = 0; i < num_nodes; i++) { 58162306a36Sopenharmony_ci knode = kfd->nodes[i]; 58262306a36Sopenharmony_ci device_queue_manager_uninit(knode->dqm); 58362306a36Sopenharmony_ci kfd_interrupt_exit(knode); 58462306a36Sopenharmony_ci kfd_topology_remove_device(knode); 58562306a36Sopenharmony_ci if (knode->gws) 58662306a36Sopenharmony_ci amdgpu_amdkfd_free_gws(knode->adev, knode->gws); 58762306a36Sopenharmony_ci kfree(knode); 58862306a36Sopenharmony_ci kfd->nodes[i] = NULL; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci} 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_cistatic void kfd_setup_interrupt_bitmap(struct kfd_node *node, 59362306a36Sopenharmony_ci unsigned int kfd_node_idx) 59462306a36Sopenharmony_ci{ 59562306a36Sopenharmony_ci struct amdgpu_device *adev = node->adev; 59662306a36Sopenharmony_ci uint32_t xcc_mask = node->xcc_mask; 59762306a36Sopenharmony_ci uint32_t xcc, mapped_xcc; 59862306a36Sopenharmony_ci /* 59962306a36Sopenharmony_ci * Interrupt bitmap is setup for processing interrupts from 60062306a36Sopenharmony_ci * different XCDs and AIDs. 60162306a36Sopenharmony_ci * Interrupt bitmap is defined as follows: 60262306a36Sopenharmony_ci * 1. Bits 0-15 - correspond to the NodeId field. 60362306a36Sopenharmony_ci * Each bit corresponds to NodeId number. For example, if 60462306a36Sopenharmony_ci * a KFD node has interrupt bitmap set to 0x7, then this 60562306a36Sopenharmony_ci * KFD node will process interrupts with NodeId = 0, 1 and 2 60662306a36Sopenharmony_ci * in the IH cookie. 60762306a36Sopenharmony_ci * 2. Bits 16-31 - unused. 60862306a36Sopenharmony_ci * 60962306a36Sopenharmony_ci * Please note that the kfd_node_idx argument passed to this 61062306a36Sopenharmony_ci * function is not related to NodeId field received in the 61162306a36Sopenharmony_ci * IH cookie. 61262306a36Sopenharmony_ci * 61362306a36Sopenharmony_ci * In CPX mode, a KFD node will process an interrupt if: 61462306a36Sopenharmony_ci * - the Node Id matches the corresponding bit set in 61562306a36Sopenharmony_ci * Bits 0-15. 61662306a36Sopenharmony_ci * - AND VMID reported in the interrupt lies within the 61762306a36Sopenharmony_ci * VMID range of the node. 61862306a36Sopenharmony_ci */ 61962306a36Sopenharmony_ci for_each_inst(xcc, xcc_mask) { 62062306a36Sopenharmony_ci mapped_xcc = GET_INST(GC, xcc); 62162306a36Sopenharmony_ci node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)); 62262306a36Sopenharmony_ci } 62362306a36Sopenharmony_ci dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx, 62462306a36Sopenharmony_ci node->interrupt_bitmap); 62562306a36Sopenharmony_ci} 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_cibool kgd2kfd_device_init(struct kfd_dev *kfd, 62862306a36Sopenharmony_ci const struct kgd2kfd_shared_resources *gpu_resources) 62962306a36Sopenharmony_ci{ 63062306a36Sopenharmony_ci unsigned int size, map_process_packet_size, i; 63162306a36Sopenharmony_ci struct kfd_node *node; 63262306a36Sopenharmony_ci uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; 63362306a36Sopenharmony_ci unsigned int max_proc_per_quantum; 63462306a36Sopenharmony_ci int partition_mode; 63562306a36Sopenharmony_ci int xcp_idx; 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 63862306a36Sopenharmony_ci KGD_ENGINE_MEC1); 63962306a36Sopenharmony_ci kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 64062306a36Sopenharmony_ci KGD_ENGINE_MEC2); 64162306a36Sopenharmony_ci kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 64262306a36Sopenharmony_ci KGD_ENGINE_SDMA1); 64362306a36Sopenharmony_ci kfd->shared_resources = *gpu_resources; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr); 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci if (kfd->num_nodes == 0) { 64862306a36Sopenharmony_ci dev_err(kfd_device, 64962306a36Sopenharmony_ci "KFD num nodes cannot be 0, num_xcc_in_node: %d\n", 65062306a36Sopenharmony_ci kfd->adev->gfx.num_xcc_per_xcp); 65162306a36Sopenharmony_ci goto out; 65262306a36Sopenharmony_ci } 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. 65562306a36Sopenharmony_ci * 32 and 64-bit requests are possible and must be 65662306a36Sopenharmony_ci * supported. 65762306a36Sopenharmony_ci */ 65862306a36Sopenharmony_ci kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); 65962306a36Sopenharmony_ci if (!kfd->pci_atomic_requested && 66062306a36Sopenharmony_ci kfd->device_info.needs_pci_atomics && 66162306a36Sopenharmony_ci (!kfd->device_info.no_atomic_fw_version || 66262306a36Sopenharmony_ci kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { 66362306a36Sopenharmony_ci dev_info(kfd_device, 66462306a36Sopenharmony_ci "skipped device %x:%x, PCI rejects atomics %d<%d\n", 66562306a36Sopenharmony_ci kfd->adev->pdev->vendor, kfd->adev->pdev->device, 66662306a36Sopenharmony_ci kfd->mec_fw_version, 66762306a36Sopenharmony_ci kfd->device_info.no_atomic_fw_version); 66862306a36Sopenharmony_ci return false; 66962306a36Sopenharmony_ci } 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; 67262306a36Sopenharmony_ci last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 67362306a36Sopenharmony_ci vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci /* For GFX9.4.3, we need special handling for VMIDs depending on 67662306a36Sopenharmony_ci * partition mode. 67762306a36Sopenharmony_ci * In CPX mode, the VMID range needs to be shared between XCDs. 67862306a36Sopenharmony_ci * Additionally, there are 13 VMIDs (3-15) available for KFD. To 67962306a36Sopenharmony_ci * divide them equally, we change starting VMID to 4 and not use 68062306a36Sopenharmony_ci * VMID 3. 68162306a36Sopenharmony_ci * If the VMID range changes for GFX9.4.3, then this code MUST be 68262306a36Sopenharmony_ci * revisited. 68362306a36Sopenharmony_ci */ 68462306a36Sopenharmony_ci if (kfd->adev->xcp_mgr) { 68562306a36Sopenharmony_ci partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, 68662306a36Sopenharmony_ci AMDGPU_XCP_FL_LOCKED); 68762306a36Sopenharmony_ci if (partition_mode == AMDGPU_CPX_PARTITION_MODE && 68862306a36Sopenharmony_ci kfd->num_nodes != 1) { 68962306a36Sopenharmony_ci vmid_num_kfd /= 2; 69062306a36Sopenharmony_ci first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2; 69162306a36Sopenharmony_ci } 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci /* Verify module parameters regarding mapped process number*/ 69562306a36Sopenharmony_ci if (hws_max_conc_proc >= 0) 69662306a36Sopenharmony_ci max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd); 69762306a36Sopenharmony_ci else 69862306a36Sopenharmony_ci max_proc_per_quantum = vmid_num_kfd; 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci /* calculate max size of mqds needed for queues */ 70162306a36Sopenharmony_ci size = max_num_of_queues_per_device * 70262306a36Sopenharmony_ci kfd->device_info.mqd_size_aligned; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* 70562306a36Sopenharmony_ci * calculate max size of runlist packet. 70662306a36Sopenharmony_ci * There can be only 2 packets at once 70762306a36Sopenharmony_ci */ 70862306a36Sopenharmony_ci map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ? 70962306a36Sopenharmony_ci sizeof(struct pm4_mes_map_process_aldebaran) : 71062306a36Sopenharmony_ci sizeof(struct pm4_mes_map_process); 71162306a36Sopenharmony_ci size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + 71262306a36Sopenharmony_ci max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) 71362306a36Sopenharmony_ci + sizeof(struct pm4_mes_runlist)) * 2; 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci /* Add size of HIQ & DIQ */ 71662306a36Sopenharmony_ci size += KFD_KERNEL_QUEUE_SIZE * 2; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci /* add another 512KB for all other allocations on gart (HPD, fences) */ 71962306a36Sopenharmony_ci size += 512 * 1024; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci if (amdgpu_amdkfd_alloc_gtt_mem( 72262306a36Sopenharmony_ci kfd->adev, size, &kfd->gtt_mem, 72362306a36Sopenharmony_ci &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, 72462306a36Sopenharmony_ci false)) { 72562306a36Sopenharmony_ci dev_err(kfd_device, "Could not allocate %d bytes\n", size); 72662306a36Sopenharmony_ci goto alloc_gtt_mem_failure; 72762306a36Sopenharmony_ci } 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci dev_info(kfd_device, "Allocated %d bytes on gart\n", size); 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci /* Initialize GTT sa with 512 byte chunk size */ 73262306a36Sopenharmony_ci if (kfd_gtt_sa_init(kfd, size, 512) != 0) { 73362306a36Sopenharmony_ci dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); 73462306a36Sopenharmony_ci goto kfd_gtt_sa_init_error; 73562306a36Sopenharmony_ci } 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci if (kfd_doorbell_init(kfd)) { 73862306a36Sopenharmony_ci dev_err(kfd_device, 73962306a36Sopenharmony_ci "Error initializing doorbell aperture\n"); 74062306a36Sopenharmony_ci goto kfd_doorbell_error; 74162306a36Sopenharmony_ci } 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci if (amdgpu_use_xgmi_p2p) 74462306a36Sopenharmony_ci kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci /* 74762306a36Sopenharmony_ci * For GFX9.4.3, the KFD abstracts all partitions within a socket as 74862306a36Sopenharmony_ci * xGMI connected in the topology so assign a unique hive id per 74962306a36Sopenharmony_ci * device based on the pci device location if device is in PCIe mode. 75062306a36Sopenharmony_ci */ 75162306a36Sopenharmony_ci if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1) 75262306a36Sopenharmony_ci kfd->hive_id = pci_dev_id(kfd->adev->pdev); 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci kfd->noretry = kfd->adev->gmc.noretry; 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci kfd_cwsr_init(kfd); 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n", 75962306a36Sopenharmony_ci kfd->num_nodes); 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci /* Allocate the KFD nodes */ 76262306a36Sopenharmony_ci for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) { 76362306a36Sopenharmony_ci node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL); 76462306a36Sopenharmony_ci if (!node) 76562306a36Sopenharmony_ci goto node_alloc_error; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci node->node_id = i; 76862306a36Sopenharmony_ci node->adev = kfd->adev; 76962306a36Sopenharmony_ci node->kfd = kfd; 77062306a36Sopenharmony_ci node->kfd2kgd = kfd->kfd2kgd; 77162306a36Sopenharmony_ci node->vm_info.vmid_num_kfd = vmid_num_kfd; 77262306a36Sopenharmony_ci node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx); 77362306a36Sopenharmony_ci /* TODO : Check if error handling is needed */ 77462306a36Sopenharmony_ci if (node->xcp) { 77562306a36Sopenharmony_ci amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX, 77662306a36Sopenharmony_ci &node->xcc_mask); 77762306a36Sopenharmony_ci ++xcp_idx; 77862306a36Sopenharmony_ci } else { 77962306a36Sopenharmony_ci node->xcc_mask = 78062306a36Sopenharmony_ci (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1; 78162306a36Sopenharmony_ci } 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci if (node->xcp) { 78462306a36Sopenharmony_ci dev_info(kfd_device, "KFD node %d partition %d size %lldM\n", 78562306a36Sopenharmony_ci node->node_id, node->xcp->mem_id, 78662306a36Sopenharmony_ci KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); 78762306a36Sopenharmony_ci } 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && 79062306a36Sopenharmony_ci partition_mode == AMDGPU_CPX_PARTITION_MODE && 79162306a36Sopenharmony_ci kfd->num_nodes != 1) { 79262306a36Sopenharmony_ci /* For GFX9.4.3 and CPX mode, first XCD gets VMID range 79362306a36Sopenharmony_ci * 4-9 and second XCD gets VMID range 10-15. 79462306a36Sopenharmony_ci */ 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci node->vm_info.first_vmid_kfd = (i%2 == 0) ? 79762306a36Sopenharmony_ci first_vmid_kfd : 79862306a36Sopenharmony_ci first_vmid_kfd+vmid_num_kfd; 79962306a36Sopenharmony_ci node->vm_info.last_vmid_kfd = (i%2 == 0) ? 80062306a36Sopenharmony_ci last_vmid_kfd-vmid_num_kfd : 80162306a36Sopenharmony_ci last_vmid_kfd; 80262306a36Sopenharmony_ci node->compute_vmid_bitmap = 80362306a36Sopenharmony_ci ((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) - 80462306a36Sopenharmony_ci ((0x1 << (node->vm_info.first_vmid_kfd)) - 1); 80562306a36Sopenharmony_ci } else { 80662306a36Sopenharmony_ci node->vm_info.first_vmid_kfd = first_vmid_kfd; 80762306a36Sopenharmony_ci node->vm_info.last_vmid_kfd = last_vmid_kfd; 80862306a36Sopenharmony_ci node->compute_vmid_bitmap = 80962306a36Sopenharmony_ci gpu_resources->compute_vmid_bitmap; 81062306a36Sopenharmony_ci } 81162306a36Sopenharmony_ci node->max_proc_per_quantum = max_proc_per_quantum; 81262306a36Sopenharmony_ci atomic_set(&node->sram_ecc_flag, 0); 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci amdgpu_amdkfd_get_local_mem_info(kfd->adev, 81562306a36Sopenharmony_ci &node->local_mem_info, node->xcp); 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) 81862306a36Sopenharmony_ci kfd_setup_interrupt_bitmap(node, i); 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci /* Initialize the KFD node */ 82162306a36Sopenharmony_ci if (kfd_init_node(node)) { 82262306a36Sopenharmony_ci dev_err(kfd_device, "Error initializing KFD node\n"); 82362306a36Sopenharmony_ci goto node_init_error; 82462306a36Sopenharmony_ci } 82562306a36Sopenharmony_ci kfd->nodes[i] = node; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci svm_range_set_max_pages(kfd->adev); 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci spin_lock_init(&kfd->watch_points_lock); 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci kfd->init_complete = true; 83362306a36Sopenharmony_ci dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, 83462306a36Sopenharmony_ci kfd->adev->pdev->device); 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci pr_debug("Starting kfd with the following scheduling policy %d\n", 83762306a36Sopenharmony_ci node->dqm->sched_policy); 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci goto out; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_cinode_init_error: 84262306a36Sopenharmony_cinode_alloc_error: 84362306a36Sopenharmony_ci kfd_cleanup_nodes(kfd, i); 84462306a36Sopenharmony_ci kfd_doorbell_fini(kfd); 84562306a36Sopenharmony_cikfd_doorbell_error: 84662306a36Sopenharmony_ci kfd_gtt_sa_fini(kfd); 84762306a36Sopenharmony_cikfd_gtt_sa_init_error: 84862306a36Sopenharmony_ci amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 84962306a36Sopenharmony_cialloc_gtt_mem_failure: 85062306a36Sopenharmony_ci dev_err(kfd_device, 85162306a36Sopenharmony_ci "device %x:%x NOT added due to errors\n", 85262306a36Sopenharmony_ci kfd->adev->pdev->vendor, kfd->adev->pdev->device); 85362306a36Sopenharmony_ciout: 85462306a36Sopenharmony_ci return kfd->init_complete; 85562306a36Sopenharmony_ci} 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_civoid kgd2kfd_device_exit(struct kfd_dev *kfd) 85862306a36Sopenharmony_ci{ 85962306a36Sopenharmony_ci if (kfd->init_complete) { 86062306a36Sopenharmony_ci /* Cleanup KFD nodes */ 86162306a36Sopenharmony_ci kfd_cleanup_nodes(kfd, kfd->num_nodes); 86262306a36Sopenharmony_ci /* Cleanup common/shared resources */ 86362306a36Sopenharmony_ci kfd_doorbell_fini(kfd); 86462306a36Sopenharmony_ci ida_destroy(&kfd->doorbell_ida); 86562306a36Sopenharmony_ci kfd_gtt_sa_fini(kfd); 86662306a36Sopenharmony_ci amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 86762306a36Sopenharmony_ci } 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci kfree(kfd); 87062306a36Sopenharmony_ci} 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ciint kgd2kfd_pre_reset(struct kfd_dev *kfd) 87362306a36Sopenharmony_ci{ 87462306a36Sopenharmony_ci struct kfd_node *node; 87562306a36Sopenharmony_ci int i; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci if (!kfd->init_complete) 87862306a36Sopenharmony_ci return 0; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 88162306a36Sopenharmony_ci node = kfd->nodes[i]; 88262306a36Sopenharmony_ci kfd_smi_event_update_gpu_reset(node, false); 88362306a36Sopenharmony_ci node->dqm->ops.pre_reset(node->dqm); 88462306a36Sopenharmony_ci } 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci kgd2kfd_suspend(kfd, false); 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) 88962306a36Sopenharmony_ci kfd_signal_reset_event(kfd->nodes[i]); 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci return 0; 89262306a36Sopenharmony_ci} 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci/* 89562306a36Sopenharmony_ci * Fix me. KFD won't be able to resume existing process for now. 89662306a36Sopenharmony_ci * We will keep all existing process in a evicted state and 89762306a36Sopenharmony_ci * wait the process to be terminated. 89862306a36Sopenharmony_ci */ 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ciint kgd2kfd_post_reset(struct kfd_dev *kfd) 90162306a36Sopenharmony_ci{ 90262306a36Sopenharmony_ci int ret; 90362306a36Sopenharmony_ci struct kfd_node *node; 90462306a36Sopenharmony_ci int i; 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci if (!kfd->init_complete) 90762306a36Sopenharmony_ci return 0; 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 91062306a36Sopenharmony_ci ret = kfd_resume(kfd->nodes[i]); 91162306a36Sopenharmony_ci if (ret) 91262306a36Sopenharmony_ci return ret; 91362306a36Sopenharmony_ci } 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci mutex_lock(&kfd_processes_mutex); 91662306a36Sopenharmony_ci --kfd_locked; 91762306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 92062306a36Sopenharmony_ci node = kfd->nodes[i]; 92162306a36Sopenharmony_ci atomic_set(&node->sram_ecc_flag, 0); 92262306a36Sopenharmony_ci kfd_smi_event_update_gpu_reset(node, true); 92362306a36Sopenharmony_ci } 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci return 0; 92662306a36Sopenharmony_ci} 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_cibool kfd_is_locked(void) 92962306a36Sopenharmony_ci{ 93062306a36Sopenharmony_ci lockdep_assert_held(&kfd_processes_mutex); 93162306a36Sopenharmony_ci return (kfd_locked > 0); 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_civoid kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) 93562306a36Sopenharmony_ci{ 93662306a36Sopenharmony_ci struct kfd_node *node; 93762306a36Sopenharmony_ci int i; 93862306a36Sopenharmony_ci int count; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci if (!kfd->init_complete) 94162306a36Sopenharmony_ci return; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci /* for runtime suspend, skip locking kfd */ 94462306a36Sopenharmony_ci if (!run_pm) { 94562306a36Sopenharmony_ci mutex_lock(&kfd_processes_mutex); 94662306a36Sopenharmony_ci count = ++kfd_locked; 94762306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci /* For first KFD device suspend all the KFD processes */ 95062306a36Sopenharmony_ci if (count == 1) 95162306a36Sopenharmony_ci kfd_suspend_all_processes(); 95262306a36Sopenharmony_ci } 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 95562306a36Sopenharmony_ci node = kfd->nodes[i]; 95662306a36Sopenharmony_ci node->dqm->ops.stop(node->dqm); 95762306a36Sopenharmony_ci } 95862306a36Sopenharmony_ci} 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ciint kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) 96162306a36Sopenharmony_ci{ 96262306a36Sopenharmony_ci int ret, count, i; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (!kfd->init_complete) 96562306a36Sopenharmony_ci return 0; 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 96862306a36Sopenharmony_ci ret = kfd_resume(kfd->nodes[i]); 96962306a36Sopenharmony_ci if (ret) 97062306a36Sopenharmony_ci return ret; 97162306a36Sopenharmony_ci } 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci /* for runtime resume, skip unlocking kfd */ 97462306a36Sopenharmony_ci if (!run_pm) { 97562306a36Sopenharmony_ci mutex_lock(&kfd_processes_mutex); 97662306a36Sopenharmony_ci count = --kfd_locked; 97762306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); 98062306a36Sopenharmony_ci if (count == 0) 98162306a36Sopenharmony_ci ret = kfd_resume_all_processes(); 98262306a36Sopenharmony_ci } 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci return ret; 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_cistatic int kfd_resume(struct kfd_node *node) 98862306a36Sopenharmony_ci{ 98962306a36Sopenharmony_ci int err = 0; 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci err = node->dqm->ops.start(node->dqm); 99262306a36Sopenharmony_ci if (err) 99362306a36Sopenharmony_ci dev_err(kfd_device, 99462306a36Sopenharmony_ci "Error starting queue manager for device %x:%x\n", 99562306a36Sopenharmony_ci node->adev->pdev->vendor, node->adev->pdev->device); 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci return err; 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_cistatic inline void kfd_queue_work(struct workqueue_struct *wq, 100162306a36Sopenharmony_ci struct work_struct *work) 100262306a36Sopenharmony_ci{ 100362306a36Sopenharmony_ci int cpu, new_cpu; 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci cpu = new_cpu = smp_processor_id(); 100662306a36Sopenharmony_ci do { 100762306a36Sopenharmony_ci new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; 100862306a36Sopenharmony_ci if (cpu_to_node(new_cpu) == numa_node_id()) 100962306a36Sopenharmony_ci break; 101062306a36Sopenharmony_ci } while (cpu != new_cpu); 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci queue_work_on(new_cpu, wq, work); 101362306a36Sopenharmony_ci} 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci/* This is called directly from KGD at ISR. */ 101662306a36Sopenharmony_civoid kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i; 101962306a36Sopenharmony_ci bool is_patched = false; 102062306a36Sopenharmony_ci unsigned long flags; 102162306a36Sopenharmony_ci struct kfd_node *node; 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci if (!kfd->init_complete) 102462306a36Sopenharmony_ci return; 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) { 102762306a36Sopenharmony_ci dev_err_once(kfd_device, "Ring entry too small\n"); 102862306a36Sopenharmony_ci return; 102962306a36Sopenharmony_ci } 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci for (i = 0; i < kfd->num_nodes; i++) { 103262306a36Sopenharmony_ci node = kfd->nodes[i]; 103362306a36Sopenharmony_ci spin_lock_irqsave(&node->interrupt_lock, flags); 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci if (node->interrupts_active 103662306a36Sopenharmony_ci && interrupt_is_wanted(node, ih_ring_entry, 103762306a36Sopenharmony_ci patched_ihre, &is_patched) 103862306a36Sopenharmony_ci && enqueue_ih_ring_entry(node, 103962306a36Sopenharmony_ci is_patched ? patched_ihre : ih_ring_entry)) { 104062306a36Sopenharmony_ci kfd_queue_work(node->ih_wq, &node->interrupt_work); 104162306a36Sopenharmony_ci spin_unlock_irqrestore(&node->interrupt_lock, flags); 104262306a36Sopenharmony_ci return; 104362306a36Sopenharmony_ci } 104462306a36Sopenharmony_ci spin_unlock_irqrestore(&node->interrupt_lock, flags); 104562306a36Sopenharmony_ci } 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci} 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ciint kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) 105062306a36Sopenharmony_ci{ 105162306a36Sopenharmony_ci struct kfd_process *p; 105262306a36Sopenharmony_ci int r; 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci /* Because we are called from arbitrary context (workqueue) as opposed 105562306a36Sopenharmony_ci * to process context, kfd_process could attempt to exit while we are 105662306a36Sopenharmony_ci * running so the lookup function increments the process ref count. 105762306a36Sopenharmony_ci */ 105862306a36Sopenharmony_ci p = kfd_lookup_process_by_mm(mm); 105962306a36Sopenharmony_ci if (!p) 106062306a36Sopenharmony_ci return -ESRCH; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 106362306a36Sopenharmony_ci r = kfd_process_evict_queues(p, trigger); 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci kfd_unref_process(p); 106662306a36Sopenharmony_ci return r; 106762306a36Sopenharmony_ci} 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ciint kgd2kfd_resume_mm(struct mm_struct *mm) 107062306a36Sopenharmony_ci{ 107162306a36Sopenharmony_ci struct kfd_process *p; 107262306a36Sopenharmony_ci int r; 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci /* Because we are called from arbitrary context (workqueue) as opposed 107562306a36Sopenharmony_ci * to process context, kfd_process could attempt to exit while we are 107662306a36Sopenharmony_ci * running so the lookup function increments the process ref count. 107762306a36Sopenharmony_ci */ 107862306a36Sopenharmony_ci p = kfd_lookup_process_by_mm(mm); 107962306a36Sopenharmony_ci if (!p) 108062306a36Sopenharmony_ci return -ESRCH; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci r = kfd_process_restore_queues(p); 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci kfd_unref_process(p); 108562306a36Sopenharmony_ci return r; 108662306a36Sopenharmony_ci} 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will 108962306a36Sopenharmony_ci * prepare for safe eviction of KFD BOs that belong to the specified 109062306a36Sopenharmony_ci * process. 109162306a36Sopenharmony_ci * 109262306a36Sopenharmony_ci * @mm: mm_struct that identifies the specified KFD process 109362306a36Sopenharmony_ci * @fence: eviction fence attached to KFD process BOs 109462306a36Sopenharmony_ci * 109562306a36Sopenharmony_ci */ 109662306a36Sopenharmony_ciint kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, 109762306a36Sopenharmony_ci struct dma_fence *fence) 109862306a36Sopenharmony_ci{ 109962306a36Sopenharmony_ci struct kfd_process *p; 110062306a36Sopenharmony_ci unsigned long active_time; 110162306a36Sopenharmony_ci unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (!fence) 110462306a36Sopenharmony_ci return -EINVAL; 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci if (dma_fence_is_signaled(fence)) 110762306a36Sopenharmony_ci return 0; 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci p = kfd_lookup_process_by_mm(mm); 111062306a36Sopenharmony_ci if (!p) 111162306a36Sopenharmony_ci return -ENODEV; 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci if (fence->seqno == p->last_eviction_seqno) 111462306a36Sopenharmony_ci goto out; 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_ci p->last_eviction_seqno = fence->seqno; 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci /* Avoid KFD process starvation. Wait for at least 111962306a36Sopenharmony_ci * PROCESS_ACTIVE_TIME_MS before evicting the process again 112062306a36Sopenharmony_ci */ 112162306a36Sopenharmony_ci active_time = get_jiffies_64() - p->last_restore_timestamp; 112262306a36Sopenharmony_ci if (delay_jiffies > active_time) 112362306a36Sopenharmony_ci delay_jiffies -= active_time; 112462306a36Sopenharmony_ci else 112562306a36Sopenharmony_ci delay_jiffies = 0; 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci /* During process initialization eviction_work.dwork is initialized 112862306a36Sopenharmony_ci * to kfd_evict_bo_worker 112962306a36Sopenharmony_ci */ 113062306a36Sopenharmony_ci WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", 113162306a36Sopenharmony_ci p->lead_thread->pid, delay_jiffies); 113262306a36Sopenharmony_ci schedule_delayed_work(&p->eviction_work, delay_jiffies); 113362306a36Sopenharmony_ciout: 113462306a36Sopenharmony_ci kfd_unref_process(p); 113562306a36Sopenharmony_ci return 0; 113662306a36Sopenharmony_ci} 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_cistatic int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 113962306a36Sopenharmony_ci unsigned int chunk_size) 114062306a36Sopenharmony_ci{ 114162306a36Sopenharmony_ci if (WARN_ON(buf_size < chunk_size)) 114262306a36Sopenharmony_ci return -EINVAL; 114362306a36Sopenharmony_ci if (WARN_ON(buf_size == 0)) 114462306a36Sopenharmony_ci return -EINVAL; 114562306a36Sopenharmony_ci if (WARN_ON(chunk_size == 0)) 114662306a36Sopenharmony_ci return -EINVAL; 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci kfd->gtt_sa_chunk_size = chunk_size; 114962306a36Sopenharmony_ci kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks, 115262306a36Sopenharmony_ci GFP_KERNEL); 115362306a36Sopenharmony_ci if (!kfd->gtt_sa_bitmap) 115462306a36Sopenharmony_ci return -ENOMEM; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", 115762306a36Sopenharmony_ci kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci mutex_init(&kfd->gtt_sa_lock); 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci return 0; 116262306a36Sopenharmony_ci} 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_cistatic void kfd_gtt_sa_fini(struct kfd_dev *kfd) 116562306a36Sopenharmony_ci{ 116662306a36Sopenharmony_ci mutex_destroy(&kfd->gtt_sa_lock); 116762306a36Sopenharmony_ci bitmap_free(kfd->gtt_sa_bitmap); 116862306a36Sopenharmony_ci} 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_cistatic inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, 117162306a36Sopenharmony_ci unsigned int bit_num, 117262306a36Sopenharmony_ci unsigned int chunk_size) 117362306a36Sopenharmony_ci{ 117462306a36Sopenharmony_ci return start_addr + bit_num * chunk_size; 117562306a36Sopenharmony_ci} 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_cistatic inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, 117862306a36Sopenharmony_ci unsigned int bit_num, 117962306a36Sopenharmony_ci unsigned int chunk_size) 118062306a36Sopenharmony_ci{ 118162306a36Sopenharmony_ci return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); 118262306a36Sopenharmony_ci} 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ciint kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size, 118562306a36Sopenharmony_ci struct kfd_mem_obj **mem_obj) 118662306a36Sopenharmony_ci{ 118762306a36Sopenharmony_ci unsigned int found, start_search, cur_size; 118862306a36Sopenharmony_ci struct kfd_dev *kfd = node->kfd; 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci if (size == 0) 119162306a36Sopenharmony_ci return -EINVAL; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) 119462306a36Sopenharmony_ci return -ENOMEM; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); 119762306a36Sopenharmony_ci if (!(*mem_obj)) 119862306a36Sopenharmony_ci return -ENOMEM; 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci start_search = 0; 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci mutex_lock(&kfd->gtt_sa_lock); 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_cikfd_gtt_restart_search: 120762306a36Sopenharmony_ci /* Find the first chunk that is free */ 120862306a36Sopenharmony_ci found = find_next_zero_bit(kfd->gtt_sa_bitmap, 120962306a36Sopenharmony_ci kfd->gtt_sa_num_of_chunks, 121062306a36Sopenharmony_ci start_search); 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci pr_debug("Found = %d\n", found); 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci /* If there wasn't any free chunk, bail out */ 121562306a36Sopenharmony_ci if (found == kfd->gtt_sa_num_of_chunks) 121662306a36Sopenharmony_ci goto kfd_gtt_no_free_chunk; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci /* Update fields of mem_obj */ 121962306a36Sopenharmony_ci (*mem_obj)->range_start = found; 122062306a36Sopenharmony_ci (*mem_obj)->range_end = found; 122162306a36Sopenharmony_ci (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( 122262306a36Sopenharmony_ci kfd->gtt_start_gpu_addr, 122362306a36Sopenharmony_ci found, 122462306a36Sopenharmony_ci kfd->gtt_sa_chunk_size); 122562306a36Sopenharmony_ci (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( 122662306a36Sopenharmony_ci kfd->gtt_start_cpu_ptr, 122762306a36Sopenharmony_ci found, 122862306a36Sopenharmony_ci kfd->gtt_sa_chunk_size); 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci pr_debug("gpu_addr = %p, cpu_addr = %p\n", 123162306a36Sopenharmony_ci (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci /* If we need only one chunk, mark it as allocated and get out */ 123462306a36Sopenharmony_ci if (size <= kfd->gtt_sa_chunk_size) { 123562306a36Sopenharmony_ci pr_debug("Single bit\n"); 123662306a36Sopenharmony_ci __set_bit(found, kfd->gtt_sa_bitmap); 123762306a36Sopenharmony_ci goto kfd_gtt_out; 123862306a36Sopenharmony_ci } 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci /* Otherwise, try to see if we have enough contiguous chunks */ 124162306a36Sopenharmony_ci cur_size = size - kfd->gtt_sa_chunk_size; 124262306a36Sopenharmony_ci do { 124362306a36Sopenharmony_ci (*mem_obj)->range_end = 124462306a36Sopenharmony_ci find_next_zero_bit(kfd->gtt_sa_bitmap, 124562306a36Sopenharmony_ci kfd->gtt_sa_num_of_chunks, ++found); 124662306a36Sopenharmony_ci /* 124762306a36Sopenharmony_ci * If next free chunk is not contiguous than we need to 124862306a36Sopenharmony_ci * restart our search from the last free chunk we found (which 124962306a36Sopenharmony_ci * wasn't contiguous to the previous ones 125062306a36Sopenharmony_ci */ 125162306a36Sopenharmony_ci if ((*mem_obj)->range_end != found) { 125262306a36Sopenharmony_ci start_search = found; 125362306a36Sopenharmony_ci goto kfd_gtt_restart_search; 125462306a36Sopenharmony_ci } 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci /* 125762306a36Sopenharmony_ci * If we reached end of buffer, bail out with error 125862306a36Sopenharmony_ci */ 125962306a36Sopenharmony_ci if (found == kfd->gtt_sa_num_of_chunks) 126062306a36Sopenharmony_ci goto kfd_gtt_no_free_chunk; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci /* Check if we don't need another chunk */ 126362306a36Sopenharmony_ci if (cur_size <= kfd->gtt_sa_chunk_size) 126462306a36Sopenharmony_ci cur_size = 0; 126562306a36Sopenharmony_ci else 126662306a36Sopenharmony_ci cur_size -= kfd->gtt_sa_chunk_size; 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci } while (cur_size > 0); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci pr_debug("range_start = %d, range_end = %d\n", 127162306a36Sopenharmony_ci (*mem_obj)->range_start, (*mem_obj)->range_end); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci /* Mark the chunks as allocated */ 127462306a36Sopenharmony_ci bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start, 127562306a36Sopenharmony_ci (*mem_obj)->range_end - (*mem_obj)->range_start + 1); 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_cikfd_gtt_out: 127862306a36Sopenharmony_ci mutex_unlock(&kfd->gtt_sa_lock); 127962306a36Sopenharmony_ci return 0; 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_cikfd_gtt_no_free_chunk: 128262306a36Sopenharmony_ci pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); 128362306a36Sopenharmony_ci mutex_unlock(&kfd->gtt_sa_lock); 128462306a36Sopenharmony_ci kfree(*mem_obj); 128562306a36Sopenharmony_ci return -ENOMEM; 128662306a36Sopenharmony_ci} 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ciint kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj) 128962306a36Sopenharmony_ci{ 129062306a36Sopenharmony_ci struct kfd_dev *kfd = node->kfd; 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci /* Act like kfree when trying to free a NULL object */ 129362306a36Sopenharmony_ci if (!mem_obj) 129462306a36Sopenharmony_ci return 0; 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", 129762306a36Sopenharmony_ci mem_obj, mem_obj->range_start, mem_obj->range_end); 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci mutex_lock(&kfd->gtt_sa_lock); 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci /* Mark the chunks as free */ 130262306a36Sopenharmony_ci bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start, 130362306a36Sopenharmony_ci mem_obj->range_end - mem_obj->range_start + 1); 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci mutex_unlock(&kfd->gtt_sa_lock); 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci kfree(mem_obj); 130862306a36Sopenharmony_ci return 0; 130962306a36Sopenharmony_ci} 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_civoid kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) 131262306a36Sopenharmony_ci{ 131362306a36Sopenharmony_ci /* 131462306a36Sopenharmony_ci * TODO: Currently update SRAM ECC flag for first node. 131562306a36Sopenharmony_ci * This needs to be updated later when we can 131662306a36Sopenharmony_ci * identify SRAM ECC error on other nodes also. 131762306a36Sopenharmony_ci */ 131862306a36Sopenharmony_ci if (kfd) 131962306a36Sopenharmony_ci atomic_inc(&kfd->nodes[0]->sram_ecc_flag); 132062306a36Sopenharmony_ci} 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_civoid kfd_inc_compute_active(struct kfd_node *node) 132362306a36Sopenharmony_ci{ 132462306a36Sopenharmony_ci if (atomic_inc_return(&node->kfd->compute_profile) == 1) 132562306a36Sopenharmony_ci amdgpu_amdkfd_set_compute_idle(node->adev, false); 132662306a36Sopenharmony_ci} 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_civoid kfd_dec_compute_active(struct kfd_node *node) 132962306a36Sopenharmony_ci{ 133062306a36Sopenharmony_ci int count = atomic_dec_return(&node->kfd->compute_profile); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci if (count == 0) 133362306a36Sopenharmony_ci amdgpu_amdkfd_set_compute_idle(node->adev, true); 133462306a36Sopenharmony_ci WARN_ONCE(count < 0, "Compute profile ref. count error"); 133562306a36Sopenharmony_ci} 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_civoid kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 133862306a36Sopenharmony_ci{ 133962306a36Sopenharmony_ci /* 134062306a36Sopenharmony_ci * TODO: For now, raise the throttling event only on first node. 134162306a36Sopenharmony_ci * This will need to change after we are able to determine 134262306a36Sopenharmony_ci * which node raised the throttling event. 134362306a36Sopenharmony_ci */ 134462306a36Sopenharmony_ci if (kfd && kfd->init_complete) 134562306a36Sopenharmony_ci kfd_smi_event_update_thermal_throttling(kfd->nodes[0], 134662306a36Sopenharmony_ci throttle_bitmask); 134762306a36Sopenharmony_ci} 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and 135062306a36Sopenharmony_ci * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA. 135162306a36Sopenharmony_ci * When the device has more than two engines, we reserve two for PCIe to enable 135262306a36Sopenharmony_ci * full-duplex and the rest are used as XGMI. 135362306a36Sopenharmony_ci */ 135462306a36Sopenharmony_ciunsigned int kfd_get_num_sdma_engines(struct kfd_node *node) 135562306a36Sopenharmony_ci{ 135662306a36Sopenharmony_ci /* If XGMI is not supported, all SDMA engines are PCIe */ 135762306a36Sopenharmony_ci if (!node->adev->gmc.xgmi.supported) 135862306a36Sopenharmony_ci return node->adev->sdma.num_instances/(int)node->kfd->num_nodes; 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2); 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ciunsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node) 136462306a36Sopenharmony_ci{ 136562306a36Sopenharmony_ci /* After reserved for PCIe, the rest of engines are XGMI */ 136662306a36Sopenharmony_ci return node->adev->sdma.num_instances/(int)node->kfd->num_nodes - 136762306a36Sopenharmony_ci kfd_get_num_sdma_engines(node); 136862306a36Sopenharmony_ci} 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ciint kgd2kfd_check_and_lock_kfd(void) 137162306a36Sopenharmony_ci{ 137262306a36Sopenharmony_ci mutex_lock(&kfd_processes_mutex); 137362306a36Sopenharmony_ci if (!hash_empty(kfd_processes_table) || kfd_is_locked()) { 137462306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 137562306a36Sopenharmony_ci return -EBUSY; 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci ++kfd_locked; 137962306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci return 0; 138262306a36Sopenharmony_ci} 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_civoid kgd2kfd_unlock_kfd(void) 138562306a36Sopenharmony_ci{ 138662306a36Sopenharmony_ci mutex_lock(&kfd_processes_mutex); 138762306a36Sopenharmony_ci --kfd_locked; 138862306a36Sopenharmony_ci mutex_unlock(&kfd_processes_mutex); 138962306a36Sopenharmony_ci} 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci#if defined(CONFIG_DEBUG_FS) 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci/* This function will send a package to HIQ to hang the HWS 139462306a36Sopenharmony_ci * which will trigger a GPU reset and bring the HWS back to normal state 139562306a36Sopenharmony_ci */ 139662306a36Sopenharmony_ciint kfd_debugfs_hang_hws(struct kfd_node *dev) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { 139962306a36Sopenharmony_ci pr_err("HWS is not enabled"); 140062306a36Sopenharmony_ci return -EINVAL; 140162306a36Sopenharmony_ci } 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci return dqm_debugfs_hang_hws(dev->dqm); 140462306a36Sopenharmony_ci} 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ci#endif 1407