162306a36Sopenharmony_ci// SPDX-License-Identifier: MIT 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2014 Advanced Micro Devices, Inc. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 662306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 762306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 862306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 962306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 1062306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 1362306a36Sopenharmony_ci * all copies or substantial portions of the Software. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1662306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1762306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1862306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 1962306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2062306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2162306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include "amdgpu_amdkfd.h" 2562306a36Sopenharmony_ci#include "amd_pcie.h" 2662306a36Sopenharmony_ci#include "amd_shared.h" 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#include "amdgpu.h" 2962306a36Sopenharmony_ci#include "amdgpu_gfx.h" 3062306a36Sopenharmony_ci#include "amdgpu_dma_buf.h" 3162306a36Sopenharmony_ci#include <linux/module.h> 3262306a36Sopenharmony_ci#include <linux/dma-buf.h> 3362306a36Sopenharmony_ci#include "amdgpu_xgmi.h" 3462306a36Sopenharmony_ci#include <uapi/linux/kfd_ioctl.h> 3562306a36Sopenharmony_ci#include "amdgpu_ras.h" 3662306a36Sopenharmony_ci#include "amdgpu_umc.h" 3762306a36Sopenharmony_ci#include "amdgpu_reset.h" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/* Total memory size in system memory and all GPU VRAM. Used to 4062306a36Sopenharmony_ci * estimate worst case amount of memory to reserve for page tables 4162306a36Sopenharmony_ci */ 4262306a36Sopenharmony_ciuint64_t amdgpu_amdkfd_total_mem_size; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_cistatic bool kfd_initialized; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ciint amdgpu_amdkfd_init(void) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci struct sysinfo si; 4962306a36Sopenharmony_ci int ret; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci si_meminfo(&si); 5262306a36Sopenharmony_ci amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh; 5362306a36Sopenharmony_ci amdgpu_amdkfd_total_mem_size *= si.mem_unit; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci ret = kgd2kfd_init(); 5662306a36Sopenharmony_ci kfd_initialized = !ret; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci return ret; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_civoid amdgpu_amdkfd_fini(void) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci if (kfd_initialized) { 6462306a36Sopenharmony_ci kgd2kfd_exit(); 6562306a36Sopenharmony_ci kfd_initialized = false; 6662306a36Sopenharmony_ci } 6762306a36Sopenharmony_ci} 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_civoid amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci bool vf = amdgpu_sriov_vf(adev); 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci if (!kfd_initialized) 7462306a36Sopenharmony_ci return; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci adev->kfd.dev = kgd2kfd_probe(adev, vf); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci/** 8062306a36Sopenharmony_ci * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to 8162306a36Sopenharmony_ci * setup amdkfd 8262306a36Sopenharmony_ci * 8362306a36Sopenharmony_ci * @adev: amdgpu_device pointer 8462306a36Sopenharmony_ci * @aperture_base: output returning doorbell aperture base physical address 8562306a36Sopenharmony_ci * @aperture_size: output returning doorbell aperture size in bytes 8662306a36Sopenharmony_ci * @start_offset: output returning # of doorbell bytes reserved for amdgpu. 8762306a36Sopenharmony_ci * 8862306a36Sopenharmony_ci * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, 8962306a36Sopenharmony_ci * takes doorbells required for its own rings and reports the setup to amdkfd. 9062306a36Sopenharmony_ci * amdgpu reserved doorbells are at the start of the doorbell aperture. 9162306a36Sopenharmony_ci */ 9262306a36Sopenharmony_cistatic void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, 9362306a36Sopenharmony_ci phys_addr_t *aperture_base, 9462306a36Sopenharmony_ci size_t *aperture_size, 9562306a36Sopenharmony_ci size_t *start_offset) 9662306a36Sopenharmony_ci{ 9762306a36Sopenharmony_ci /* 9862306a36Sopenharmony_ci * The first num_kernel_doorbells are used by amdgpu. 9962306a36Sopenharmony_ci * amdkfd takes whatever's left in the aperture. 10062306a36Sopenharmony_ci */ 10162306a36Sopenharmony_ci if (adev->enable_mes) { 10262306a36Sopenharmony_ci /* 10362306a36Sopenharmony_ci * With MES enabled, we only need to initialize 10462306a36Sopenharmony_ci * the base address. The size and offset are 10562306a36Sopenharmony_ci * not initialized as AMDGPU manages the whole 10662306a36Sopenharmony_ci * doorbell space. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_ci *aperture_base = adev->doorbell.base; 10962306a36Sopenharmony_ci *aperture_size = 0; 11062306a36Sopenharmony_ci *start_offset = 0; 11162306a36Sopenharmony_ci } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells * 11262306a36Sopenharmony_ci sizeof(u32)) { 11362306a36Sopenharmony_ci *aperture_base = adev->doorbell.base; 11462306a36Sopenharmony_ci *aperture_size = adev->doorbell.size; 11562306a36Sopenharmony_ci *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32); 11662306a36Sopenharmony_ci } else { 11762306a36Sopenharmony_ci *aperture_base = 0; 11862306a36Sopenharmony_ci *aperture_size = 0; 11962306a36Sopenharmony_ci *start_offset = 0; 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_cistatic void amdgpu_amdkfd_reset_work(struct work_struct *work) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci struct amdgpu_device *adev = container_of(work, struct amdgpu_device, 12762306a36Sopenharmony_ci kfd.reset_work); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci struct amdgpu_reset_context reset_context; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci memset(&reset_context, 0, sizeof(reset_context)); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci reset_context.method = AMD_RESET_METHOD_NONE; 13462306a36Sopenharmony_ci reset_context.reset_req_dev = adev; 13562306a36Sopenharmony_ci clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci amdgpu_device_gpu_recover(adev, NULL, &reset_context); 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_civoid amdgpu_amdkfd_device_init(struct amdgpu_device *adev) 14162306a36Sopenharmony_ci{ 14262306a36Sopenharmony_ci int i; 14362306a36Sopenharmony_ci int last_valid_bit; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci amdgpu_amdkfd_gpuvm_init_mem_limits(); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (adev->kfd.dev) { 14862306a36Sopenharmony_ci struct kgd2kfd_shared_resources gpu_resources = { 14962306a36Sopenharmony_ci .compute_vmid_bitmap = 15062306a36Sopenharmony_ci ((1 << AMDGPU_NUM_VMID) - 1) - 15162306a36Sopenharmony_ci ((1 << adev->vm_manager.first_kfd_vmid) - 1), 15262306a36Sopenharmony_ci .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, 15362306a36Sopenharmony_ci .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, 15462306a36Sopenharmony_ci .gpuvm_size = min(adev->vm_manager.max_pfn 15562306a36Sopenharmony_ci << AMDGPU_GPU_PAGE_SHIFT, 15662306a36Sopenharmony_ci AMDGPU_GMC_HOLE_START), 15762306a36Sopenharmony_ci .drm_render_minor = adev_to_drm(adev)->render->index, 15862306a36Sopenharmony_ci .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, 15962306a36Sopenharmony_ci .enable_mes = adev->enable_mes, 16062306a36Sopenharmony_ci }; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci /* this is going to have a few of the MSBs set that we need to 16362306a36Sopenharmony_ci * clear 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_ci bitmap_complement(gpu_resources.cp_queue_bitmap, 16662306a36Sopenharmony_ci adev->gfx.mec_bitmap[0].queue_bitmap, 16762306a36Sopenharmony_ci KGD_MAX_QUEUES); 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci /* According to linux/bitmap.h we shouldn't use bitmap_clear if 17062306a36Sopenharmony_ci * nbits is not compile time constant 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_ci last_valid_bit = 1 /* only first MEC can have compute queues */ 17362306a36Sopenharmony_ci * adev->gfx.mec.num_pipe_per_mec 17462306a36Sopenharmony_ci * adev->gfx.mec.num_queue_per_pipe; 17562306a36Sopenharmony_ci for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) 17662306a36Sopenharmony_ci clear_bit(i, gpu_resources.cp_queue_bitmap); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci amdgpu_doorbell_get_kfd_info(adev, 17962306a36Sopenharmony_ci &gpu_resources.doorbell_physical_address, 18062306a36Sopenharmony_ci &gpu_resources.doorbell_aperture_size, 18162306a36Sopenharmony_ci &gpu_resources.doorbell_start_offset); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci /* Since SOC15, BIF starts to statically use the 18462306a36Sopenharmony_ci * lower 12 bits of doorbell addresses for routing 18562306a36Sopenharmony_ci * based on settings in registers like 18662306a36Sopenharmony_ci * SDMA0_DOORBELL_RANGE etc.. 18762306a36Sopenharmony_ci * In order to route a doorbell to CP engine, the lower 18862306a36Sopenharmony_ci * 12 bits of its address has to be outside the range 18962306a36Sopenharmony_ci * set for SDMA, VCN, and IH blocks. 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ci if (adev->asic_type >= CHIP_VEGA10) { 19262306a36Sopenharmony_ci gpu_resources.non_cp_doorbells_start = 19362306a36Sopenharmony_ci adev->doorbell_index.first_non_cp; 19462306a36Sopenharmony_ci gpu_resources.non_cp_doorbells_end = 19562306a36Sopenharmony_ci adev->doorbell_index.last_non_cp; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, 19962306a36Sopenharmony_ci &gpu_resources); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_civoid amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci if (adev->kfd.dev) { 21062306a36Sopenharmony_ci kgd2kfd_device_exit(adev->kfd.dev); 21162306a36Sopenharmony_ci adev->kfd.dev = NULL; 21262306a36Sopenharmony_ci amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_civoid amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, 21762306a36Sopenharmony_ci const void *ih_ring_entry) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci if (adev->kfd.dev) 22062306a36Sopenharmony_ci kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_civoid amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci if (adev->kfd.dev) 22662306a36Sopenharmony_ci kgd2kfd_suspend(adev->kfd.dev, run_pm); 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ciint amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci int r = 0; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci if (adev->kfd.dev) 23462306a36Sopenharmony_ci r = kgd2kfd_resume(adev->kfd.dev, run_pm); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci return r; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ciint amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) 24062306a36Sopenharmony_ci{ 24162306a36Sopenharmony_ci int r = 0; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci if (adev->kfd.dev) 24462306a36Sopenharmony_ci r = kgd2kfd_pre_reset(adev->kfd.dev); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci return r; 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ciint amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci int r = 0; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (adev->kfd.dev) 25462306a36Sopenharmony_ci r = kgd2kfd_post_reset(adev->kfd.dev); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci return r; 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_civoid amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci if (amdgpu_device_should_recover_gpu(adev)) 26262306a36Sopenharmony_ci amdgpu_reset_domain_schedule(adev->reset_domain, 26362306a36Sopenharmony_ci &adev->kfd.reset_work); 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ciint amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, 26762306a36Sopenharmony_ci void **mem_obj, uint64_t *gpu_addr, 26862306a36Sopenharmony_ci void **cpu_ptr, bool cp_mqd_gfx9) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci struct amdgpu_bo *bo = NULL; 27162306a36Sopenharmony_ci struct amdgpu_bo_param bp; 27262306a36Sopenharmony_ci int r; 27362306a36Sopenharmony_ci void *cpu_ptr_tmp = NULL; 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci memset(&bp, 0, sizeof(bp)); 27662306a36Sopenharmony_ci bp.size = size; 27762306a36Sopenharmony_ci bp.byte_align = PAGE_SIZE; 27862306a36Sopenharmony_ci bp.domain = AMDGPU_GEM_DOMAIN_GTT; 27962306a36Sopenharmony_ci bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; 28062306a36Sopenharmony_ci bp.type = ttm_bo_type_kernel; 28162306a36Sopenharmony_ci bp.resv = NULL; 28262306a36Sopenharmony_ci bp.bo_ptr_size = sizeof(struct amdgpu_bo); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci if (cp_mqd_gfx9) 28562306a36Sopenharmony_ci bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci r = amdgpu_bo_create(adev, &bp, &bo); 28862306a36Sopenharmony_ci if (r) { 28962306a36Sopenharmony_ci dev_err(adev->dev, 29062306a36Sopenharmony_ci "failed to allocate BO for amdkfd (%d)\n", r); 29162306a36Sopenharmony_ci return r; 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci /* map the buffer */ 29562306a36Sopenharmony_ci r = amdgpu_bo_reserve(bo, true); 29662306a36Sopenharmony_ci if (r) { 29762306a36Sopenharmony_ci dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 29862306a36Sopenharmony_ci goto allocate_mem_reserve_bo_failed; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); 30262306a36Sopenharmony_ci if (r) { 30362306a36Sopenharmony_ci dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); 30462306a36Sopenharmony_ci goto allocate_mem_pin_bo_failed; 30562306a36Sopenharmony_ci } 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci r = amdgpu_ttm_alloc_gart(&bo->tbo); 30862306a36Sopenharmony_ci if (r) { 30962306a36Sopenharmony_ci dev_err(adev->dev, "%p bind failed\n", bo); 31062306a36Sopenharmony_ci goto allocate_mem_kmap_bo_failed; 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); 31462306a36Sopenharmony_ci if (r) { 31562306a36Sopenharmony_ci dev_err(adev->dev, 31662306a36Sopenharmony_ci "(%d) failed to map bo to kernel for amdkfd\n", r); 31762306a36Sopenharmony_ci goto allocate_mem_kmap_bo_failed; 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci *mem_obj = bo; 32162306a36Sopenharmony_ci *gpu_addr = amdgpu_bo_gpu_offset(bo); 32262306a36Sopenharmony_ci *cpu_ptr = cpu_ptr_tmp; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci amdgpu_bo_unreserve(bo); 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci return 0; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ciallocate_mem_kmap_bo_failed: 32962306a36Sopenharmony_ci amdgpu_bo_unpin(bo); 33062306a36Sopenharmony_ciallocate_mem_pin_bo_failed: 33162306a36Sopenharmony_ci amdgpu_bo_unreserve(bo); 33262306a36Sopenharmony_ciallocate_mem_reserve_bo_failed: 33362306a36Sopenharmony_ci amdgpu_bo_unref(&bo); 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci return r; 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_civoid amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) 33962306a36Sopenharmony_ci{ 34062306a36Sopenharmony_ci struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci amdgpu_bo_reserve(bo, true); 34362306a36Sopenharmony_ci amdgpu_bo_kunmap(bo); 34462306a36Sopenharmony_ci amdgpu_bo_unpin(bo); 34562306a36Sopenharmony_ci amdgpu_bo_unreserve(bo); 34662306a36Sopenharmony_ci amdgpu_bo_unref(&(bo)); 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ciint amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, 35062306a36Sopenharmony_ci void **mem_obj) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci struct amdgpu_bo *bo = NULL; 35362306a36Sopenharmony_ci struct amdgpu_bo_user *ubo; 35462306a36Sopenharmony_ci struct amdgpu_bo_param bp; 35562306a36Sopenharmony_ci int r; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci memset(&bp, 0, sizeof(bp)); 35862306a36Sopenharmony_ci bp.size = size; 35962306a36Sopenharmony_ci bp.byte_align = 1; 36062306a36Sopenharmony_ci bp.domain = AMDGPU_GEM_DOMAIN_GWS; 36162306a36Sopenharmony_ci bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 36262306a36Sopenharmony_ci bp.type = ttm_bo_type_device; 36362306a36Sopenharmony_ci bp.resv = NULL; 36462306a36Sopenharmony_ci bp.bo_ptr_size = sizeof(struct amdgpu_bo); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci r = amdgpu_bo_create_user(adev, &bp, &ubo); 36762306a36Sopenharmony_ci if (r) { 36862306a36Sopenharmony_ci dev_err(adev->dev, 36962306a36Sopenharmony_ci "failed to allocate gws BO for amdkfd (%d)\n", r); 37062306a36Sopenharmony_ci return r; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci bo = &ubo->bo; 37462306a36Sopenharmony_ci *mem_obj = bo; 37562306a36Sopenharmony_ci return 0; 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_civoid amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj) 37962306a36Sopenharmony_ci{ 38062306a36Sopenharmony_ci struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci amdgpu_bo_unref(&bo); 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ciuint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, 38662306a36Sopenharmony_ci enum kgd_engine_type type) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci switch (type) { 38962306a36Sopenharmony_ci case KGD_ENGINE_PFP: 39062306a36Sopenharmony_ci return adev->gfx.pfp_fw_version; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci case KGD_ENGINE_ME: 39362306a36Sopenharmony_ci return adev->gfx.me_fw_version; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci case KGD_ENGINE_CE: 39662306a36Sopenharmony_ci return adev->gfx.ce_fw_version; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci case KGD_ENGINE_MEC1: 39962306a36Sopenharmony_ci return adev->gfx.mec_fw_version; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci case KGD_ENGINE_MEC2: 40262306a36Sopenharmony_ci return adev->gfx.mec2_fw_version; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci case KGD_ENGINE_RLC: 40562306a36Sopenharmony_ci return adev->gfx.rlc_fw_version; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci case KGD_ENGINE_SDMA1: 40862306a36Sopenharmony_ci return adev->sdma.instance[0].fw_version; 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci case KGD_ENGINE_SDMA2: 41162306a36Sopenharmony_ci return adev->sdma.instance[1].fw_version; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci default: 41462306a36Sopenharmony_ci return 0; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci return 0; 41862306a36Sopenharmony_ci} 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_civoid amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, 42162306a36Sopenharmony_ci struct kfd_local_mem_info *mem_info, 42262306a36Sopenharmony_ci struct amdgpu_xcp *xcp) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci memset(mem_info, 0, sizeof(*mem_info)); 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci if (xcp) { 42762306a36Sopenharmony_ci if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size) 42862306a36Sopenharmony_ci mem_info->local_mem_size_public = 42962306a36Sopenharmony_ci KFD_XCP_MEMORY_SIZE(adev, xcp->id); 43062306a36Sopenharmony_ci else 43162306a36Sopenharmony_ci mem_info->local_mem_size_private = 43262306a36Sopenharmony_ci KFD_XCP_MEMORY_SIZE(adev, xcp->id); 43362306a36Sopenharmony_ci } else { 43462306a36Sopenharmony_ci mem_info->local_mem_size_public = adev->gmc.visible_vram_size; 43562306a36Sopenharmony_ci mem_info->local_mem_size_private = adev->gmc.real_vram_size - 43662306a36Sopenharmony_ci adev->gmc.visible_vram_size; 43762306a36Sopenharmony_ci } 43862306a36Sopenharmony_ci mem_info->vram_width = adev->gmc.vram_width; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", 44162306a36Sopenharmony_ci &adev->gmc.aper_base, 44262306a36Sopenharmony_ci mem_info->local_mem_size_public, 44362306a36Sopenharmony_ci mem_info->local_mem_size_private); 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci if (adev->pm.dpm_enabled) { 44662306a36Sopenharmony_ci if (amdgpu_emu_mode == 1) 44762306a36Sopenharmony_ci mem_info->mem_clk_max = 0; 44862306a36Sopenharmony_ci else 44962306a36Sopenharmony_ci mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; 45062306a36Sopenharmony_ci } else 45162306a36Sopenharmony_ci mem_info->mem_clk_max = 100; 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ciuint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci if (adev->gfx.funcs->get_gpu_clock_counter) 45762306a36Sopenharmony_ci return adev->gfx.funcs->get_gpu_clock_counter(adev); 45862306a36Sopenharmony_ci return 0; 45962306a36Sopenharmony_ci} 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ciuint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci /* the sclk is in quantas of 10kHz */ 46462306a36Sopenharmony_ci if (adev->pm.dpm_enabled) 46562306a36Sopenharmony_ci return amdgpu_dpm_get_sclk(adev, false) / 100; 46662306a36Sopenharmony_ci else 46762306a36Sopenharmony_ci return 100; 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_civoid amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci struct amdgpu_cu_info acu_info = adev->gfx.cu_info; 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci memset(cu_info, 0, sizeof(*cu_info)); 47562306a36Sopenharmony_ci if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) 47662306a36Sopenharmony_ci return; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci cu_info->cu_active_number = acu_info.number; 47962306a36Sopenharmony_ci cu_info->cu_ao_mask = acu_info.ao_cu_mask; 48062306a36Sopenharmony_ci memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], 48162306a36Sopenharmony_ci sizeof(cu_info->cu_bitmap)); 48262306a36Sopenharmony_ci cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; 48362306a36Sopenharmony_ci cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; 48462306a36Sopenharmony_ci cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; 48562306a36Sopenharmony_ci cu_info->simd_per_cu = acu_info.simd_per_cu; 48662306a36Sopenharmony_ci cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; 48762306a36Sopenharmony_ci cu_info->wave_front_size = acu_info.wave_front_size; 48862306a36Sopenharmony_ci cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; 48962306a36Sopenharmony_ci cu_info->lds_size = acu_info.lds_size; 49062306a36Sopenharmony_ci} 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ciint amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, 49362306a36Sopenharmony_ci struct amdgpu_device **dmabuf_adev, 49462306a36Sopenharmony_ci uint64_t *bo_size, void *metadata_buffer, 49562306a36Sopenharmony_ci size_t buffer_size, uint32_t *metadata_size, 49662306a36Sopenharmony_ci uint32_t *flags, int8_t *xcp_id) 49762306a36Sopenharmony_ci{ 49862306a36Sopenharmony_ci struct dma_buf *dma_buf; 49962306a36Sopenharmony_ci struct drm_gem_object *obj; 50062306a36Sopenharmony_ci struct amdgpu_bo *bo; 50162306a36Sopenharmony_ci uint64_t metadata_flags; 50262306a36Sopenharmony_ci int r = -EINVAL; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci dma_buf = dma_buf_get(dma_buf_fd); 50562306a36Sopenharmony_ci if (IS_ERR(dma_buf)) 50662306a36Sopenharmony_ci return PTR_ERR(dma_buf); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci if (dma_buf->ops != &amdgpu_dmabuf_ops) 50962306a36Sopenharmony_ci /* Can't handle non-graphics buffers */ 51062306a36Sopenharmony_ci goto out_put; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci obj = dma_buf->priv; 51362306a36Sopenharmony_ci if (obj->dev->driver != adev_to_drm(adev)->driver) 51462306a36Sopenharmony_ci /* Can't handle buffers from different drivers */ 51562306a36Sopenharmony_ci goto out_put; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci adev = drm_to_adev(obj->dev); 51862306a36Sopenharmony_ci bo = gem_to_amdgpu_bo(obj); 51962306a36Sopenharmony_ci if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | 52062306a36Sopenharmony_ci AMDGPU_GEM_DOMAIN_GTT))) 52162306a36Sopenharmony_ci /* Only VRAM and GTT BOs are supported */ 52262306a36Sopenharmony_ci goto out_put; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci r = 0; 52562306a36Sopenharmony_ci if (dmabuf_adev) 52662306a36Sopenharmony_ci *dmabuf_adev = adev; 52762306a36Sopenharmony_ci if (bo_size) 52862306a36Sopenharmony_ci *bo_size = amdgpu_bo_size(bo); 52962306a36Sopenharmony_ci if (metadata_buffer) 53062306a36Sopenharmony_ci r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, 53162306a36Sopenharmony_ci metadata_size, &metadata_flags); 53262306a36Sopenharmony_ci if (flags) { 53362306a36Sopenharmony_ci *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 53462306a36Sopenharmony_ci KFD_IOC_ALLOC_MEM_FLAGS_VRAM 53562306a36Sopenharmony_ci : KFD_IOC_ALLOC_MEM_FLAGS_GTT; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) 53862306a36Sopenharmony_ci *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC; 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci if (xcp_id) 54162306a36Sopenharmony_ci *xcp_id = bo->xcp_id; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ciout_put: 54462306a36Sopenharmony_ci dma_buf_put(dma_buf); 54562306a36Sopenharmony_ci return r; 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ciuint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, 54962306a36Sopenharmony_ci struct amdgpu_device *src) 55062306a36Sopenharmony_ci{ 55162306a36Sopenharmony_ci struct amdgpu_device *peer_adev = src; 55262306a36Sopenharmony_ci struct amdgpu_device *adev = dst; 55362306a36Sopenharmony_ci int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci if (ret < 0) { 55662306a36Sopenharmony_ci DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", 55762306a36Sopenharmony_ci adev->gmc.xgmi.physical_node_id, 55862306a36Sopenharmony_ci peer_adev->gmc.xgmi.physical_node_id, ret); 55962306a36Sopenharmony_ci ret = 0; 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci return (uint8_t)ret; 56262306a36Sopenharmony_ci} 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ciint amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, 56562306a36Sopenharmony_ci struct amdgpu_device *src, 56662306a36Sopenharmony_ci bool is_min) 56762306a36Sopenharmony_ci{ 56862306a36Sopenharmony_ci struct amdgpu_device *adev = dst, *peer_adev; 56962306a36Sopenharmony_ci int num_links; 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci if (adev->asic_type != CHIP_ALDEBARAN) 57262306a36Sopenharmony_ci return 0; 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci if (src) 57562306a36Sopenharmony_ci peer_adev = src; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* num links returns 0 for indirect peers since indirect route is unknown. */ 57862306a36Sopenharmony_ci num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); 57962306a36Sopenharmony_ci if (num_links < 0) { 58062306a36Sopenharmony_ci DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n", 58162306a36Sopenharmony_ci adev->gmc.xgmi.physical_node_id, 58262306a36Sopenharmony_ci peer_adev->gmc.xgmi.physical_node_id, num_links); 58362306a36Sopenharmony_ci num_links = 0; 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ 58762306a36Sopenharmony_ci return (num_links * 16 * 25000)/BITS_PER_BYTE; 58862306a36Sopenharmony_ci} 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ciint amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min) 59162306a36Sopenharmony_ci{ 59262306a36Sopenharmony_ci int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : 59362306a36Sopenharmony_ci fls(adev->pm.pcie_mlw_mask)) - 1; 59462306a36Sopenharmony_ci int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & 59562306a36Sopenharmony_ci CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) : 59662306a36Sopenharmony_ci fls(adev->pm.pcie_gen_mask & 59762306a36Sopenharmony_ci CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1; 59862306a36Sopenharmony_ci uint32_t num_lanes_mask = 1 << num_lanes_shift; 59962306a36Sopenharmony_ci uint32_t gen_speed_mask = 1 << gen_speed_shift; 60062306a36Sopenharmony_ci int num_lanes_factor = 0, gen_speed_mbits_factor = 0; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci switch (num_lanes_mask) { 60362306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1: 60462306a36Sopenharmony_ci num_lanes_factor = 1; 60562306a36Sopenharmony_ci break; 60662306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2: 60762306a36Sopenharmony_ci num_lanes_factor = 2; 60862306a36Sopenharmony_ci break; 60962306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4: 61062306a36Sopenharmony_ci num_lanes_factor = 4; 61162306a36Sopenharmony_ci break; 61262306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8: 61362306a36Sopenharmony_ci num_lanes_factor = 8; 61462306a36Sopenharmony_ci break; 61562306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12: 61662306a36Sopenharmony_ci num_lanes_factor = 12; 61762306a36Sopenharmony_ci break; 61862306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16: 61962306a36Sopenharmony_ci num_lanes_factor = 16; 62062306a36Sopenharmony_ci break; 62162306a36Sopenharmony_ci case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32: 62262306a36Sopenharmony_ci num_lanes_factor = 32; 62362306a36Sopenharmony_ci break; 62462306a36Sopenharmony_ci } 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci switch (gen_speed_mask) { 62762306a36Sopenharmony_ci case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1: 62862306a36Sopenharmony_ci gen_speed_mbits_factor = 2500; 62962306a36Sopenharmony_ci break; 63062306a36Sopenharmony_ci case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2: 63162306a36Sopenharmony_ci gen_speed_mbits_factor = 5000; 63262306a36Sopenharmony_ci break; 63362306a36Sopenharmony_ci case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3: 63462306a36Sopenharmony_ci gen_speed_mbits_factor = 8000; 63562306a36Sopenharmony_ci break; 63662306a36Sopenharmony_ci case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4: 63762306a36Sopenharmony_ci gen_speed_mbits_factor = 16000; 63862306a36Sopenharmony_ci break; 63962306a36Sopenharmony_ci case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5: 64062306a36Sopenharmony_ci gen_speed_mbits_factor = 32000; 64162306a36Sopenharmony_ci break; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; 64562306a36Sopenharmony_ci} 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ciint amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, 64862306a36Sopenharmony_ci enum kgd_engine_type engine, 64962306a36Sopenharmony_ci uint32_t vmid, uint64_t gpu_addr, 65062306a36Sopenharmony_ci uint32_t *ib_cmd, uint32_t ib_len) 65162306a36Sopenharmony_ci{ 65262306a36Sopenharmony_ci struct amdgpu_job *job; 65362306a36Sopenharmony_ci struct amdgpu_ib *ib; 65462306a36Sopenharmony_ci struct amdgpu_ring *ring; 65562306a36Sopenharmony_ci struct dma_fence *f = NULL; 65662306a36Sopenharmony_ci int ret; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci switch (engine) { 65962306a36Sopenharmony_ci case KGD_ENGINE_MEC1: 66062306a36Sopenharmony_ci ring = &adev->gfx.compute_ring[0]; 66162306a36Sopenharmony_ci break; 66262306a36Sopenharmony_ci case KGD_ENGINE_SDMA1: 66362306a36Sopenharmony_ci ring = &adev->sdma.instance[0].ring; 66462306a36Sopenharmony_ci break; 66562306a36Sopenharmony_ci case KGD_ENGINE_SDMA2: 66662306a36Sopenharmony_ci ring = &adev->sdma.instance[1].ring; 66762306a36Sopenharmony_ci break; 66862306a36Sopenharmony_ci default: 66962306a36Sopenharmony_ci pr_err("Invalid engine in IB submission: %d\n", engine); 67062306a36Sopenharmony_ci ret = -EINVAL; 67162306a36Sopenharmony_ci goto err; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job); 67562306a36Sopenharmony_ci if (ret) 67662306a36Sopenharmony_ci goto err; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci ib = &job->ibs[0]; 67962306a36Sopenharmony_ci memset(ib, 0, sizeof(struct amdgpu_ib)); 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci ib->gpu_addr = gpu_addr; 68262306a36Sopenharmony_ci ib->ptr = ib_cmd; 68362306a36Sopenharmony_ci ib->length_dw = ib_len; 68462306a36Sopenharmony_ci /* This works for NO_HWS. TODO: need to handle without knowing VMID */ 68562306a36Sopenharmony_ci job->vmid = vmid; 68662306a36Sopenharmony_ci job->num_ibs = 1; 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci if (ret) { 69162306a36Sopenharmony_ci DRM_ERROR("amdgpu: failed to schedule IB.\n"); 69262306a36Sopenharmony_ci goto err_ib_sched; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci /* Drop the initial kref_init count (see drm_sched_main as example) */ 69662306a36Sopenharmony_ci dma_fence_put(f); 69762306a36Sopenharmony_ci ret = dma_fence_wait(f, false); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_cierr_ib_sched: 70062306a36Sopenharmony_ci amdgpu_job_free(job); 70162306a36Sopenharmony_cierr: 70262306a36Sopenharmony_ci return ret; 70362306a36Sopenharmony_ci} 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_civoid amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) 70662306a36Sopenharmony_ci{ 70762306a36Sopenharmony_ci /* Temporary workaround to fix issues observed in some 70862306a36Sopenharmony_ci * compute applications when GFXOFF is enabled on GFX11. 70962306a36Sopenharmony_ci */ 71062306a36Sopenharmony_ci if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { 71162306a36Sopenharmony_ci pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); 71262306a36Sopenharmony_ci amdgpu_gfx_off_ctrl(adev, idle); 71362306a36Sopenharmony_ci } 71462306a36Sopenharmony_ci amdgpu_dpm_switch_power_profile(adev, 71562306a36Sopenharmony_ci PP_SMC_POWER_PROFILE_COMPUTE, 71662306a36Sopenharmony_ci !idle); 71762306a36Sopenharmony_ci} 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_cibool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) 72062306a36Sopenharmony_ci{ 72162306a36Sopenharmony_ci if (adev->kfd.dev) 72262306a36Sopenharmony_ci return vmid >= adev->vm_manager.first_kfd_vmid; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci return false; 72562306a36Sopenharmony_ci} 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ciint amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, 72862306a36Sopenharmony_ci uint16_t vmid) 72962306a36Sopenharmony_ci{ 73062306a36Sopenharmony_ci if (adev->family == AMDGPU_FAMILY_AI) { 73162306a36Sopenharmony_ci int i; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) 73462306a36Sopenharmony_ci amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); 73562306a36Sopenharmony_ci } else { 73662306a36Sopenharmony_ci amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); 73762306a36Sopenharmony_ci } 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci return 0; 74062306a36Sopenharmony_ci} 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ciint amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, 74362306a36Sopenharmony_ci uint16_t pasid, 74462306a36Sopenharmony_ci enum TLB_FLUSH_TYPE flush_type, 74562306a36Sopenharmony_ci uint32_t inst) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci bool all_hub = false; 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci if (adev->family == AMDGPU_FAMILY_AI || 75062306a36Sopenharmony_ci adev->family == AMDGPU_FAMILY_RV) 75162306a36Sopenharmony_ci all_hub = true; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); 75462306a36Sopenharmony_ci} 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_cibool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) 75762306a36Sopenharmony_ci{ 75862306a36Sopenharmony_ci return adev->have_atomics_support; 75962306a36Sopenharmony_ci} 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_civoid amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci amdgpu_device_flush_hdp(adev, NULL); 76462306a36Sopenharmony_ci} 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_civoid amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) 76762306a36Sopenharmony_ci{ 76862306a36Sopenharmony_ci amdgpu_umc_poison_handler(adev, reset); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ciint amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, 77262306a36Sopenharmony_ci uint32_t *payload) 77362306a36Sopenharmony_ci{ 77462306a36Sopenharmony_ci int ret; 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci /* Device or IH ring is not ready so bail. */ 77762306a36Sopenharmony_ci ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih); 77862306a36Sopenharmony_ci if (ret) 77962306a36Sopenharmony_ci return ret; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci /* Send payload to fence KFD interrupts */ 78262306a36Sopenharmony_ci amdgpu_amdkfd_interrupt(adev, payload); 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci return 0; 78562306a36Sopenharmony_ci} 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_cibool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) 78862306a36Sopenharmony_ci{ 78962306a36Sopenharmony_ci if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) 79062306a36Sopenharmony_ci return adev->gfx.ras->query_utcl2_poison_status(adev); 79162306a36Sopenharmony_ci else 79262306a36Sopenharmony_ci return false; 79362306a36Sopenharmony_ci} 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ciint amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) 79662306a36Sopenharmony_ci{ 79762306a36Sopenharmony_ci return kgd2kfd_check_and_lock_kfd(); 79862306a36Sopenharmony_ci} 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_civoid amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) 80162306a36Sopenharmony_ci{ 80262306a36Sopenharmony_ci kgd2kfd_unlock_kfd(); 80362306a36Sopenharmony_ci} 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ciu64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) 80762306a36Sopenharmony_ci{ 80862306a36Sopenharmony_ci u64 tmp; 80962306a36Sopenharmony_ci s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { 81262306a36Sopenharmony_ci tmp = adev->gmc.mem_partitions[mem_id].size; 81362306a36Sopenharmony_ci do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); 81462306a36Sopenharmony_ci return ALIGN_DOWN(tmp, PAGE_SIZE); 81562306a36Sopenharmony_ci } else { 81662306a36Sopenharmony_ci return adev->gmc.real_vram_size; 81762306a36Sopenharmony_ci } 81862306a36Sopenharmony_ci} 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ciint amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, 82162306a36Sopenharmony_ci u32 inst) 82262306a36Sopenharmony_ci{ 82362306a36Sopenharmony_ci struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; 82462306a36Sopenharmony_ci struct amdgpu_ring *kiq_ring = &kiq->ring; 82562306a36Sopenharmony_ci struct amdgpu_ring_funcs *ring_funcs; 82662306a36Sopenharmony_ci struct amdgpu_ring *ring; 82762306a36Sopenharmony_ci int r = 0; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 83062306a36Sopenharmony_ci return -EINVAL; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL); 83362306a36Sopenharmony_ci if (!ring_funcs) 83462306a36Sopenharmony_ci return -ENOMEM; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci ring = kzalloc(sizeof(*ring), GFP_KERNEL); 83762306a36Sopenharmony_ci if (!ring) { 83862306a36Sopenharmony_ci r = -ENOMEM; 83962306a36Sopenharmony_ci goto free_ring_funcs; 84062306a36Sopenharmony_ci } 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE; 84362306a36Sopenharmony_ci ring->doorbell_index = doorbell_off; 84462306a36Sopenharmony_ci ring->funcs = ring_funcs; 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci spin_lock(&kiq->ring_lock); 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 84962306a36Sopenharmony_ci spin_unlock(&kiq->ring_lock); 85062306a36Sopenharmony_ci r = -ENOMEM; 85162306a36Sopenharmony_ci goto free_ring; 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci if (kiq_ring->sched.ready && !adev->job_hang) 85762306a36Sopenharmony_ci r = amdgpu_ring_test_helper(kiq_ring); 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci spin_unlock(&kiq->ring_lock); 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_cifree_ring: 86262306a36Sopenharmony_ci kfree(ring); 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_cifree_ring_funcs: 86562306a36Sopenharmony_ci kfree(ring_funcs); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci return r; 86862306a36Sopenharmony_ci} 869