18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright 2014 Advanced Micro Devices, Inc. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software. 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 
218c2ecf20Sopenharmony_ci */ 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci#ifndef KFD_PRIV_H_INCLUDED 248c2ecf20Sopenharmony_ci#define KFD_PRIV_H_INCLUDED 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#include <linux/hashtable.h> 278c2ecf20Sopenharmony_ci#include <linux/mmu_notifier.h> 288c2ecf20Sopenharmony_ci#include <linux/mutex.h> 298c2ecf20Sopenharmony_ci#include <linux/types.h> 308c2ecf20Sopenharmony_ci#include <linux/atomic.h> 318c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 328c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 338c2ecf20Sopenharmony_ci#include <linux/kfd_ioctl.h> 348c2ecf20Sopenharmony_ci#include <linux/idr.h> 358c2ecf20Sopenharmony_ci#include <linux/kfifo.h> 368c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 378c2ecf20Sopenharmony_ci#include <linux/kref.h> 388c2ecf20Sopenharmony_ci#include <linux/sysfs.h> 398c2ecf20Sopenharmony_ci#include <linux/device_cgroup.h> 408c2ecf20Sopenharmony_ci#include <drm/drm_file.h> 418c2ecf20Sopenharmony_ci#include <drm/drm_drv.h> 428c2ecf20Sopenharmony_ci#include <drm/drm_device.h> 438c2ecf20Sopenharmony_ci#include <drm/drm_ioctl.h> 448c2ecf20Sopenharmony_ci#include <kgd_kfd_interface.h> 458c2ecf20Sopenharmony_ci#include <linux/swap.h> 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#include "amd_shared.h" 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci#define KFD_MAX_RING_ENTRY_SIZE 8 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci#define KFD_SYSFS_FILE_MODE 0444 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* GPU ID hash width in bits */ 548c2ecf20Sopenharmony_ci#define KFD_GPU_ID_HASH_WIDTH 16 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci/* Use upper bits of mmap offset to store KFD driver specific information. 578c2ecf20Sopenharmony_ci * BITS[63:62] - Encode MMAP type 588c2ecf20Sopenharmony_ci * BITS[61:46] - Encode gpu_id. 
To identify to which GPU the offset belongs to 598c2ecf20Sopenharmony_ci * BITS[45:0] - MMAP offset value 608c2ecf20Sopenharmony_ci * 618c2ecf20Sopenharmony_ci * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these 628c2ecf20Sopenharmony_ci * defines are w.r.t to PAGE_SIZE 638c2ecf20Sopenharmony_ci */ 648c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_SHIFT 62 658c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) 668c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) 678c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) 688c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) 698c2ecf20Sopenharmony_ci#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT) 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci#define KFD_MMAP_GPU_ID_SHIFT 46 728c2ecf20Sopenharmony_ci#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ 738c2ecf20Sopenharmony_ci << KFD_MMAP_GPU_ID_SHIFT) 748c2ecf20Sopenharmony_ci#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ 758c2ecf20Sopenharmony_ci & KFD_MMAP_GPU_ID_MASK) 768c2ecf20Sopenharmony_ci#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ 778c2ecf20Sopenharmony_ci >> KFD_MMAP_GPU_ID_SHIFT) 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci/* 808c2ecf20Sopenharmony_ci * When working with cp scheduler we should assign the HIQ manually or via 818c2ecf20Sopenharmony_ci * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot 828c2ecf20Sopenharmony_ci * definitions for Kaveri. In Kaveri only the first ME queues participates 838c2ecf20Sopenharmony_ci * in the cp scheduling taking that in mind we set the HIQ slot in the 848c2ecf20Sopenharmony_ci * second ME. 
858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_ci#define KFD_CIK_HIQ_PIPE 4 878c2ecf20Sopenharmony_ci#define KFD_CIK_HIQ_QUEUE 0 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci/* Macro for allocating structures */ 908c2ecf20Sopenharmony_ci#define kfd_alloc_struct(ptr_to_struct) \ 918c2ecf20Sopenharmony_ci ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci#define KFD_MAX_NUM_OF_PROCESSES 512 948c2ecf20Sopenharmony_ci#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci/* 978c2ecf20Sopenharmony_ci * Size of the per-process TBA+TMA buffer: 2 pages 988c2ecf20Sopenharmony_ci * 998c2ecf20Sopenharmony_ci * The first page is the TBA used for the CWSR ISA code. The second 1008c2ecf20Sopenharmony_ci * page is used as TMA for user-mode trap handler setup in daisy-chain mode. 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_ci#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) 1038c2ecf20Sopenharmony_ci#define KFD_CWSR_TMA_OFFSET PAGE_SIZE 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ 1068c2ecf20Sopenharmony_ci (KFD_MAX_NUM_OF_PROCESSES * \ 1078c2ecf20Sopenharmony_ci KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci#define KFD_KERNEL_QUEUE_SIZE 2048 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci#define KFD_UNMAP_LATENCY_MS (4000) 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci/* 1148c2ecf20Sopenharmony_ci * 512 = 0x200 1158c2ecf20Sopenharmony_ci * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the 1168c2ecf20Sopenharmony_ci * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA. 1178c2ecf20Sopenharmony_ci * 512 8-byte doorbell distance (i.e. 
one page away) ensures that SDMA RLC 1188c2ecf20Sopenharmony_ci * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in 1198c2ecf20Sopenharmony_ci * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE. 1208c2ecf20Sopenharmony_ci */ 1218c2ecf20Sopenharmony_ci#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci/* 1258c2ecf20Sopenharmony_ci * Kernel module parameter to specify maximum number of supported queues per 1268c2ecf20Sopenharmony_ci * device 1278c2ecf20Sopenharmony_ci */ 1288c2ecf20Sopenharmony_ciextern int max_num_of_queues_per_device; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/* Kernel module parameter to specify the scheduling policy */ 1328c2ecf20Sopenharmony_ciextern int sched_policy; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci/* 1358c2ecf20Sopenharmony_ci * Kernel module parameter to specify the maximum process 1368c2ecf20Sopenharmony_ci * number per HW scheduler 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_ciextern int hws_max_conc_proc; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ciextern int cwsr_enable; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci/* 1438c2ecf20Sopenharmony_ci * Kernel module parameter to specify whether to send sigterm to HSA process on 1448c2ecf20Sopenharmony_ci * unhandled exception 1458c2ecf20Sopenharmony_ci */ 1468c2ecf20Sopenharmony_ciextern int send_sigterm; 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci/* 1498c2ecf20Sopenharmony_ci * This kernel module is used to simulate large bar machine on non-large bar 1508c2ecf20Sopenharmony_ci * enabled machines. 
1518c2ecf20Sopenharmony_ci */ 1528c2ecf20Sopenharmony_ciextern int debug_largebar; 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci/* 1558c2ecf20Sopenharmony_ci * Ignore CRAT table during KFD initialization, can be used to work around 1568c2ecf20Sopenharmony_ci * broken CRAT tables on some AMD systems 1578c2ecf20Sopenharmony_ci */ 1588c2ecf20Sopenharmony_ciextern int ignore_crat; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci/* Set sh_mem_config.retry_disable on GFX v9 */ 1618c2ecf20Sopenharmony_ciextern int amdgpu_noretry; 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci/* Halt if HWS hang is detected */ 1648c2ecf20Sopenharmony_ciextern int halt_if_hws_hang; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci/* Whether MEC FW support GWS barriers */ 1678c2ecf20Sopenharmony_ciextern bool hws_gws_support; 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci/* Queue preemption timeout in ms */ 1708c2ecf20Sopenharmony_ciextern int queue_preemption_timeout_ms; 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci/* Enable eviction debug messages */ 1738c2ecf20Sopenharmony_ciextern bool debug_evictions; 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_cienum cache_policy { 1768c2ecf20Sopenharmony_ci cache_policy_coherent, 1778c2ecf20Sopenharmony_ci cache_policy_noncoherent 1788c2ecf20Sopenharmony_ci}; 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistruct kfd_event_interrupt_class { 1838c2ecf20Sopenharmony_ci bool (*interrupt_isr)(struct kfd_dev *dev, 1848c2ecf20Sopenharmony_ci const uint32_t *ih_ring_entry, uint32_t *patched_ihre, 1858c2ecf20Sopenharmony_ci bool *patched_flag); 1868c2ecf20Sopenharmony_ci void (*interrupt_wq)(struct kfd_dev *dev, 1878c2ecf20Sopenharmony_ci const uint32_t *ih_ring_entry); 1888c2ecf20Sopenharmony_ci}; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_cistruct kfd_device_info { 1918c2ecf20Sopenharmony_ci enum 
amd_asic_type asic_family; 1928c2ecf20Sopenharmony_ci const char *asic_name; 1938c2ecf20Sopenharmony_ci const struct kfd_event_interrupt_class *event_interrupt_class; 1948c2ecf20Sopenharmony_ci unsigned int max_pasid_bits; 1958c2ecf20Sopenharmony_ci unsigned int max_no_of_hqd; 1968c2ecf20Sopenharmony_ci unsigned int doorbell_size; 1978c2ecf20Sopenharmony_ci size_t ih_ring_entry_size; 1988c2ecf20Sopenharmony_ci uint8_t num_of_watch_points; 1998c2ecf20Sopenharmony_ci uint16_t mqd_size_aligned; 2008c2ecf20Sopenharmony_ci bool supports_cwsr; 2018c2ecf20Sopenharmony_ci bool needs_iommu_device; 2028c2ecf20Sopenharmony_ci bool needs_pci_atomics; 2038c2ecf20Sopenharmony_ci unsigned int num_sdma_engines; 2048c2ecf20Sopenharmony_ci unsigned int num_xgmi_sdma_engines; 2058c2ecf20Sopenharmony_ci unsigned int num_sdma_queues_per_engine; 2068c2ecf20Sopenharmony_ci}; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_cistruct kfd_mem_obj { 2098c2ecf20Sopenharmony_ci uint32_t range_start; 2108c2ecf20Sopenharmony_ci uint32_t range_end; 2118c2ecf20Sopenharmony_ci uint64_t gpu_addr; 2128c2ecf20Sopenharmony_ci uint32_t *cpu_ptr; 2138c2ecf20Sopenharmony_ci void *gtt_mem; 2148c2ecf20Sopenharmony_ci}; 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_cistruct kfd_vmid_info { 2178c2ecf20Sopenharmony_ci uint32_t first_vmid_kfd; 2188c2ecf20Sopenharmony_ci uint32_t last_vmid_kfd; 2198c2ecf20Sopenharmony_ci uint32_t vmid_num_kfd; 2208c2ecf20Sopenharmony_ci}; 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistruct kfd_dev { 2238c2ecf20Sopenharmony_ci struct kgd_dev *kgd; 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci const struct kfd_device_info *device_info; 2268c2ecf20Sopenharmony_ci struct pci_dev *pdev; 2278c2ecf20Sopenharmony_ci struct drm_device *ddev; 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci unsigned int id; /* topology stub index */ 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci phys_addr_t doorbell_base; /* Start of actual doorbells used by 
2328c2ecf20Sopenharmony_ci * KFD. It is aligned for mapping 2338c2ecf20Sopenharmony_ci * into user mode 2348c2ecf20Sopenharmony_ci */ 2358c2ecf20Sopenharmony_ci size_t doorbell_base_dw_offset; /* Offset from the start of the PCI 2368c2ecf20Sopenharmony_ci * doorbell BAR to the first KFD 2378c2ecf20Sopenharmony_ci * doorbell in dwords. GFX reserves 2388c2ecf20Sopenharmony_ci * the segment before this offset. 2398c2ecf20Sopenharmony_ci */ 2408c2ecf20Sopenharmony_ci u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells 2418c2ecf20Sopenharmony_ci * page used by kernel queue 2428c2ecf20Sopenharmony_ci */ 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci struct kgd2kfd_shared_resources shared_resources; 2458c2ecf20Sopenharmony_ci struct kfd_vmid_info vm_info; 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci const struct kfd2kgd_calls *kfd2kgd; 2488c2ecf20Sopenharmony_ci struct mutex doorbell_mutex; 2498c2ecf20Sopenharmony_ci DECLARE_BITMAP(doorbell_available_index, 2508c2ecf20Sopenharmony_ci KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci void *gtt_mem; 2538c2ecf20Sopenharmony_ci uint64_t gtt_start_gpu_addr; 2548c2ecf20Sopenharmony_ci void *gtt_start_cpu_ptr; 2558c2ecf20Sopenharmony_ci void *gtt_sa_bitmap; 2568c2ecf20Sopenharmony_ci struct mutex gtt_sa_lock; 2578c2ecf20Sopenharmony_ci unsigned int gtt_sa_chunk_size; 2588c2ecf20Sopenharmony_ci unsigned int gtt_sa_num_of_chunks; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci /* Interrupts */ 2618c2ecf20Sopenharmony_ci struct kfifo ih_fifo; 2628c2ecf20Sopenharmony_ci struct workqueue_struct *ih_wq; 2638c2ecf20Sopenharmony_ci struct work_struct interrupt_work; 2648c2ecf20Sopenharmony_ci spinlock_t interrupt_lock; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci /* QCM Device instance */ 2678c2ecf20Sopenharmony_ci struct device_queue_manager *dqm; 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci bool init_complete; 2708c2ecf20Sopenharmony_ci 
/* 2718c2ecf20Sopenharmony_ci * Interrupts of interest to KFD are copied 2728c2ecf20Sopenharmony_ci * from the HW ring into a SW ring. 2738c2ecf20Sopenharmony_ci */ 2748c2ecf20Sopenharmony_ci bool interrupts_active; 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci /* Debug manager */ 2778c2ecf20Sopenharmony_ci struct kfd_dbgmgr *dbgmgr; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci /* Firmware versions */ 2808c2ecf20Sopenharmony_ci uint16_t mec_fw_version; 2818c2ecf20Sopenharmony_ci uint16_t mec2_fw_version; 2828c2ecf20Sopenharmony_ci uint16_t sdma_fw_version; 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci /* Maximum process number mapped to HW scheduler */ 2858c2ecf20Sopenharmony_ci unsigned int max_proc_per_quantum; 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci /* CWSR */ 2888c2ecf20Sopenharmony_ci bool cwsr_enabled; 2898c2ecf20Sopenharmony_ci const void *cwsr_isa; 2908c2ecf20Sopenharmony_ci unsigned int cwsr_isa_size; 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci /* xGMI */ 2938c2ecf20Sopenharmony_ci uint64_t hive_id; 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci /* UUID */ 2968c2ecf20Sopenharmony_ci uint64_t unique_id; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci bool pci_atomic_requested; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci /* Use IOMMU v2 flag */ 3018c2ecf20Sopenharmony_ci bool use_iommu_v2; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci /* SRAM ECC flag */ 3048c2ecf20Sopenharmony_ci atomic_t sram_ecc_flag; 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci /* Compute Profile ref. 
count */ 3078c2ecf20Sopenharmony_ci atomic_t compute_profile; 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci /* Global GWS resource shared between processes */ 3108c2ecf20Sopenharmony_ci void *gws; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci /* Clients watching SMI events */ 3138c2ecf20Sopenharmony_ci struct list_head smi_clients; 3148c2ecf20Sopenharmony_ci spinlock_t smi_lock; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci uint32_t reset_seq_num; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci struct ida doorbell_ida; 3198c2ecf20Sopenharmony_ci unsigned int max_doorbell_slices; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci int noretry; 3228c2ecf20Sopenharmony_ci}; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_cienum kfd_mempool { 3258c2ecf20Sopenharmony_ci KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, 3268c2ecf20Sopenharmony_ci KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, 3278c2ecf20Sopenharmony_ci KFD_MEMPOOL_FRAMEBUFFER = 3, 3288c2ecf20Sopenharmony_ci}; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci/* Character device interface */ 3318c2ecf20Sopenharmony_ciint kfd_chardev_init(void); 3328c2ecf20Sopenharmony_civoid kfd_chardev_exit(void); 3338c2ecf20Sopenharmony_cistruct device *kfd_chardev(void); 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci/** 3368c2ecf20Sopenharmony_ci * enum kfd_unmap_queues_filter - Enum for queue filters. 3378c2ecf20Sopenharmony_ci * 3388c2ecf20Sopenharmony_ci * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. 3398c2ecf20Sopenharmony_ci * 3408c2ecf20Sopenharmony_ci * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the 3418c2ecf20Sopenharmony_ci * running queues list. 3428c2ecf20Sopenharmony_ci * 3438c2ecf20Sopenharmony_ci * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to 3448c2ecf20Sopenharmony_ci * specific process. 
3458c2ecf20Sopenharmony_ci * 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_cienum kfd_unmap_queues_filter { 3488c2ecf20Sopenharmony_ci KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE, 3498c2ecf20Sopenharmony_ci KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3508c2ecf20Sopenharmony_ci KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3518c2ecf20Sopenharmony_ci KFD_UNMAP_QUEUES_FILTER_BY_PASID 3528c2ecf20Sopenharmony_ci}; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci/** 3558c2ecf20Sopenharmony_ci * enum kfd_queue_type - Enum for various queue types. 3568c2ecf20Sopenharmony_ci * 3578c2ecf20Sopenharmony_ci * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. 3588c2ecf20Sopenharmony_ci * 3598c2ecf20Sopenharmony_ci * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type. 3608c2ecf20Sopenharmony_ci * 3618c2ecf20Sopenharmony_ci * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. 3628c2ecf20Sopenharmony_ci * 3638c2ecf20Sopenharmony_ci * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. 3648c2ecf20Sopenharmony_ci * 3658c2ecf20Sopenharmony_ci * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. 
3668c2ecf20Sopenharmony_ci */ 3678c2ecf20Sopenharmony_cienum kfd_queue_type { 3688c2ecf20Sopenharmony_ci KFD_QUEUE_TYPE_COMPUTE, 3698c2ecf20Sopenharmony_ci KFD_QUEUE_TYPE_SDMA, 3708c2ecf20Sopenharmony_ci KFD_QUEUE_TYPE_HIQ, 3718c2ecf20Sopenharmony_ci KFD_QUEUE_TYPE_DIQ, 3728c2ecf20Sopenharmony_ci KFD_QUEUE_TYPE_SDMA_XGMI 3738c2ecf20Sopenharmony_ci}; 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_cienum kfd_queue_format { 3768c2ecf20Sopenharmony_ci KFD_QUEUE_FORMAT_PM4, 3778c2ecf20Sopenharmony_ci KFD_QUEUE_FORMAT_AQL 3788c2ecf20Sopenharmony_ci}; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_cienum KFD_QUEUE_PRIORITY { 3818c2ecf20Sopenharmony_ci KFD_QUEUE_PRIORITY_MINIMUM = 0, 3828c2ecf20Sopenharmony_ci KFD_QUEUE_PRIORITY_MAXIMUM = 15 3838c2ecf20Sopenharmony_ci}; 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci/** 3868c2ecf20Sopenharmony_ci * struct queue_properties 3878c2ecf20Sopenharmony_ci * 3888c2ecf20Sopenharmony_ci * @type: The queue type. 3898c2ecf20Sopenharmony_ci * 3908c2ecf20Sopenharmony_ci * @queue_id: Queue identifier. 3918c2ecf20Sopenharmony_ci * 3928c2ecf20Sopenharmony_ci * @queue_address: Queue ring buffer address. 3938c2ecf20Sopenharmony_ci * 3948c2ecf20Sopenharmony_ci * @queue_size: Queue ring buffer size. 3958c2ecf20Sopenharmony_ci * 3968c2ecf20Sopenharmony_ci * @priority: Defines the queue priority relative to other queues in the 3978c2ecf20Sopenharmony_ci * process. 3988c2ecf20Sopenharmony_ci * This is just an indication and HW scheduling may override the priority as 3998c2ecf20Sopenharmony_ci * necessary while keeping the relative prioritization. 4008c2ecf20Sopenharmony_ci * the priority granularity is from 0 to f which f is the highest priority. 4018c2ecf20Sopenharmony_ci * currently all queues are initialized with the highest priority. 
 *
 * @queue_percent: This field is partially implemented and currently a zero in
 * this field defines that the queue is non active.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * cp read from the ring buffer. This field updates automatically by the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring
 * buffer. This field should be similar to write_ptr and the user should
 * update this field after updating the write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. Interop queue means that
 * the queue can access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
 * @is_gws should be protected by the DQM lock, since changing it can yield the
 * possibility of updating DQM state on number of GWS queues.
4298c2ecf20Sopenharmony_ci * 4308c2ecf20Sopenharmony_ci * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 4318c2ecf20Sopenharmony_ci * of the queue. 4328c2ecf20Sopenharmony_ci * 4338c2ecf20Sopenharmony_ci * This structure represents the queue properties for each queue no matter if 4348c2ecf20Sopenharmony_ci * it's user mode or kernel mode queue. 4358c2ecf20Sopenharmony_ci * 4368c2ecf20Sopenharmony_ci */ 4378c2ecf20Sopenharmony_cistruct queue_properties { 4388c2ecf20Sopenharmony_ci enum kfd_queue_type type; 4398c2ecf20Sopenharmony_ci enum kfd_queue_format format; 4408c2ecf20Sopenharmony_ci unsigned int queue_id; 4418c2ecf20Sopenharmony_ci uint64_t queue_address; 4428c2ecf20Sopenharmony_ci uint64_t queue_size; 4438c2ecf20Sopenharmony_ci uint32_t priority; 4448c2ecf20Sopenharmony_ci uint32_t queue_percent; 4458c2ecf20Sopenharmony_ci uint32_t *read_ptr; 4468c2ecf20Sopenharmony_ci uint32_t *write_ptr; 4478c2ecf20Sopenharmony_ci void __iomem *doorbell_ptr; 4488c2ecf20Sopenharmony_ci uint32_t doorbell_off; 4498c2ecf20Sopenharmony_ci bool is_interop; 4508c2ecf20Sopenharmony_ci bool is_evicted; 4518c2ecf20Sopenharmony_ci bool is_active; 4528c2ecf20Sopenharmony_ci bool is_gws; 4538c2ecf20Sopenharmony_ci /* Not relevant for user mode queues in cp scheduling */ 4548c2ecf20Sopenharmony_ci unsigned int vmid; 4558c2ecf20Sopenharmony_ci /* Relevant only for sdma queues*/ 4568c2ecf20Sopenharmony_ci uint32_t sdma_engine_id; 4578c2ecf20Sopenharmony_ci uint32_t sdma_queue_id; 4588c2ecf20Sopenharmony_ci uint32_t sdma_vm_addr; 4598c2ecf20Sopenharmony_ci /* Relevant only for VI */ 4608c2ecf20Sopenharmony_ci uint64_t eop_ring_buffer_address; 4618c2ecf20Sopenharmony_ci uint32_t eop_ring_buffer_size; 4628c2ecf20Sopenharmony_ci uint64_t ctx_save_restore_area_address; 4638c2ecf20Sopenharmony_ci uint32_t ctx_save_restore_area_size; 4648c2ecf20Sopenharmony_ci uint32_t ctl_stack_size; 4658c2ecf20Sopenharmony_ci uint64_t tba_addr; 4668c2ecf20Sopenharmony_ci uint64_t 
tma_addr; 4678c2ecf20Sopenharmony_ci /* Relevant for CU */ 4688c2ecf20Sopenharmony_ci uint32_t cu_mask_count; /* Must be a multiple of 32 */ 4698c2ecf20Sopenharmony_ci uint32_t *cu_mask; 4708c2ecf20Sopenharmony_ci}; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ 4738c2ecf20Sopenharmony_ci (q).queue_address != 0 && \ 4748c2ecf20Sopenharmony_ci (q).queue_percent > 0 && \ 4758c2ecf20Sopenharmony_ci !(q).is_evicted) 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci/** 4788c2ecf20Sopenharmony_ci * struct queue 4798c2ecf20Sopenharmony_ci * 4808c2ecf20Sopenharmony_ci * @list: Queue linked list. 4818c2ecf20Sopenharmony_ci * 4828c2ecf20Sopenharmony_ci * @mqd: The queue MQD (memory queue descriptor). 4838c2ecf20Sopenharmony_ci * 4848c2ecf20Sopenharmony_ci * @mqd_mem_obj: The MQD local gpu memory object. 4858c2ecf20Sopenharmony_ci * 4868c2ecf20Sopenharmony_ci * @gart_mqd_addr: The MQD gart mc address. 4878c2ecf20Sopenharmony_ci * 4888c2ecf20Sopenharmony_ci * @properties: The queue properties. 4898c2ecf20Sopenharmony_ci * 4908c2ecf20Sopenharmony_ci * @mec: Used only in no cp scheduling mode and identifies to micro engine id 4918c2ecf20Sopenharmony_ci * that the queue should be executed on. 4928c2ecf20Sopenharmony_ci * 4938c2ecf20Sopenharmony_ci * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe 4948c2ecf20Sopenharmony_ci * id. 4958c2ecf20Sopenharmony_ci * 4968c2ecf20Sopenharmony_ci * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. 4978c2ecf20Sopenharmony_ci * 4988c2ecf20Sopenharmony_ci * @process: The kfd process that created this queue. 4998c2ecf20Sopenharmony_ci * 5008c2ecf20Sopenharmony_ci * @device: The kfd device that created this queue. 5018c2ecf20Sopenharmony_ci * 5028c2ecf20Sopenharmony_ci * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL 5038c2ecf20Sopenharmony_ci * otherwise. 
5048c2ecf20Sopenharmony_ci * 5058c2ecf20Sopenharmony_ci * This structure represents user mode compute queues. 5068c2ecf20Sopenharmony_ci * It contains all the necessary data to handle such queues. 5078c2ecf20Sopenharmony_ci * 5088c2ecf20Sopenharmony_ci */ 5098c2ecf20Sopenharmony_ci 5108c2ecf20Sopenharmony_cistruct queue { 5118c2ecf20Sopenharmony_ci struct list_head list; 5128c2ecf20Sopenharmony_ci void *mqd; 5138c2ecf20Sopenharmony_ci struct kfd_mem_obj *mqd_mem_obj; 5148c2ecf20Sopenharmony_ci uint64_t gart_mqd_addr; 5158c2ecf20Sopenharmony_ci struct queue_properties properties; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci uint32_t mec; 5188c2ecf20Sopenharmony_ci uint32_t pipe; 5198c2ecf20Sopenharmony_ci uint32_t queue; 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci unsigned int sdma_id; 5228c2ecf20Sopenharmony_ci unsigned int doorbell_id; 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci struct kfd_process *process; 5258c2ecf20Sopenharmony_ci struct kfd_dev *device; 5268c2ecf20Sopenharmony_ci void *gws; 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci /* procfs */ 5298c2ecf20Sopenharmony_ci struct kobject kobj; 5308c2ecf20Sopenharmony_ci}; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_cienum KFD_MQD_TYPE { 5338c2ecf20Sopenharmony_ci KFD_MQD_TYPE_HIQ = 0, /* for hiq */ 5348c2ecf20Sopenharmony_ci KFD_MQD_TYPE_CP, /* for cp queues and diq */ 5358c2ecf20Sopenharmony_ci KFD_MQD_TYPE_SDMA, /* for sdma queues */ 5368c2ecf20Sopenharmony_ci KFD_MQD_TYPE_DIQ, /* for diq */ 5378c2ecf20Sopenharmony_ci KFD_MQD_TYPE_MAX 5388c2ecf20Sopenharmony_ci}; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_cienum KFD_PIPE_PRIORITY { 5418c2ecf20Sopenharmony_ci KFD_PIPE_PRIORITY_CS_LOW = 0, 5428c2ecf20Sopenharmony_ci KFD_PIPE_PRIORITY_CS_MEDIUM, 5438c2ecf20Sopenharmony_ci KFD_PIPE_PRIORITY_CS_HIGH 5448c2ecf20Sopenharmony_ci}; 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_cistruct scheduling_resources { 5478c2ecf20Sopenharmony_ci unsigned int vmid_mask; 
5488c2ecf20Sopenharmony_ci enum kfd_queue_type type; 5498c2ecf20Sopenharmony_ci uint64_t queue_mask; 5508c2ecf20Sopenharmony_ci uint64_t gws_mask; 5518c2ecf20Sopenharmony_ci uint32_t oac_mask; 5528c2ecf20Sopenharmony_ci uint32_t gds_heap_base; 5538c2ecf20Sopenharmony_ci uint32_t gds_heap_size; 5548c2ecf20Sopenharmony_ci}; 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_cistruct process_queue_manager { 5578c2ecf20Sopenharmony_ci /* data */ 5588c2ecf20Sopenharmony_ci struct kfd_process *process; 5598c2ecf20Sopenharmony_ci struct list_head queues; 5608c2ecf20Sopenharmony_ci unsigned long *queue_slot_bitmap; 5618c2ecf20Sopenharmony_ci}; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_cistruct qcm_process_device { 5648c2ecf20Sopenharmony_ci /* The Device Queue Manager that owns this data */ 5658c2ecf20Sopenharmony_ci struct device_queue_manager *dqm; 5668c2ecf20Sopenharmony_ci struct process_queue_manager *pqm; 5678c2ecf20Sopenharmony_ci /* Queues list */ 5688c2ecf20Sopenharmony_ci struct list_head queues_list; 5698c2ecf20Sopenharmony_ci struct list_head priv_queue_list; 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci unsigned int queue_count; 5728c2ecf20Sopenharmony_ci unsigned int vmid; 5738c2ecf20Sopenharmony_ci bool is_debug; 5748c2ecf20Sopenharmony_ci unsigned int evicted; /* eviction counter, 0=active */ 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci /* This flag tells if we should reset all wavefronts on 5778c2ecf20Sopenharmony_ci * process termination 5788c2ecf20Sopenharmony_ci */ 5798c2ecf20Sopenharmony_ci bool reset_wavefronts; 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ci /* This flag tells us if this process has a GWS-capable 5828c2ecf20Sopenharmony_ci * queue that will be mapped into the runlist. It's 5838c2ecf20Sopenharmony_ci * possible to request a GWS BO, but not have the queue 5848c2ecf20Sopenharmony_ci * currently mapped, and this changes how the MAP_PROCESS 5858c2ecf20Sopenharmony_ci * PM4 packet is configured. 
5868c2ecf20Sopenharmony_ci */ 5878c2ecf20Sopenharmony_ci bool mapped_gws_queue; 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci /* All the memory management data should be here too */ 5908c2ecf20Sopenharmony_ci uint64_t gds_context_area; 5918c2ecf20Sopenharmony_ci /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */ 5928c2ecf20Sopenharmony_ci uint64_t page_table_base; 5938c2ecf20Sopenharmony_ci uint32_t sh_mem_config; 5948c2ecf20Sopenharmony_ci uint32_t sh_mem_bases; 5958c2ecf20Sopenharmony_ci uint32_t sh_mem_ape1_base; 5968c2ecf20Sopenharmony_ci uint32_t sh_mem_ape1_limit; 5978c2ecf20Sopenharmony_ci uint32_t gds_size; 5988c2ecf20Sopenharmony_ci uint32_t num_gws; 5998c2ecf20Sopenharmony_ci uint32_t num_oac; 6008c2ecf20Sopenharmony_ci uint32_t sh_hidden_private_base; 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci /* CWSR memory */ 6038c2ecf20Sopenharmony_ci void *cwsr_kaddr; 6048c2ecf20Sopenharmony_ci uint64_t cwsr_base; 6058c2ecf20Sopenharmony_ci uint64_t tba_addr; 6068c2ecf20Sopenharmony_ci uint64_t tma_addr; 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci /* IB memory */ 6098c2ecf20Sopenharmony_ci uint64_t ib_base; 6108c2ecf20Sopenharmony_ci void *ib_kaddr; 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci /* doorbell resources per process per device */ 6138c2ecf20Sopenharmony_ci unsigned long *doorbell_bitmap; 6148c2ecf20Sopenharmony_ci}; 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci/* KFD Memory Eviction */ 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci/* Approx. wait time before attempting to restore evicted BOs */ 6198c2ecf20Sopenharmony_ci#define PROCESS_RESTORE_TIME_MS 100 6208c2ecf20Sopenharmony_ci/* Approx. back off time if restore fails due to lack of memory */ 6218c2ecf20Sopenharmony_ci#define PROCESS_BACK_OFF_TIME_MS 100 6228c2ecf20Sopenharmony_ci/* Approx. 
time before evicting the process again */ 6238c2ecf20Sopenharmony_ci#define PROCESS_ACTIVE_TIME_MS 10 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci/* 8 byte handle containing GPU ID in the most significant 4 bytes and 6268c2ecf20Sopenharmony_ci * idr_handle in the least significant 4 bytes 6278c2ecf20Sopenharmony_ci */ 6288c2ecf20Sopenharmony_ci#define MAKE_HANDLE(gpu_id, idr_handle) \ 6298c2ecf20Sopenharmony_ci (((uint64_t)(gpu_id) << 32) + idr_handle) 6308c2ecf20Sopenharmony_ci#define GET_GPU_ID(handle) (handle >> 32) 6318c2ecf20Sopenharmony_ci#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_cienum kfd_pdd_bound { 6348c2ecf20Sopenharmony_ci PDD_UNBOUND = 0, 6358c2ecf20Sopenharmony_ci PDD_BOUND, 6368c2ecf20Sopenharmony_ci PDD_BOUND_SUSPENDED, 6378c2ecf20Sopenharmony_ci}; 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci#define MAX_SYSFS_FILENAME_LEN 15 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci/* 6428c2ecf20Sopenharmony_ci * SDMA counter runs at 100MHz frequency. 6438c2ecf20Sopenharmony_ci * We display SDMA activity in microsecond granularity in sysfs. 6448c2ecf20Sopenharmony_ci * As a result, the divisor is 100. 6458c2ecf20Sopenharmony_ci */ 6468c2ecf20Sopenharmony_ci#define SDMA_ACTIVITY_DIVISOR 100 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci/* Data that is per-process-per device. */ 6498c2ecf20Sopenharmony_cistruct kfd_process_device { 6508c2ecf20Sopenharmony_ci /* 6518c2ecf20Sopenharmony_ci * List of all per-device data for a process. 6528c2ecf20Sopenharmony_ci * Starts from kfd_process.per_device_data. 6538c2ecf20Sopenharmony_ci */ 6548c2ecf20Sopenharmony_ci struct list_head per_device_list; 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci /* The device that owns this data. */ 6578c2ecf20Sopenharmony_ci struct kfd_dev *dev; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci /* The process that owns this kfd_process_device. 
*/ 6608c2ecf20Sopenharmony_ci struct kfd_process *process; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci /* per-process-per device QCM data structure */ 6638c2ecf20Sopenharmony_ci struct qcm_process_device qpd; 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci /*Apertures*/ 6668c2ecf20Sopenharmony_ci uint64_t lds_base; 6678c2ecf20Sopenharmony_ci uint64_t lds_limit; 6688c2ecf20Sopenharmony_ci uint64_t gpuvm_base; 6698c2ecf20Sopenharmony_ci uint64_t gpuvm_limit; 6708c2ecf20Sopenharmony_ci uint64_t scratch_base; 6718c2ecf20Sopenharmony_ci uint64_t scratch_limit; 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci /* VM context for GPUVM allocations */ 6748c2ecf20Sopenharmony_ci struct file *drm_file; 6758c2ecf20Sopenharmony_ci void *vm; 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci /* GPUVM allocations storage */ 6788c2ecf20Sopenharmony_ci struct idr alloc_idr; 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci /* Flag used to tell the pdd has dequeued from the dqm. 6818c2ecf20Sopenharmony_ci * This is used to prevent dev->dqm->ops.process_termination() from 6828c2ecf20Sopenharmony_ci * being called twice when it is already called in IOMMU callback 6838c2ecf20Sopenharmony_ci * function. 6848c2ecf20Sopenharmony_ci */ 6858c2ecf20Sopenharmony_ci bool already_dequeued; 6868c2ecf20Sopenharmony_ci bool runtime_inuse; 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci /* Is this process/pasid bound to this device? 
(amd_iommu_bind_pasid) */ 6898c2ecf20Sopenharmony_ci enum kfd_pdd_bound bound; 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_ci /* VRAM usage */ 6928c2ecf20Sopenharmony_ci uint64_t vram_usage; 6938c2ecf20Sopenharmony_ci struct attribute attr_vram; 6948c2ecf20Sopenharmony_ci char vram_filename[MAX_SYSFS_FILENAME_LEN]; 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_ci /* SDMA activity tracking */ 6978c2ecf20Sopenharmony_ci uint64_t sdma_past_activity_counter; 6988c2ecf20Sopenharmony_ci struct attribute attr_sdma; 6998c2ecf20Sopenharmony_ci char sdma_filename[MAX_SYSFS_FILENAME_LEN]; 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci /* Eviction activity tracking */ 7028c2ecf20Sopenharmony_ci uint64_t last_evict_timestamp; 7038c2ecf20Sopenharmony_ci atomic64_t evict_duration_counter; 7048c2ecf20Sopenharmony_ci struct attribute attr_evict; 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci struct kobject *kobj_stats; 7078c2ecf20Sopenharmony_ci unsigned int doorbell_index; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci /* 7108c2ecf20Sopenharmony_ci * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process 7118c2ecf20Sopenharmony_ci * that is associated with device encoded by "this" struct instance. The 7128c2ecf20Sopenharmony_ci * value reflects CU usage by all of the waves launched by this process 7138c2ecf20Sopenharmony_ci * on this device. A very important property of occupancy parameter is 7148c2ecf20Sopenharmony_ci * that its value is a snapshot of current use. 7158c2ecf20Sopenharmony_ci * 7168c2ecf20Sopenharmony_ci * Following is to be noted regarding how this parameter is reported: 7178c2ecf20Sopenharmony_ci * 7188c2ecf20Sopenharmony_ci * The number of waves that a CU can launch is limited by couple of 7198c2ecf20Sopenharmony_ci * parameters. These are encoded by struct amdgpu_cu_info instance 7208c2ecf20Sopenharmony_ci * that is part of every device definition. 
For GFX9 devices this 7218c2ecf20Sopenharmony_ci * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves 7228c2ecf20Sopenharmony_ci * do not use scratch memory and 32 waves (max_scratch_slots_per_cu) 7238c2ecf20Sopenharmony_ci * when they do use scratch memory. This could change for future 7248c2ecf20Sopenharmony_ci * devices and therefore this example should be considered as a guide. 7258c2ecf20Sopenharmony_ci * 7268c2ecf20Sopenharmony_ci * All CU's of a device are available for the process. This may not be true 7278c2ecf20Sopenharmony_ci * under certain conditions - e.g. CU masking. 7288c2ecf20Sopenharmony_ci * 7298c2ecf20Sopenharmony_ci * Finally number of CU's that are occupied by a process is affected by both 7308c2ecf20Sopenharmony_ci * number of CU's a device has along with number of other competing processes 7318c2ecf20Sopenharmony_ci */ 7328c2ecf20Sopenharmony_ci struct attribute attr_cu_occupancy; 7338c2ecf20Sopenharmony_ci}; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 7368c2ecf20Sopenharmony_ci 7378c2ecf20Sopenharmony_ci/* Process data */ 7388c2ecf20Sopenharmony_cistruct kfd_process { 7398c2ecf20Sopenharmony_ci /* 7408c2ecf20Sopenharmony_ci * kfd_process are stored in an mm_struct*->kfd_process* 7418c2ecf20Sopenharmony_ci * hash table (kfd_processes in kfd_process.c) 7428c2ecf20Sopenharmony_ci */ 7438c2ecf20Sopenharmony_ci struct hlist_node kfd_processes; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci /* 7468c2ecf20Sopenharmony_ci * Opaque pointer to mm_struct. We don't hold a reference to 7478c2ecf20Sopenharmony_ci * it so it should never be dereferenced from here. This is 7488c2ecf20Sopenharmony_ci * only used for looking up processes by their mm. 
7498c2ecf20Sopenharmony_ci */ 7508c2ecf20Sopenharmony_ci void *mm; 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci struct kref ref; 7538c2ecf20Sopenharmony_ci struct work_struct release_work; 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci struct mutex mutex; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci /* 7588c2ecf20Sopenharmony_ci * In any process, the thread that started main() is the lead 7598c2ecf20Sopenharmony_ci * thread and outlives the rest. 7608c2ecf20Sopenharmony_ci * It is here because amd_iommu_bind_pasid wants a task_struct. 7618c2ecf20Sopenharmony_ci * It can also be used for safely getting a reference to the 7628c2ecf20Sopenharmony_ci * mm_struct of the process. 7638c2ecf20Sopenharmony_ci */ 7648c2ecf20Sopenharmony_ci struct task_struct *lead_thread; 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci /* We want to receive a notification when the mm_struct is destroyed */ 7678c2ecf20Sopenharmony_ci struct mmu_notifier mmu_notifier; 7688c2ecf20Sopenharmony_ci 7698c2ecf20Sopenharmony_ci u32 pasid; 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci /* 7728c2ecf20Sopenharmony_ci * List of kfd_process_device structures, 7738c2ecf20Sopenharmony_ci * one for each device the process is using. 
7748c2ecf20Sopenharmony_ci */ 7758c2ecf20Sopenharmony_ci struct list_head per_device_data; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci struct process_queue_manager pqm; 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci /*Is the user space process 32 bit?*/ 7808c2ecf20Sopenharmony_ci bool is_32bit_user_mode; 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci /* Event-related data */ 7838c2ecf20Sopenharmony_ci struct mutex event_mutex; 7848c2ecf20Sopenharmony_ci /* Event ID allocator and lookup */ 7858c2ecf20Sopenharmony_ci struct idr event_idr; 7868c2ecf20Sopenharmony_ci /* Event page */ 7878c2ecf20Sopenharmony_ci struct kfd_signal_page *signal_page; 7888c2ecf20Sopenharmony_ci size_t signal_mapped_size; 7898c2ecf20Sopenharmony_ci size_t signal_event_count; 7908c2ecf20Sopenharmony_ci bool signal_event_limit_reached; 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci /* Information used for memory eviction */ 7938c2ecf20Sopenharmony_ci void *kgd_process_info; 7948c2ecf20Sopenharmony_ci /* Eviction fence that is attached to all the BOs of this process. The 7958c2ecf20Sopenharmony_ci * fence will be triggered during eviction and new one will be created 7968c2ecf20Sopenharmony_ci * during restore 7978c2ecf20Sopenharmony_ci */ 7988c2ecf20Sopenharmony_ci struct dma_fence *ef; 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci /* Work items for evicting and restoring BOs */ 8018c2ecf20Sopenharmony_ci struct delayed_work eviction_work; 8028c2ecf20Sopenharmony_ci struct delayed_work restore_work; 8038c2ecf20Sopenharmony_ci /* seqno of the last scheduled eviction */ 8048c2ecf20Sopenharmony_ci unsigned int last_eviction_seqno; 8058c2ecf20Sopenharmony_ci /* Approx. 
the last timestamp (in jiffies) when the process was 8068c2ecf20Sopenharmony_ci * restored after an eviction 8078c2ecf20Sopenharmony_ci */ 8088c2ecf20Sopenharmony_ci unsigned long last_restore_timestamp; 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_ci /* Kobj for our procfs */ 8118c2ecf20Sopenharmony_ci struct kobject *kobj; 8128c2ecf20Sopenharmony_ci struct kobject *kobj_queues; 8138c2ecf20Sopenharmony_ci struct attribute attr_pasid; 8148c2ecf20Sopenharmony_ci}; 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ 8178c2ecf20Sopenharmony_ciextern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); 8188c2ecf20Sopenharmony_ciextern struct srcu_struct kfd_processes_srcu; 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci/** 8218c2ecf20Sopenharmony_ci * typedef amdkfd_ioctl_t - typedef for ioctl function pointer. 8228c2ecf20Sopenharmony_ci * 8238c2ecf20Sopenharmony_ci * @filep: pointer to file structure. 8248c2ecf20Sopenharmony_ci * @p: amdkfd process pointer. 8258c2ecf20Sopenharmony_ci * @data: pointer to arg that was copied from user. 8268c2ecf20Sopenharmony_ci * 8278c2ecf20Sopenharmony_ci * Return: returns ioctl completion code. 
8288c2ecf20Sopenharmony_ci */ 8298c2ecf20Sopenharmony_citypedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, 8308c2ecf20Sopenharmony_ci void *data); 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_cistruct amdkfd_ioctl_desc { 8338c2ecf20Sopenharmony_ci unsigned int cmd; 8348c2ecf20Sopenharmony_ci int flags; 8358c2ecf20Sopenharmony_ci amdkfd_ioctl_t *func; 8368c2ecf20Sopenharmony_ci unsigned int cmd_drv; 8378c2ecf20Sopenharmony_ci const char *name; 8388c2ecf20Sopenharmony_ci}; 8398c2ecf20Sopenharmony_cibool kfd_dev_is_large_bar(struct kfd_dev *dev); 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ciint kfd_process_create_wq(void); 8428c2ecf20Sopenharmony_civoid kfd_process_destroy_wq(void); 8438c2ecf20Sopenharmony_cistruct kfd_process *kfd_create_process(struct file *filep); 8448c2ecf20Sopenharmony_cistruct kfd_process *kfd_get_process(const struct task_struct *); 8458c2ecf20Sopenharmony_cistruct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); 8468c2ecf20Sopenharmony_cistruct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); 8478c2ecf20Sopenharmony_civoid kfd_unref_process(struct kfd_process *p); 8488c2ecf20Sopenharmony_ciint kfd_process_evict_queues(struct kfd_process *p); 8498c2ecf20Sopenharmony_ciint kfd_process_restore_queues(struct kfd_process *p); 8508c2ecf20Sopenharmony_civoid kfd_suspend_all_processes(void); 8518c2ecf20Sopenharmony_ciint kfd_resume_all_processes(void); 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ciint kfd_process_device_init_vm(struct kfd_process_device *pdd, 8548c2ecf20Sopenharmony_ci struct file *drm_file); 8558c2ecf20Sopenharmony_cistruct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 8568c2ecf20Sopenharmony_ci struct kfd_process *p); 8578c2ecf20Sopenharmony_cistruct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 8588c2ecf20Sopenharmony_ci struct kfd_process *p); 8598c2ecf20Sopenharmony_cistruct kfd_process_device 
*kfd_create_process_device_data(struct kfd_dev *dev, 8608c2ecf20Sopenharmony_ci struct kfd_process *p); 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_ciint kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, 8638c2ecf20Sopenharmony_ci struct vm_area_struct *vma); 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_ci/* KFD process API for creating and translating handles */ 8668c2ecf20Sopenharmony_ciint kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, 8678c2ecf20Sopenharmony_ci void *mem); 8688c2ecf20Sopenharmony_civoid *kfd_process_device_translate_handle(struct kfd_process_device *p, 8698c2ecf20Sopenharmony_ci int handle); 8708c2ecf20Sopenharmony_civoid kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, 8718c2ecf20Sopenharmony_ci int handle); 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci/* Process device data iterator */ 8748c2ecf20Sopenharmony_cistruct kfd_process_device *kfd_get_first_process_device_data( 8758c2ecf20Sopenharmony_ci struct kfd_process *p); 8768c2ecf20Sopenharmony_cistruct kfd_process_device *kfd_get_next_process_device_data( 8778c2ecf20Sopenharmony_ci struct kfd_process *p, 8788c2ecf20Sopenharmony_ci struct kfd_process_device *pdd); 8798c2ecf20Sopenharmony_cibool kfd_has_process_device_data(struct kfd_process *p); 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci/* PASIDs */ 8828c2ecf20Sopenharmony_ciint kfd_pasid_init(void); 8838c2ecf20Sopenharmony_civoid kfd_pasid_exit(void); 8848c2ecf20Sopenharmony_cibool kfd_set_pasid_limit(unsigned int new_limit); 8858c2ecf20Sopenharmony_ciunsigned int kfd_get_pasid_limit(void); 8868c2ecf20Sopenharmony_ciu32 kfd_pasid_alloc(void); 8878c2ecf20Sopenharmony_civoid kfd_pasid_free(u32 pasid); 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci/* Doorbells */ 8908c2ecf20Sopenharmony_cisize_t kfd_doorbell_process_slice(struct kfd_dev *kfd); 8918c2ecf20Sopenharmony_ciint kfd_doorbell_init(struct kfd_dev *kfd); 8928c2ecf20Sopenharmony_civoid 
kfd_doorbell_fini(struct kfd_dev *kfd); 8938c2ecf20Sopenharmony_ciint kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, 8948c2ecf20Sopenharmony_ci struct vm_area_struct *vma); 8958c2ecf20Sopenharmony_civoid __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 8968c2ecf20Sopenharmony_ci unsigned int *doorbell_off); 8978c2ecf20Sopenharmony_civoid kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); 8988c2ecf20Sopenharmony_ciu32 read_kernel_doorbell(u32 __iomem *db); 8998c2ecf20Sopenharmony_civoid write_kernel_doorbell(void __iomem *db, u32 value); 9008c2ecf20Sopenharmony_civoid write_kernel_doorbell64(void __iomem *db, u64 value); 9018c2ecf20Sopenharmony_ciunsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, 9028c2ecf20Sopenharmony_ci struct kfd_process_device *pdd, 9038c2ecf20Sopenharmony_ci unsigned int doorbell_id); 9048c2ecf20Sopenharmony_ciphys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd); 9058c2ecf20Sopenharmony_ciint kfd_alloc_process_doorbells(struct kfd_dev *kfd, 9068c2ecf20Sopenharmony_ci unsigned int *doorbell_index); 9078c2ecf20Sopenharmony_civoid kfd_free_process_doorbells(struct kfd_dev *kfd, 9088c2ecf20Sopenharmony_ci unsigned int doorbell_index); 9098c2ecf20Sopenharmony_ci/* GTT Sub-Allocator */ 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ciint kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, 9128c2ecf20Sopenharmony_ci struct kfd_mem_obj **mem_obj); 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_ciint kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ciextern struct device *kfd_device; 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci/* KFD's procfs */ 9198c2ecf20Sopenharmony_civoid kfd_procfs_init(void); 9208c2ecf20Sopenharmony_civoid kfd_procfs_shutdown(void); 9218c2ecf20Sopenharmony_ciint kfd_procfs_add_queue(struct queue *q); 9228c2ecf20Sopenharmony_civoid kfd_procfs_del_queue(struct 
queue *q); 9238c2ecf20Sopenharmony_ci 9248c2ecf20Sopenharmony_ci/* Topology */ 9258c2ecf20Sopenharmony_ciint kfd_topology_init(void); 9268c2ecf20Sopenharmony_civoid kfd_topology_shutdown(void); 9278c2ecf20Sopenharmony_ciint kfd_topology_add_device(struct kfd_dev *gpu); 9288c2ecf20Sopenharmony_ciint kfd_topology_remove_device(struct kfd_dev *gpu); 9298c2ecf20Sopenharmony_cistruct kfd_topology_device *kfd_topology_device_by_proximity_domain( 9308c2ecf20Sopenharmony_ci uint32_t proximity_domain); 9318c2ecf20Sopenharmony_cistruct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); 9328c2ecf20Sopenharmony_cistruct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 9338c2ecf20Sopenharmony_cistruct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 9348c2ecf20Sopenharmony_cistruct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); 9358c2ecf20Sopenharmony_ciint kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); 9368c2ecf20Sopenharmony_ciint kfd_numa_node_to_apic_id(int numa_node_id); 9378c2ecf20Sopenharmony_civoid kfd_double_confirm_iommu_support(struct kfd_dev *gpu); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci/* Interrupts */ 9408c2ecf20Sopenharmony_ciint kfd_interrupt_init(struct kfd_dev *dev); 9418c2ecf20Sopenharmony_civoid kfd_interrupt_exit(struct kfd_dev *dev); 9428c2ecf20Sopenharmony_cibool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 9438c2ecf20Sopenharmony_cibool interrupt_is_wanted(struct kfd_dev *dev, 9448c2ecf20Sopenharmony_ci const uint32_t *ih_ring_entry, 9458c2ecf20Sopenharmony_ci uint32_t *patched_ihre, bool *flag); 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci/* amdkfd Apertures */ 9488c2ecf20Sopenharmony_ciint kfd_init_apertures(struct kfd_process *process); 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci/* Queue Context Management */ 9518c2ecf20Sopenharmony_ciint init_queue(struct queue **q, const struct queue_properties *properties); 9528c2ecf20Sopenharmony_civoid 
uninit_queue(struct queue *q); 9538c2ecf20Sopenharmony_civoid print_queue_properties(struct queue_properties *q); 9548c2ecf20Sopenharmony_civoid print_queue(struct queue *q); 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 9578c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9588c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, 9598c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9608c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 9618c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9628c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, 9638c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9648c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, 9658c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9668c2ecf20Sopenharmony_cistruct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, 9678c2ecf20Sopenharmony_ci struct kfd_dev *dev); 9688c2ecf20Sopenharmony_cistruct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 9698c2ecf20Sopenharmony_civoid device_queue_manager_uninit(struct device_queue_manager *dqm); 9708c2ecf20Sopenharmony_cistruct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 9718c2ecf20Sopenharmony_ci enum kfd_queue_type type); 9728c2ecf20Sopenharmony_civoid kernel_queue_uninit(struct kernel_queue *kq, bool hanging); 9738c2ecf20Sopenharmony_ciint kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid); 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_ci/* Process Queue Manager */ 9768c2ecf20Sopenharmony_cistruct process_queue_node { 9778c2ecf20Sopenharmony_ci struct queue *q; 9788c2ecf20Sopenharmony_ci struct kernel_queue *kq; 9798c2ecf20Sopenharmony_ci struct list_head process_queue_list; 9808c2ecf20Sopenharmony_ci}; 9818c2ecf20Sopenharmony_ci 9828c2ecf20Sopenharmony_civoid kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd); 9838c2ecf20Sopenharmony_civoid kfd_process_dequeue_from_all_devices(struct kfd_process *p); 9848c2ecf20Sopenharmony_ciint pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); 9858c2ecf20Sopenharmony_civoid pqm_uninit(struct process_queue_manager *pqm); 9868c2ecf20Sopenharmony_ciint pqm_create_queue(struct process_queue_manager *pqm, 9878c2ecf20Sopenharmony_ci struct kfd_dev *dev, 9888c2ecf20Sopenharmony_ci struct file *f, 9898c2ecf20Sopenharmony_ci struct queue_properties *properties, 9908c2ecf20Sopenharmony_ci unsigned int *qid, 9918c2ecf20Sopenharmony_ci uint32_t *p_doorbell_offset_in_process); 9928c2ecf20Sopenharmony_ciint pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 9938c2ecf20Sopenharmony_ciint pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 9948c2ecf20Sopenharmony_ci struct queue_properties *p); 9958c2ecf20Sopenharmony_ciint pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, 9968c2ecf20Sopenharmony_ci struct queue_properties *p); 9978c2ecf20Sopenharmony_ciint pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, 9988c2ecf20Sopenharmony_ci void *gws); 9998c2ecf20Sopenharmony_cistruct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, 10008c2ecf20Sopenharmony_ci unsigned int qid); 10018c2ecf20Sopenharmony_cistruct queue *pqm_get_user_queue(struct process_queue_manager *pqm, 10028c2ecf20Sopenharmony_ci unsigned int qid); 10038c2ecf20Sopenharmony_ciint pqm_get_wave_state(struct process_queue_manager *pqm, 10048c2ecf20Sopenharmony_ci unsigned int qid, 10058c2ecf20Sopenharmony_ci void __user *ctl_stack, 10068c2ecf20Sopenharmony_ci u32 *ctl_stack_used_size, 10078c2ecf20Sopenharmony_ci u32 *save_area_used_size); 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ciint amdkfd_fence_wait_timeout(uint64_t *fence_addr, 10108c2ecf20Sopenharmony_ci uint64_t fence_value, 10118c2ecf20Sopenharmony_ci unsigned int timeout_ms); 
10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci/* Packet Manager */ 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_ci#define KFD_FENCE_COMPLETED (100) 10168c2ecf20Sopenharmony_ci#define KFD_FENCE_INIT (10) 10178c2ecf20Sopenharmony_ci 10188c2ecf20Sopenharmony_cistruct packet_manager { 10198c2ecf20Sopenharmony_ci struct device_queue_manager *dqm; 10208c2ecf20Sopenharmony_ci struct kernel_queue *priv_queue; 10218c2ecf20Sopenharmony_ci struct mutex lock; 10228c2ecf20Sopenharmony_ci bool allocated; 10238c2ecf20Sopenharmony_ci struct kfd_mem_obj *ib_buffer_obj; 10248c2ecf20Sopenharmony_ci unsigned int ib_size_bytes; 10258c2ecf20Sopenharmony_ci bool is_over_subscription; 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_ci const struct packet_manager_funcs *pmf; 10288c2ecf20Sopenharmony_ci}; 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_cistruct packet_manager_funcs { 10318c2ecf20Sopenharmony_ci /* Support ASIC-specific packet formats for PM4 packets */ 10328c2ecf20Sopenharmony_ci int (*map_process)(struct packet_manager *pm, uint32_t *buffer, 10338c2ecf20Sopenharmony_ci struct qcm_process_device *qpd); 10348c2ecf20Sopenharmony_ci int (*runlist)(struct packet_manager *pm, uint32_t *buffer, 10358c2ecf20Sopenharmony_ci uint64_t ib, size_t ib_size_in_dwords, bool chain); 10368c2ecf20Sopenharmony_ci int (*set_resources)(struct packet_manager *pm, uint32_t *buffer, 10378c2ecf20Sopenharmony_ci struct scheduling_resources *res); 10388c2ecf20Sopenharmony_ci int (*map_queues)(struct packet_manager *pm, uint32_t *buffer, 10398c2ecf20Sopenharmony_ci struct queue *q, bool is_static); 10408c2ecf20Sopenharmony_ci int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, 10418c2ecf20Sopenharmony_ci enum kfd_queue_type type, 10428c2ecf20Sopenharmony_ci enum kfd_unmap_queues_filter mode, 10438c2ecf20Sopenharmony_ci uint32_t filter_param, bool reset, 10448c2ecf20Sopenharmony_ci unsigned int sdma_engine); 10458c2ecf20Sopenharmony_ci int (*query_status)(struct 
packet_manager *pm, uint32_t *buffer, 10468c2ecf20Sopenharmony_ci uint64_t fence_address, uint64_t fence_value); 10478c2ecf20Sopenharmony_ci int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_ci /* Packet sizes */ 10508c2ecf20Sopenharmony_ci int map_process_size; 10518c2ecf20Sopenharmony_ci int runlist_size; 10528c2ecf20Sopenharmony_ci int set_resources_size; 10538c2ecf20Sopenharmony_ci int map_queues_size; 10548c2ecf20Sopenharmony_ci int unmap_queues_size; 10558c2ecf20Sopenharmony_ci int query_status_size; 10568c2ecf20Sopenharmony_ci int release_mem_size; 10578c2ecf20Sopenharmony_ci}; 10588c2ecf20Sopenharmony_ci 10598c2ecf20Sopenharmony_ciextern const struct packet_manager_funcs kfd_vi_pm_funcs; 10608c2ecf20Sopenharmony_ciextern const struct packet_manager_funcs kfd_v9_pm_funcs; 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ciint pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); 10638c2ecf20Sopenharmony_civoid pm_uninit(struct packet_manager *pm, bool hanging); 10648c2ecf20Sopenharmony_ciint pm_send_set_resources(struct packet_manager *pm, 10658c2ecf20Sopenharmony_ci struct scheduling_resources *res); 10668c2ecf20Sopenharmony_ciint pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); 10678c2ecf20Sopenharmony_ciint pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, 10688c2ecf20Sopenharmony_ci uint64_t fence_value); 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ciint pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, 10718c2ecf20Sopenharmony_ci enum kfd_unmap_queues_filter mode, 10728c2ecf20Sopenharmony_ci uint32_t filter_param, bool reset, 10738c2ecf20Sopenharmony_ci unsigned int sdma_engine); 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_civoid pm_release_ib(struct packet_manager *pm); 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci/* Following PM funcs can be shared among VI and AI */ 
10788c2ecf20Sopenharmony_ciunsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); 10798c2ecf20Sopenharmony_ci 10808c2ecf20Sopenharmony_ciuint64_t kfd_get_number_elems(struct kfd_dev *kfd); 10818c2ecf20Sopenharmony_ci 10828c2ecf20Sopenharmony_ci/* Events */ 10838c2ecf20Sopenharmony_ciextern const struct kfd_event_interrupt_class event_interrupt_class_cik; 10848c2ecf20Sopenharmony_ciextern const struct kfd_event_interrupt_class event_interrupt_class_v9; 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_ciextern const struct kfd_device_global_init_class device_global_init_class_cik; 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_civoid kfd_event_init_process(struct kfd_process *p); 10898c2ecf20Sopenharmony_civoid kfd_event_free_process(struct kfd_process *p); 10908c2ecf20Sopenharmony_ciint kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); 10918c2ecf20Sopenharmony_ciint kfd_wait_on_events(struct kfd_process *p, 10928c2ecf20Sopenharmony_ci uint32_t num_events, void __user *data, 10938c2ecf20Sopenharmony_ci bool all, uint32_t user_timeout_ms, 10948c2ecf20Sopenharmony_ci uint32_t *wait_result); 10958c2ecf20Sopenharmony_civoid kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 10968c2ecf20Sopenharmony_ci uint32_t valid_id_bits); 10978c2ecf20Sopenharmony_civoid kfd_signal_iommu_event(struct kfd_dev *dev, 10988c2ecf20Sopenharmony_ci u32 pasid, unsigned long address, 10998c2ecf20Sopenharmony_ci bool is_write_requested, bool is_execute_requested); 11008c2ecf20Sopenharmony_civoid kfd_signal_hw_exception_event(u32 pasid); 11018c2ecf20Sopenharmony_ciint kfd_set_event(struct kfd_process *p, uint32_t event_id); 11028c2ecf20Sopenharmony_ciint kfd_reset_event(struct kfd_process *p, uint32_t event_id); 11038c2ecf20Sopenharmony_ciint kfd_event_page_set(struct kfd_process *p, void *kernel_address, 11048c2ecf20Sopenharmony_ci uint64_t size); 11058c2ecf20Sopenharmony_ciint kfd_event_create(struct file *devkfd, struct kfd_process 
*p, 11068c2ecf20Sopenharmony_ci uint32_t event_type, bool auto_reset, uint32_t node_id, 11078c2ecf20Sopenharmony_ci uint32_t *event_id, uint32_t *event_trigger_data, 11088c2ecf20Sopenharmony_ci uint64_t *event_page_offset, uint32_t *event_slot_index); 11098c2ecf20Sopenharmony_ciint kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 11108c2ecf20Sopenharmony_ci 11118c2ecf20Sopenharmony_civoid kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, 11128c2ecf20Sopenharmony_ci struct kfd_vm_fault_info *info); 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_civoid kfd_signal_reset_event(struct kfd_dev *dev); 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_civoid kfd_flush_tlb(struct kfd_process_device *pdd); 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ciint dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_cibool kfd_is_locked(void); 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci/* Compute profile */ 11238c2ecf20Sopenharmony_civoid kfd_inc_compute_active(struct kfd_dev *dev); 11248c2ecf20Sopenharmony_civoid kfd_dec_compute_active(struct kfd_dev *dev); 11258c2ecf20Sopenharmony_ci 11268c2ecf20Sopenharmony_ci/* Cgroup Support */ 11278c2ecf20Sopenharmony_ci/* Check with device cgroup if @kfd device is accessible */ 11288c2ecf20Sopenharmony_cistatic inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) 11298c2ecf20Sopenharmony_ci{ 11308c2ecf20Sopenharmony_ci#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) 11318c2ecf20Sopenharmony_ci struct drm_device *ddev = kfd->ddev; 11328c2ecf20Sopenharmony_ci 11338c2ecf20Sopenharmony_ci return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, 11348c2ecf20Sopenharmony_ci ddev->render->index, 11358c2ecf20Sopenharmony_ci DEVCG_ACC_WRITE | DEVCG_ACC_READ); 11368c2ecf20Sopenharmony_ci#else 11378c2ecf20Sopenharmony_ci return 0; 11388c2ecf20Sopenharmony_ci#endif 11398c2ecf20Sopenharmony_ci} 
11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci/* Debugfs */ 11428c2ecf20Sopenharmony_ci#if defined(CONFIG_DEBUG_FS) 11438c2ecf20Sopenharmony_ci 11448c2ecf20Sopenharmony_civoid kfd_debugfs_init(void); 11458c2ecf20Sopenharmony_civoid kfd_debugfs_fini(void); 11468c2ecf20Sopenharmony_ciint kfd_debugfs_mqds_by_process(struct seq_file *m, void *data); 11478c2ecf20Sopenharmony_ciint pqm_debugfs_mqds(struct seq_file *m, void *data); 11488c2ecf20Sopenharmony_ciint kfd_debugfs_hqds_by_device(struct seq_file *m, void *data); 11498c2ecf20Sopenharmony_ciint dqm_debugfs_hqds(struct seq_file *m, void *data); 11508c2ecf20Sopenharmony_ciint kfd_debugfs_rls_by_device(struct seq_file *m, void *data); 11518c2ecf20Sopenharmony_ciint pm_debugfs_runlist(struct seq_file *m, void *data); 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_ciint kfd_debugfs_hang_hws(struct kfd_dev *dev); 11548c2ecf20Sopenharmony_ciint pm_debugfs_hang_hws(struct packet_manager *pm); 11558c2ecf20Sopenharmony_ciint dqm_debugfs_execute_queues(struct device_queue_manager *dqm); 11568c2ecf20Sopenharmony_ci 11578c2ecf20Sopenharmony_ci#else 11588c2ecf20Sopenharmony_ci 11598c2ecf20Sopenharmony_cistatic inline void kfd_debugfs_init(void) {} 11608c2ecf20Sopenharmony_cistatic inline void kfd_debugfs_fini(void) {} 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci#endif 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci#endif 1165