// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/genalloc.h>

#define HL_MMU_DEBUG	0

/*
 * The VA ranges in the context object contain a list with the available chunks
 * of device virtual memory.
 * There is one range for host allocations and one for DRAM allocations.
 *
 * On initialization, each range contains one chunk covering all of its
 * available virtual range, which is half of the total device virtual range.
 *
 * On each mapping of physical pages, a suitable virtual range chunk (with a
 * minimum size) is selected from the list. If the chunk size equals the
 * requested size, the chunk is returned. Otherwise, the chunk is split into
 * two chunks - one to return as result and a remainder to stay in the list.
 *
 * On each unmapping of a virtual address, the relevant virtual chunk is
 * returned to the list. The chunk is added to the list and if its edges match
 * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
 * the chunks are merged.
 *
 * On finish, the list is checked to hold only one chunk covering the relevant
 * virtual range (which is half of the device's total virtual range).
 * If not (meaning not all mappings were unmapped), a warning is printed.
 */
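
/*
 * Illustration of the chunk handling described above, using made-up
 * addresses (assuming a range that starts as one free chunk):
 *
 *	initial list:   [0x1000_0000 - 0x1FFF_FFFF]
 *	map 0x20_0000:  returned chunk   [0x1000_0000 - 0x101F_FFFF]
 *	                remaining chunk  [0x1020_0000 - 0x1FFF_FFFF]
 *	unmap:          edges match, so the chunks are merged back into
 *	                [0x1000_0000 - 0x1FFF_FFFF]
 */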

/*
 * alloc_device_memory - allocate device memory
 *
 * @ctx : current context
 * @args : host parameters containing the requested size
 * @ret_handle : result handle
 *
 * This function does the following:
 * - Allocate the requested size rounded up to 2MB pages
 * - Return unique handle
 */
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
				u32 *ret_handle)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u64 paddr = 0, total_size, num_pgs, i;
	u32 num_curr_pgs, page_size, page_shift;
	int handle, rc;
	bool contiguous;

	num_curr_pgs = 0;
	page_size = hdev->asic_prop.dram_page_size;
	page_shift = __ffs(page_size);
	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
	total_size = num_pgs << page_shift;

	if (!total_size) {
		dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
		return -EINVAL;
	}

	contiguous = args->flags & HL_MEM_CONTIGUOUS;

	if (contiguous) {
		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
		if (!paddr) {
			dev_err(hdev->dev,
				"failed to allocate %llu contiguous pages with total size of %llu\n",
				num_pgs, total_size);
			return -ENOMEM;
		}
	}

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack) {
		rc = -ENOMEM;
		goto pages_pack_err;
	}

	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
	phys_pg_pack->asid = ctx->asid;
	phys_pg_pack->npages = num_pgs;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_size;
	phys_pg_pack->flags = args->flags;
	phys_pg_pack->contiguous = contiguous;

	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto pages_arr_err;
	}

	if (phys_pg_pack->contiguous) {
		for (i = 0 ; i < num_pgs ; i++)
			phys_pg_pack->pages[i] = paddr + i * page_size;
	} else {
		for (i = 0 ; i < num_pgs ; i++) {
			phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
							vm->dram_pg_pool,
							page_size);
			if (!phys_pg_pack->pages[i]) {
				dev_err(hdev->dev,
					"Failed to allocate device memory (out of memory)\n");
				rc = -ENOMEM;
				goto page_err;
			}

			num_curr_pgs++;
		}
	}

	spin_lock(&vm->idr_lock);
	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				GFP_ATOMIC);
	spin_unlock(&vm->idr_lock);

	if (handle < 0) {
		dev_err(hdev->dev, "Failed to get handle for page\n");
		rc = -EFAULT;
		goto idr_err;
	}

	for (i = 0 ; i < num_pgs ; i++)
		kref_get(&vm->dram_pg_pool_refcount);

	phys_pg_pack->handle = handle;

	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);

	*ret_handle = handle;

	return 0;

idr_err:
page_err:
	if (!phys_pg_pack->contiguous)
		for (i = 0 ; i < num_curr_pgs ; i++)
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
					page_size);

	kvfree(phys_pg_pack->pages);
pages_arr_err:
	kfree(phys_pg_pack);
pages_pack_err:
	if (contiguous)
		gen_pool_free(vm->dram_pg_pool, paddr, total_size);

	return rc;
}
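
/*
 * Worked example of the size rounding in alloc_device_memory(), assuming the
 * common 2MB DRAM page size (illustrative numbers, not taken from a real
 * ASIC):
 *
 *	page_size  = 0x200000;                           // 2MB
 *	page_shift = 21;
 *	num_pgs    = (5 * 1024 * 1024 + 0x1FFFFF) >> 21; // 3 pages
 *	total_size = 3 << 21;                            // 6MB
 *
 * i.e. a 5MB request consumes three whole DRAM pages and the returned handle
 * always covers total_size bytes.
 */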

/*
 * dma_map_host_va - DMA mapping of the given host virtual address.
 * @hdev: habanalabs device structure
 * @addr: the host virtual address of the memory area
 * @size: the size of the memory area
 * @p_userptr: pointer to result userptr structure
 *
 * This function does the following:
 * - Allocate userptr structure
 * - Pin the given host memory using the userptr structure
 * - Perform DMA mapping to have the DMA addresses of the pages
 */
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
				struct hl_userptr **p_userptr)
{
	struct hl_userptr *userptr;
	int rc;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr) {
		rc = -ENOMEM;
		goto userptr_err;
	}

	rc = hl_pin_host_memory(hdev, addr, size, userptr);
	if (rc) {
		dev_err(hdev->dev, "Failed to pin host memory\n");
		goto pin_err;
	}

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
					userptr->sgt->nents, DMA_BIDIRECTIONAL);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto dma_map_err;
	}

	userptr->dma_mapped = true;
	userptr->dir = DMA_BIDIRECTIONAL;
	userptr->vm_type = VM_TYPE_USERPTR;

	*p_userptr = userptr;

	return 0;

dma_map_err:
	hl_unpin_host_memory(hdev, userptr);
pin_err:
	kfree(userptr);
userptr_err:

	return rc;
}

/*
 * dma_unmap_host_va - DMA unmapping of the given host virtual address.
 * @hdev: habanalabs device structure
 * @userptr: userptr to free
 *
 * This function does the following:
 * - Unpins the physical pages
 * - Frees the userptr structure
 */
static void dma_unmap_host_va(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}

/*
 * dram_pg_pool_do_release - free DRAM pages pool
 *
 * @ref : pointer to reference object
 *
 * This function does the following:
 * - Frees the idr structure of physical pages handles
 * - Frees the generic pool of DRAM physical pages
 */
static void dram_pg_pool_do_release(struct kref *ref)
{
	struct hl_vm *vm = container_of(ref, struct hl_vm,
			dram_pg_pool_refcount);

	/*
	 * free the idr here as only here we know for sure that there are no
	 * allocated physical pages and hence there are no handles in use
	 */
	idr_destroy(&vm->phys_pg_pack_handles);
	gen_pool_destroy(vm->dram_pg_pool);
}

/*
 * free_phys_pg_pack - free physical page pack
 * @hdev: habanalabs device structure
 * @phys_pg_pack: physical page pack to free
 *
 * This function does the following:
 * - For DRAM memory only, iterate over the pack and free each physical block
 *   structure by returning it to the general pool
 * - Free the hl_vm_phys_pg_pack structure
 */
static void free_phys_pg_pack(struct hl_device *hdev,
		struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_vm *vm = &hdev->vm;
	u64 i;

	if (!phys_pg_pack->created_from_userptr) {
		if (phys_pg_pack->contiguous) {
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
					phys_pg_pack->total_size);

			for (i = 0; i < phys_pg_pack->npages ; i++)
				kref_put(&vm->dram_pg_pool_refcount,
						dram_pg_pool_do_release);
		} else {
			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				gen_pool_free(vm->dram_pg_pool,
						phys_pg_pack->pages[i],
						phys_pg_pack->page_size);
				kref_put(&vm->dram_pg_pool_refcount,
						dram_pg_pool_do_release);
			}
		}
	}

	kvfree(phys_pg_pack->pages);
	kfree(phys_pg_pack);
}

/*
 * free_device_memory - free device memory
 *
 * @ctx : current context
 * @handle : handle of the memory chunk to free
 *
 * This function does the following:
 * - Free the device memory related to the given handle
 */
static int free_device_memory(struct hl_ctx *ctx, u32 handle)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;

	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (phys_pg_pack) {
		if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
			dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
				handle);
			spin_unlock(&vm->idr_lock);
			return -EINVAL;
		}

		/*
		 * must remove from idr before the freeing of the physical
		 * pages as the refcount of the pool is also the trigger of the
		 * idr destroy
		 */
		idr_remove(&vm->phys_pg_pack_handles, handle);
		spin_unlock(&vm->idr_lock);

		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);

		free_phys_pg_pack(hdev, phys_pg_pack);
	} else {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev,
			"free device memory failed, no match for handle %u\n",
			handle);
		return -EINVAL;
	}

	return 0;
}

/*
 * clear_va_list_locked - free virtual addresses list
 *
 * @hdev : habanalabs device structure
 * @va_list : list of virtual addresses to free
 *
 * This function does the following:
 * - Iterate over the list and free each virtual address block
 *
 * This function should be called only when va_list lock is taken
 */
static void clear_va_list_locked(struct hl_device *hdev,
				struct list_head *va_list)
{
	struct hl_vm_va_block *va_block, *tmp;

	list_for_each_entry_safe(va_block, tmp, va_list, node) {
		list_del(&va_block->node);
		kfree(va_block);
	}
}

/*
 * print_va_list_locked - print virtual addresses list
 *
 * @hdev : habanalabs device structure
 * @va_list : list of virtual addresses to print
 *
 * This function does the following:
 * - Iterate over the list and print each virtual address block
 *
 * This function should be called only when va_list lock is taken
 */
static void print_va_list_locked(struct hl_device *hdev,
				struct list_head *va_list)
{
#if HL_MMU_DEBUG
	struct hl_vm_va_block *va_block;

	dev_dbg(hdev->dev, "print va list:\n");

	list_for_each_entry(va_block, va_list, node)
		dev_dbg(hdev->dev,
			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
			va_block->start, va_block->end, va_block->size);
#endif
}

/*
 * merge_va_blocks_locked - merge a virtual block if possible
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_list : pointer to the virtual addresses block list
 * @va_block : virtual block to merge with adjacent blocks
 *
 * This function does the following:
 * - Merge the given block with the adjacent blocks if their virtual ranges
 *   form a contiguous virtual range
 *
 * This function should be called only when va_list lock is taken
 */
static void merge_va_blocks_locked(struct hl_device *hdev,
		struct list_head *va_list, struct hl_vm_va_block *va_block)
{
	struct hl_vm_va_block *prev, *next;

	prev = list_prev_entry(va_block, node);
	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
		prev->end = va_block->end;
		prev->size = prev->end - prev->start;
		list_del(&va_block->node);
		kfree(va_block);
		va_block = prev;
	}

	next = list_next_entry(va_block, node);
	if (&next->node != va_list && va_block->end + 1 == next->start) {
		next->start = va_block->start;
		next->size = next->end - next->start;
		list_del(&va_block->node);
		kfree(va_block);
	}
}

/*
 * add_va_block_locked - add a virtual block to the virtual addresses list
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_list : pointer to the virtual addresses block list
 * @start : start virtual address
 * @end : end virtual address
 *
 * This function does the following:
 * - Add the given block to the virtual blocks list and merge with other
 *   blocks if a contiguous virtual block can be created
 *
 * This function should be called only when va_list lock is taken
 */
static int add_va_block_locked(struct hl_device *hdev,
		struct list_head *va_list, u64 start, u64 end)
{
	struct hl_vm_va_block *va_block, *res = NULL;
	u64 size = end - start;

	print_va_list_locked(hdev, va_list);

	list_for_each_entry(va_block, va_list, node) {
		/* TODO: remove once the code is mature */
		if (hl_mem_area_crosses_range(start, size, va_block->start,
				va_block->end)) {
			dev_err(hdev->dev,
				"block crossing ranges at start 0x%llx, end 0x%llx\n",
				va_block->start, va_block->end);
			return -EINVAL;
		}

		if (va_block->end < start)
			res = va_block;
	}

	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
	if (!va_block)
		return -ENOMEM;

	va_block->start = start;
	va_block->end = end;
	va_block->size = size;

	if (!res)
		list_add(&va_block->node, va_list);
	else
		list_add(&va_block->node, &res->node);

	merge_va_blocks_locked(hdev, va_list, va_block);

	print_va_list_locked(hdev, va_list);

	return 0;
}
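
/*
 * Example of the insert-and-merge behaviour of add_va_block_locked(), with
 * made-up addresses: if the list holds [0x1000 - 0x1FFF] and
 * [0x4000 - 0x4FFF], adding the block 0x2000 - 0x3FFF places it after the
 * first entry, and merge_va_blocks_locked() then folds all three entries into
 * a single [0x1000 - 0x4FFF] chunk because their edges are contiguous.
 */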

/*
 * add_va_block - wrapper for add_va_block_locked
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_range : pointer to the virtual addresses range
 * @start : start virtual address
 * @end : end virtual address
 *
 * This function does the following:
 * - Takes the list lock and calls add_va_block_locked
 */
static inline int add_va_block(struct hl_device *hdev,
		struct hl_va_range *va_range, u64 start, u64 end)
{
	int rc;

	mutex_lock(&va_range->lock);
	rc = add_va_block_locked(hdev, &va_range->list, start, end);
	mutex_unlock(&va_range->lock);

	return rc;
}

/*
 * get_va_block() - get a virtual block for the given size and alignment.
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to the virtual addresses range.
 * @size: requested block size.
 * @hint_addr: hint for requested address by the user.
 * @va_block_align: required alignment of the virtual block start address.
 *
 * This function does the following:
 * - Iterate on the virtual block list to find a suitable virtual block for the
 *   given size and alignment.
 * - Reserve the requested block and update the list.
 * - Return the start address of the virtual block.
 */
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
			u64 size, u64 hint_addr, u32 va_block_align)
{
	struct hl_vm_va_block *va_block, *new_va_block = NULL;
	u64 valid_start, valid_size, prev_start, prev_end, align_mask,
		res_valid_start = 0, res_valid_size = 0;
	bool add_prev = false;

	align_mask = ~((u64)va_block_align - 1);

	/* check if hint_addr is aligned */
	if (hint_addr & (va_block_align - 1))
		hint_addr = 0;

	mutex_lock(&va_range->lock);

	print_va_list_locked(hdev, &va_range->list);

	list_for_each_entry(va_block, &va_range->list, node) {
		/* calc the first possible aligned addr */
		valid_start = va_block->start;

		if (valid_start & (va_block_align - 1)) {
			valid_start &= align_mask;
			valid_start += va_block_align;
			if (valid_start > va_block->end)
				continue;
		}

		valid_size = va_block->end - valid_start;

		if (valid_size >= size &&
		    (!new_va_block || valid_size < res_valid_size)) {
			new_va_block = va_block;
			res_valid_start = valid_start;
			res_valid_size = valid_size;
		}

		if (hint_addr && hint_addr >= valid_start &&
		    ((hint_addr + size) <= va_block->end)) {
			new_va_block = va_block;
			res_valid_start = hint_addr;
			res_valid_size = valid_size;
			break;
		}
	}

	if (!new_va_block) {
		dev_err(hdev->dev, "no available va block for size %llu\n",
				size);
		goto out;
	}

	if (res_valid_start > new_va_block->start) {
		prev_start = new_va_block->start;
		prev_end = res_valid_start - 1;

		new_va_block->start = res_valid_start;
		new_va_block->size = res_valid_size;

		add_prev = true;
	}

	if (new_va_block->size > size) {
		new_va_block->start += size;
		new_va_block->size = new_va_block->end - new_va_block->start;
	} else {
		list_del(&new_va_block->node);
		kfree(new_va_block);
	}

	if (add_prev)
		add_va_block_locked(hdev, &va_range->list, prev_start,
				prev_end);

	print_va_list_locked(hdev, &va_range->list);
out:
	mutex_unlock(&va_range->lock);

	return res_valid_start;
}
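
/*
 * Illustration of the alignment handling in get_va_block(), with made-up
 * values: for va_block_align = 2MB, a free block starting at 0x1010_0000 is
 * considered from valid_start = 0x1020_0000 (rounded up to the alignment),
 * and it is a candidate only if at least 'size' bytes remain up to
 * va_block->end. An aligned hint_addr that fits inside a block is preferred
 * and ends the search immediately; an unaligned hint is silently dropped.
 */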

/*
 * get_sg_info - get number of pages and the DMA address from SG list
 *
 * @sg : the SG list
 * @dma_addr : pointer to DMA address to return
 *
 * Calculate the number of consecutive pages described by the SG list. Take the
 * offset of the address within the first page, add the length to it and round
 * up to the number of pages needed.
 */
static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
{
	*dma_addr = sg_dma_address(sg);

	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
}

/*
 * init_phys_pg_pack_from_userptr - initialize physical page pack from host
 *                                  memory
 * @ctx: current context
 * @userptr: userptr to initialize from
 * @pphys_pg_pack: result pointer
 *
 * This function does the following:
 * - Pin the physical pages related to the given virtual block
 * - Create a physical page pack from the physical pages related to the given
 *   virtual block
 */
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				struct hl_userptr *userptr,
				struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct scatterlist *sg;
	dma_addr_t dma_addr;
	u64 page_mask, total_npages;
	u32 npages, page_size = PAGE_SIZE,
		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
	bool first = true, is_huge_page_opt = true;
	int rc, i, j;
	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack)
		return -ENOMEM;

	phys_pg_pack->vm_type = userptr->vm_type;
	phys_pg_pack->created_from_userptr = true;
	phys_pg_pack->asid = ctx->asid;
	atomic_set(&phys_pg_pack->mapping_cnt, 1);

	/* We can use huge page mapping only if all dma_addrs are aligned to
	 * 2MB and their sizes are a multiple of 2MB.
	 * We limit the 2MB optimization to this condition,
	 * since later on we acquire the related VA range as one
	 * consecutive block.
	 */
	total_npages = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		total_npages += npages;

		if ((npages % pgs_in_huge_page) ||
					(dma_addr & (huge_page_size - 1)))
			is_huge_page_opt = false;
	}

	if (is_huge_page_opt) {
		page_size = huge_page_size;
		do_div(total_npages, pgs_in_huge_page);
	}

	page_mask = ~(((u64) page_size) - 1);

	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
						GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto page_pack_arr_mem_err;
	}

	phys_pg_pack->npages = total_npages;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_npages * page_size;

	j = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		/* align down to physical page size and save the offset */
		if (first) {
			first = false;
			phys_pg_pack->offset = dma_addr & (page_size - 1);
			dma_addr &= page_mask;
		}

		while (npages) {
			phys_pg_pack->pages[j++] = dma_addr;
			dma_addr += page_size;

			if (is_huge_page_opt)
				npages -= pgs_in_huge_page;
			else
				npages--;
		}
	}

	*pphys_pg_pack = phys_pg_pack;

	return 0;

page_pack_arr_mem_err:
	kfree(phys_pg_pack);

	return rc;
}
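
/*
 * Sketch of the huge page decision above, assuming 4KB host pages and a 2MB
 * huge page (pgs_in_huge_page = 512): an SG entry that is 4MB long and starts
 * on a 2MB boundary contributes 1024 regular pages, so npages % 512 == 0 and
 * the alignment test passes, and the entry can be described by two 2MB
 * entries in the pack. A single entry that is unaligned or not a multiple of
 * 2MB forces 4KB mappings for the whole pack.
 */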

/*
 * map_phys_pg_pack - maps the physical page pack.
 * @ctx: current context
 * @vaddr: start address of the virtual area to map from
 * @phys_pg_pack: the pack of physical pages to map to
 *
 * This function does the following:
 * - Maps each chunk of virtual memory to a matching physical chunk
 * - Returns 0 on success, error code otherwise; on failure, pages that were
 *   already mapped are unmapped before returning
 */
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
	u32 page_size = phys_pg_pack->page_size;
	int rc = 0;

	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
		paddr = phys_pg_pack->pages[i];

		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
		if (rc) {
			dev_err(hdev->dev,
				"map failed for handle %u, npages: %llu, mapped: %llu",
				phys_pg_pack->handle, phys_pg_pack->npages,
				mapped_pg_cnt);
			goto err;
		}

		mapped_pg_cnt++;
		next_vaddr += page_size;
	}

	return 0;

err:
	next_vaddr = vaddr;
	for (i = 0 ; i < mapped_pg_cnt ; i++) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
					(i + 1) == mapped_pg_cnt))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
				phys_pg_pack->handle, next_vaddr,
				phys_pg_pack->pages[i], page_size);

		next_vaddr += page_size;
	}

	return rc;
}

/*
 * unmap_phys_pg_pack - unmaps the physical page pack
 * @ctx: current context
 * @vaddr: start address of the virtual area to unmap
 * @phys_pg_pack: the pack of physical pages to unmap
 */
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr, i;
	u32 page_size;

	page_size = phys_pg_pack->page_size;
	next_vaddr = vaddr;

	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
				(i + 1) == phys_pg_pack->npages))
			dev_warn_ratelimited(hdev->dev,
			"unmap failed for vaddr: 0x%llx\n", next_vaddr);

		/*
		 * unmapping on Palladium can be really long, so avoid a CPU
		 * soft lockup bug by sleeping a little between unmapping pages
		 */
		if (hdev->pldm)
			usleep_range(500, 1000);
	}
}

static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
				u64 *paddr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u32 handle;

	handle = lower_32_bits(args->map_device.handle);
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (!phys_pg_pack) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "no match for handle %u\n", handle);
		return -EINVAL;
	}

	*paddr = phys_pg_pack->pages[0];

	spin_unlock(&vm->idr_lock);

	return 0;
}

/*
 * map_device_va - map the given memory
 *
 * @ctx : current context
 * @args : host parameters with handle/host virtual address
 * @device_addr : pointer to result device virtual address
 *
 * This function does the following:
 * - If given a physical device memory handle, map to a device virtual block
 *   and return the start address of this block
 * - If given a host virtual address and size, find the related physical pages,
 *   map a device virtual block to these pages and return the start address of
 *   this block
 */
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
				u64 *device_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct hl_userptr *userptr = NULL;
	struct hl_vm_hash_node *hnode;
	struct hl_va_range *va_range;
	enum vm_type_t *vm_type;
	u64 ret_vaddr, hint_addr;
	u32 handle = 0, va_block_align;
	int rc;
	bool is_userptr = args->flags & HL_MEM_USERPTR;

	/* Assume failure */
	*device_addr = 0;

	if (is_userptr) {
		u64 addr = args->map_host.host_virt_addr,
			size = args->map_host.mem_size;
		u32 page_size = hdev->asic_prop.pmmu.page_size,
			huge_page_size = hdev->asic_prop.pmmu_huge.page_size;

		rc = dma_map_host_va(hdev, addr, size, &userptr);
		if (rc) {
			dev_err(hdev->dev, "failed to get userptr from va\n");
			return rc;
		}

		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				&phys_pg_pack);
		if (rc) {
			dev_err(hdev->dev,
				"unable to init page pack for vaddr 0x%llx\n",
				addr);
			goto init_page_pack_err;
		}

		vm_type = (enum vm_type_t *) userptr;
		hint_addr = args->map_host.hint_addr;
		handle = phys_pg_pack->handle;

		/* get required alignment */
		if (phys_pg_pack->page_size == page_size) {
			va_range = ctx->host_va_range;

			/*
			 * huge page alignment may be needed in case of regular
			 * page mapping, depending on the host VA alignment
			 */
			if (addr & (huge_page_size - 1))
				va_block_align = page_size;
			else
				va_block_align = huge_page_size;
		} else {
			/*
			 * huge page alignment is needed in case of huge page
			 * mapping
			 */
			va_range = ctx->host_huge_va_range;
			va_block_align = huge_page_size;
		}
	} else {
		handle = lower_32_bits(args->map_device.handle);

		spin_lock(&vm->idr_lock);
		phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
		if (!phys_pg_pack) {
			spin_unlock(&vm->idr_lock);
			dev_err(hdev->dev,
				"no match for handle %u\n", handle);
			return -EINVAL;
		}

		/* increment now to avoid freeing device memory while mapping */
		atomic_inc(&phys_pg_pack->mapping_cnt);

		spin_unlock(&vm->idr_lock);

		vm_type = (enum vm_type_t *) phys_pg_pack;

		hint_addr = args->map_device.hint_addr;

		/* DRAM VA alignment is the same as the DRAM page size */
		va_range = ctx->dram_va_range;
		va_block_align = hdev->asic_prop.dmmu.page_size;
	}

	/*
	 * relevant for mapping device physical memory only, as host memory is
	 * implicitly shared
	 */
	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
			phys_pg_pack->asid != ctx->asid) {
		dev_err(hdev->dev,
			"Failed to map memory, handle %u is not shared\n",
			handle);
		rc = -EPERM;
		goto shared_err;
	}

	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
	if (!hnode) {
		rc = -ENOMEM;
		goto hnode_err;
	}

	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
					hint_addr, va_block_align);
	if (!ret_vaddr) {
		dev_err(hdev->dev, "no available va block for handle %u\n",
				handle);
		rc = -ENOMEM;
		goto va_block_err;
	}

	mutex_lock(&ctx->mmu_lock);

	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
	if (rc) {
		mutex_unlock(&ctx->mmu_lock);
		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
			handle);
		goto map_err;
	}

	rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);

	mutex_unlock(&ctx->mmu_lock);

	if (rc) {
		dev_err(hdev->dev,
			"mapping handle %u failed due to MMU cache invalidation\n",
			handle);
		goto map_err;
	}

	ret_vaddr += phys_pg_pack->offset;

	hnode->ptr = vm_type;
	hnode->vaddr = ret_vaddr;

	mutex_lock(&ctx->mem_hash_lock);
	hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
	mutex_unlock(&ctx->mem_hash_lock);

	*device_addr = ret_vaddr;

	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);

	return 0;

map_err:
	if (add_va_block(hdev, va_range, ret_vaddr,
			ret_vaddr + phys_pg_pack->total_size - 1))
		dev_warn(hdev->dev,
			"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
				handle, ret_vaddr);

va_block_err:
	kfree(hnode);
hnode_err:
shared_err:
	atomic_dec(&phys_pg_pack->mapping_cnt);
	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);
init_page_pack_err:
	if (is_userptr)
		dma_unmap_host_va(hdev, userptr);

	return rc;
}

/*
 * unmap_device_va - unmap the given device virtual address
 *
 * @ctx : current context
 * @vaddr : device virtual address to unmap
 * @ctx_free : true if in context free flow, false otherwise.
 *
 * This function does the following:
 * - Unmap the physical pages related to the given virtual address
 * - Return the device virtual block to the virtual block list
 */
static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
	struct hl_vm_hash_node *hnode = NULL;
	struct hl_userptr *userptr = NULL;
	struct hl_va_range *va_range;
	enum vm_type_t *vm_type;
	bool is_userptr;
	int rc = 0;

	/* protect from double entrance */
	mutex_lock(&ctx->mem_hash_lock);
	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
		if (vaddr == hnode->vaddr)
			break;

	if (!hnode) {
		mutex_unlock(&ctx->mem_hash_lock);
		dev_err(hdev->dev,
			"unmap failed, no mem hnode for vaddr 0x%llx\n",
			vaddr);
		return -EINVAL;
	}

	hash_del(&hnode->node);
	mutex_unlock(&ctx->mem_hash_lock);

	vm_type = hnode->ptr;

	if (*vm_type == VM_TYPE_USERPTR) {
		is_userptr = true;
		userptr = hnode->ptr;
		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				&phys_pg_pack);
		if (rc) {
			dev_err(hdev->dev,
				"unable to init page pack for vaddr 0x%llx\n",
				vaddr);
			goto vm_type_err;
		}

		if (phys_pg_pack->page_size ==
					hdev->asic_prop.pmmu.page_size)
			va_range = ctx->host_va_range;
		else
			va_range = ctx->host_huge_va_range;
	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
		is_userptr = false;
		va_range = ctx->dram_va_range;
		phys_pg_pack = hnode->ptr;
	} else {
		dev_warn(hdev->dev,
			"unmap failed, unknown vm desc for vaddr 0x%llx\n",
				vaddr);
		rc = -EFAULT;
		goto vm_type_err;
	}

	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
		dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
		rc = -EINVAL;
		goto mapping_cnt_err;
	}

	vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);

	mutex_lock(&ctx->mmu_lock);

	unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);

	/*
	 * During context free this function is called in a loop to clean all
	 * the context mappings. Hence the cache invalidation can be called once
	 * at the loop end rather than for each iteration
	 */
	if (!ctx_free)
		rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
				*vm_type);

	mutex_unlock(&ctx->mmu_lock);

	/*
	 * If the context is closing we don't need to check for the MMU cache
	 * invalidation return code and update the VA free list as in this flow
	 * we invalidate the MMU cache outside of this unmap function and the VA
	 * free list will be freed anyway.
	 */
	if (!ctx_free) {
		int tmp_rc;

		if (rc)
			dev_err(hdev->dev,
				"unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
				vaddr);

		tmp_rc = add_va_block(hdev, va_range, vaddr,
					vaddr + phys_pg_pack->total_size - 1);
		if (tmp_rc) {
			dev_warn(hdev->dev,
					"add va block failed for vaddr: 0x%llx\n",
					vaddr);
			if (!rc)
				rc = tmp_rc;
		}
	}

	atomic_dec(&phys_pg_pack->mapping_cnt);
	kfree(hnode);

	if (is_userptr) {
		free_phys_pg_pack(hdev, phys_pg_pack);
		dma_unmap_host_va(hdev, userptr);
	}

	return rc;

mapping_cnt_err:
	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);
vm_type_err:
	mutex_lock(&ctx->mem_hash_lock);
	hash_add(ctx->mem_hash, &hnode->node, vaddr);
	mutex_unlock(&ctx->mem_hash_lock);

	return rc;
}

static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 device_addr = 0;
	u32 handle = 0;
	int rc;

	switch (args->in.op) {
	case HL_MEM_OP_ALLOC:
		if (args->in.alloc.mem_size == 0) {
			dev_err(hdev->dev,
				"alloc size must be larger than 0\n");
			rc = -EINVAL;
			goto out;
		}

		/* Force contiguous as there are no real MMU
		 * translations to overcome physical memory gaps
		 */
		args->in.flags |= HL_MEM_CONTIGUOUS;
		rc = alloc_device_memory(ctx, &args->in, &handle);

		memset(args, 0, sizeof(*args));
		args->out.handle = (__u64) handle;
		break;

	case HL_MEM_OP_FREE:
		rc = free_device_memory(ctx, args->in.free.handle);
		break;

	case HL_MEM_OP_MAP:
		if (args->in.flags & HL_MEM_USERPTR) {
			device_addr = args->in.map_host.host_virt_addr;
			rc = 0;
		} else {
			rc = get_paddr_from_handle(ctx, &args->in,
							&device_addr);
		}

		memset(args, 0, sizeof(*args));
		args->out.device_virt_addr = device_addr;
		break;

	case HL_MEM_OP_UNMAP:
		rc = 0;
		break;

	default:
		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
		rc = -ENOTTY;
		break;
	}

out:
	return rc;
}

int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_mem_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 device_addr = 0;
	u32 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute MEMORY IOCTL\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		return -EBUSY;
	}

	if (!hdev->mmu_enable)
		return mem_ioctl_no_mmu(hpriv, args);

	switch (args->in.op) {
	case HL_MEM_OP_ALLOC:
		if (!hdev->dram_supports_virtual_memory) {
			dev_err(hdev->dev, "DRAM alloc is not supported\n");
			rc = -EINVAL;
			goto out;
		}

		if (args->in.alloc.mem_size == 0) {
			dev_err(hdev->dev,
				"alloc size must be larger than 0\n");
			rc = -EINVAL;
			goto out;
		}
		rc = alloc_device_memory(ctx, &args->in, &handle);

		memset(args, 0, sizeof(*args));
		args->out.handle = (__u64) handle;
		break;

	case HL_MEM_OP_FREE:
		rc = free_device_memory(ctx, args->in.free.handle);
		break;

	case HL_MEM_OP_MAP:
		rc = map_device_va(ctx, &args->in, &device_addr);

		memset(args, 0, sizeof(*args));
		args->out.device_virt_addr = device_addr;
		break;

	case HL_MEM_OP_UNMAP:
		rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
					false);
		break;

	default:
		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
		rc = -ENOTTY;
		break;
	}

out:
	return rc;
}

static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
				u32 npages, u64 start, u32 offset,
				struct hl_userptr *userptr)
{
	int rc;

	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
		return -EFAULT;
	}

	userptr->vec = frame_vector_create(npages);
	if (!userptr->vec) {
		dev_err(hdev->dev, "Failed to create frame vector\n");
		return -ENOMEM;
	}

	rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
				userptr->vec);

	if (rc != npages) {
		dev_err(hdev->dev,
			"Failed to map host memory, user ptr probably wrong\n");
		if (rc < 0)
			goto destroy_framevec;
		rc = -EFAULT;
		goto put_framevec;
	}

	if (frame_vector_to_pages(userptr->vec) < 0) {
		dev_err(hdev->dev,
			"Failed to translate frame vector to pages\n");
		rc = -EFAULT;
		goto put_framevec;
	}

	rc = sg_alloc_table_from_pages(userptr->sgt,
					frame_vector_pages(userptr->vec),
					npages, offset, size, GFP_ATOMIC);
	if (rc < 0) {
		dev_err(hdev->dev, "failed to create SG table from pages\n");
		goto put_framevec;
	}

	return 0;

put_framevec:
	put_vaddr_frames(userptr->vec);
destroy_framevec:
	frame_vector_destroy(userptr->vec);
	return rc;
}

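/*
 * Illustration only (not part of the driver): a minimal user-space sketch of
 * the memory IOCTL flow that hl_mem_ioctl() above handles - allocate device
 * memory, map it, then unmap and free it. It assumes the uapi definitions
 * from the habanalabs uapi header (union hl_mem_args, the HL_MEM_OP_* opcodes
 * and an HL_IOCTL_MEMORY request code) and an already-open device file
 * descriptor; the exact include path and field names should be checked
 * against the uapi header in use.
 */
#if 0	/* hedged usage sketch, never compiled into the driver */
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header path after headers_install */

static int example_alloc_map_free(int fd)
{
	union hl_mem_args args;
	__u64 handle, dev_va;

	/* Allocate 2MB of device (DRAM) memory and get back a handle */
	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_ALLOC;
	args.in.alloc.mem_size = 2 * 1024 * 1024;
	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
		return -1;
	handle = args.out.handle;

	/* Map the allocation into the device virtual address space */
	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_MAP;
	args.in.map_device.handle = handle;
	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
		return -1;
	dev_va = args.out.device_virt_addr;

	/* ... use dev_va in command submissions ... */

	/* Unmap the device virtual address, then release the handle */
	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_UNMAP;
	args.in.unmap.device_virt_addr = dev_va;
	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
		return -1;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_FREE;
	args.in.free.handle = handle;
	return ioctl(fd, HL_IOCTL_MEMORY, &args);
}
#endif
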
/*
 * hl_pin_host_memory - pins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure
 * @addr: the host virtual address of the memory area
 * @size: the size of the memory area
 * @userptr: pointer to hl_userptr structure
 *
 * This function does the following:
 * - Pins the physical pages
 * - Creates an SG list from those pages
 */
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
			struct hl_userptr *userptr)
{
	u64 start, end;
	u32 npages, offset;
	int rc;

	if (!size) {
		dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
		return -EINVAL;
	}

	/*
	 * If the combination of the address and size requested for this memory
	 * region causes an integer overflow, return an error.
	 */
	if (((addr + size) < addr) ||
			PAGE_ALIGN(addr + size) < (addr + size)) {
		dev_err(hdev->dev,
			"user pointer 0x%llx + %llu causes integer overflow\n",
			addr, size);
		return -EINVAL;
	}

	/*
	 * This function can also be called from the data path, hence always
	 * use an atomic allocation as it is not a big allocation.
	 */
	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
	if (!userptr->sgt)
		return -ENOMEM;

	start = addr & PAGE_MASK;
	offset = addr & ~PAGE_MASK;
	end = PAGE_ALIGN(addr + size);
	npages = (end - start) >> PAGE_SHIFT;

	userptr->size = size;
	userptr->addr = addr;
	userptr->dma_mapped = false;
	INIT_LIST_HEAD(&userptr->job_node);

	rc = get_user_memory(hdev, addr, size, npages, start, offset,
				userptr);
	if (rc) {
		dev_err(hdev->dev,
			"failed to get user memory for address 0x%llx\n",
			addr);
		goto free_sgt;
	}

	hl_debugfs_add_userptr(hdev, userptr);

	return 0;

free_sgt:
	kfree(userptr->sgt);
	return rc;
}

/*
 * hl_unpin_host_memory - unpins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure
 * @userptr: pointer to hl_userptr structure
 *
 * This function does the following:
 * - Unpins the physical pages related to the host memory
 * - Frees the SG list
 */
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
	struct page **pages;

	hl_debugfs_remove_userptr(hdev, userptr);

	if (userptr->dma_mapped)
		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
							userptr->sgt->nents,
							userptr->dir);

	pages = frame_vector_pages(userptr->vec);
	if (!IS_ERR(pages)) {
		int i;

		for (i = 0; i < frame_vector_count(userptr->vec); i++)
			set_page_dirty_lock(pages[i]);
	}
	put_vaddr_frames(userptr->vec);
	frame_vector_destroy(userptr->vec);

	list_del(&userptr->job_node);

	sg_free_table(userptr->sgt);
	kfree(userptr->sgt);
}

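/*
 * Illustration only (not part of the driver): a minimal in-kernel sketch of
 * how a caller (e.g. a command submission parser) is expected to pair
 * hl_pin_host_memory() with hl_unpin_host_memory(). The helper names and the
 * job_userptr_list context are hypothetical; only the pin/unpin calls and the
 * hl_userptr life cycle reflect the functions above.
 */
#if 0	/* hedged usage sketch, never compiled into the driver */
static int hl_fetch_user_buf(struct hl_device *hdev, u64 user_addr, u64 size,
				struct list_head *job_userptr_list)
{
	struct hl_userptr *userptr;
	int rc;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	/* Pin the user pages and build the SG table inside userptr */
	rc = hl_pin_host_memory(hdev, user_addr, size, userptr);
	if (rc) {
		kfree(userptr);
		return rc;
	}

	/* Track the pinned buffer so it is released when the job completes */
	list_add_tail(&userptr->job_node, job_userptr_list);
	return 0;
}

static void hl_release_user_buf(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	/* Unpins the pages, frees the SG table and drops the list linkage */
	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}
#endif
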
/*
 * hl_userptr_delete_list - clear userptr list
 *
 * @hdev : pointer to the habanalabs device structure
 * @userptr_list : pointer to the list to clear
 *
 * This function does the following:
 * - Iterates over the list, unpins the host memory and frees the userptr
 *   structure.
 */
void hl_userptr_delete_list(struct hl_device *hdev,
				struct list_head *userptr_list)
{
	struct hl_userptr *userptr, *tmp;

	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
		hl_unpin_host_memory(hdev, userptr);
		kfree(userptr);
	}

	INIT_LIST_HEAD(userptr_list);
}

/*
 * hl_userptr_is_pinned - returns whether the given host memory is pinned
 *
 * @hdev : pointer to the habanalabs device structure
 * @addr : user host virtual address to look for
 * @size : size of the memory area to look for
 * @userptr_list : pointer to the list to search
 * @userptr : returns the matching userptr if one is found
 *
 * This function does the following:
 * - Iterates over the list and checks if a userptr with the given address and
 *   size is in it, which means it is pinned. If so, returns true; otherwise
 *   returns false.
 */
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				u32 size, struct list_head *userptr_list,
				struct hl_userptr **userptr)
{
	list_for_each_entry((*userptr), userptr_list, job_node) {
		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
			return true;
	}

	return false;
}

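/*
 * Illustration only (not part of the driver): hl_userptr_is_pinned() above is
 * a plain lookup by address and size, so a caller can skip re-pinning a
 * buffer that the same job already pinned, and hl_userptr_delete_list() then
 * releases everything at once when the job completes. A hedged sketch; the
 * helper names and the job_userptr_list context are hypothetical.
 */
#if 0	/* hedged usage sketch, never compiled into the driver */
static int hl_pin_once(struct hl_device *hdev, u64 addr, u64 size,
			struct list_head *job_userptr_list)
{
	struct hl_userptr *userptr;

	/* Already pinned by an earlier patch point of this job? */
	if (hl_userptr_is_pinned(hdev, addr, size, job_userptr_list, &userptr))
		return 0;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	if (hl_pin_host_memory(hdev, addr, size, userptr)) {
		kfree(userptr);
		return -EFAULT;
	}

	list_add_tail(&userptr->job_node, job_userptr_list);
	return 0;
}

/* When the job finishes, one call unpins and frees every tracked buffer */
static void hl_job_release_buffers(struct hl_device *hdev,
					struct list_head *job_userptr_list)
{
	hl_userptr_delete_list(hdev, job_userptr_list);
}
#endif
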
/*
 * va_range_init - initialize virtual addresses range
 * @hdev: pointer to the habanalabs device structure
 * @va_range: pointer to the range to initialize
 * @start: range start address
 * @end: range end address
 *
 * This function does the following:
 * - Initializes the virtual addresses list of the given range with the given
 *   addresses.
 */
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
				u64 start, u64 end)
{
	int rc;

	INIT_LIST_HEAD(&va_range->list);

	/* PAGE_SIZE alignment */

	if (start & (PAGE_SIZE - 1)) {
		start &= PAGE_MASK;
		start += PAGE_SIZE;
	}

	if (end & (PAGE_SIZE - 1))
		end &= PAGE_MASK;

	if (start >= end) {
		dev_err(hdev->dev, "too small vm range for va list\n");
		return -EFAULT;
	}

	rc = add_va_block(hdev, va_range, start, end);

	if (rc) {
		dev_err(hdev->dev, "Failed to init host va list\n");
		return rc;
	}

	va_range->start_addr = start;
	va_range->end_addr = end;

	return 0;
}

/*
 * va_range_fini() - clear a virtual addresses range
 * @hdev: pointer to the habanalabs device structure
 * @va_range: pointer to virtual addresses range
 *
 * This function does the following:
 * - Frees the virtual addresses block list and its lock
 */
static void va_range_fini(struct hl_device *hdev,
				struct hl_va_range *va_range)
{
	mutex_lock(&va_range->lock);
	clear_va_list_locked(hdev, &va_range->list);
	mutex_unlock(&va_range->lock);

	mutex_destroy(&va_range->lock);
	kfree(va_range);
}

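/*
 * Illustration only (not part of the driver): va_range_init() above shrinks
 * the requested range inwards to PAGE_SIZE boundaries - the start address is
 * rounded up and the end address is rounded down - before the single initial
 * chunk is added. A worked example with 4KB pages: start 0x1000800 is rounded
 * up to 0x1001000 and end 0x1fff800 is rounded down to 0x1fff000, so those
 * are the bounds handed to add_va_block(). The helper below is a hedged
 * sketch that reproduces just that arithmetic.
 */
#if 0	/* hedged sketch of the alignment arithmetic, never compiled */
static void va_range_trim_example(u64 start, u64 end,
					u64 *aligned_start, u64 *aligned_end)
{
	/* Round start up to the next page boundary if it is unaligned */
	if (start & (PAGE_SIZE - 1)) {
		start &= PAGE_MASK;
		start += PAGE_SIZE;
	}

	/* Round end down to a page boundary if it is unaligned */
	if (end & (PAGE_SIZE - 1))
		end &= PAGE_MASK;

	*aligned_start = start;
	*aligned_end = end;
}
#endif
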
/*
 * vm_ctx_init_with_ranges() - initialize virtual memory for context
 * @ctx: pointer to the habanalabs context structure
 * @host_range_start: host virtual addresses range start.
 * @host_range_end: host virtual addresses range end.
 * @host_huge_range_start: host virtual addresses range start for memory
 *                          allocated with huge pages.
 * @host_huge_range_end: host virtual addresses range end for memory allocated
 *                        with huge pages.
 * @dram_range_start: dram virtual addresses range start.
 * @dram_range_end: dram virtual addresses range end.
 *
 * This function initializes the following:
 * - MMU for context
 * - Virtual address to area descriptor hashtable
 * - Virtual block list of available virtual memory
 */
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
					u64 host_range_start,
					u64 host_range_end,
					u64 host_huge_range_start,
					u64 host_huge_range_end,
					u64 dram_range_start,
					u64 dram_range_end)
{
	struct hl_device *hdev = ctx->hdev;
	int rc;

	ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
	if (!ctx->host_va_range)
		return -ENOMEM;

	ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
						GFP_KERNEL);
	if (!ctx->host_huge_va_range) {
		rc = -ENOMEM;
		goto host_huge_va_range_err;
	}

	ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
	if (!ctx->dram_va_range) {
		rc = -ENOMEM;
		goto dram_va_range_err;
	}

	rc = hl_mmu_ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
		goto mmu_ctx_err;
	}

	mutex_init(&ctx->mem_hash_lock);
	hash_init(ctx->mem_hash);

	mutex_init(&ctx->host_va_range->lock);

	rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
				host_range_end);
	if (rc) {
		dev_err(hdev->dev, "failed to init host vm range\n");
		goto host_page_range_err;
	}

	if (hdev->pmmu_huge_range) {
		mutex_init(&ctx->host_huge_va_range->lock);

		rc = va_range_init(hdev, ctx->host_huge_va_range,
					host_huge_range_start,
					host_huge_range_end);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init host huge vm range\n");
			goto host_hpage_range_err;
		}
	} else {
		kfree(ctx->host_huge_va_range);
		ctx->host_huge_va_range = ctx->host_va_range;
	}

	mutex_init(&ctx->dram_va_range->lock);

	rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
				dram_range_end);
	if (rc) {
		dev_err(hdev->dev, "failed to init dram vm range\n");
		goto dram_vm_err;
	}

	hl_debugfs_add_ctx_mem_hash(hdev, ctx);

	return 0;

dram_vm_err:
	mutex_destroy(&ctx->dram_va_range->lock);

	if (hdev->pmmu_huge_range) {
		mutex_lock(&ctx->host_huge_va_range->lock);
		clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
		mutex_unlock(&ctx->host_huge_va_range->lock);
	}
host_hpage_range_err:
	if (hdev->pmmu_huge_range)
		mutex_destroy(&ctx->host_huge_va_range->lock);
	mutex_lock(&ctx->host_va_range->lock);
	clear_va_list_locked(hdev, &ctx->host_va_range->list);
	mutex_unlock(&ctx->host_va_range->lock);
host_page_range_err:
	mutex_destroy(&ctx->host_va_range->lock);
	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);
mmu_ctx_err:
	kfree(ctx->dram_va_range);
dram_va_range_err:
	kfree(ctx->host_huge_va_range);
host_huge_va_range_err:
	kfree(ctx->host_va_range);

	return rc;
}

int hl_vm_ctx_init(struct hl_ctx *ctx)
{
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	u64 host_range_start, host_range_end, host_huge_range_start,
		host_huge_range_end, dram_range_start, dram_range_end;

	atomic64_set(&ctx->dram_phys_mem, 0);

	/*
	 * - If MMU is enabled, init the ranges as usual.
	 * - If MMU is disabled, in case of host mapping, the returned address
	 *   is the given one.
	 *   In case of DRAM mapping, the returned address is the physical
	 *   address of the memory related to the given handle.
	 */
	if (ctx->hdev->mmu_enable) {
		dram_range_start = prop->dmmu.start_addr;
		dram_range_end = prop->dmmu.end_addr;
		host_range_start = prop->pmmu.start_addr;
		host_range_end = prop->pmmu.end_addr;
		host_huge_range_start = prop->pmmu_huge.start_addr;
		host_huge_range_end = prop->pmmu_huge.end_addr;
	} else {
		dram_range_start = prop->dram_user_base_address;
		dram_range_end = prop->dram_end_address;
		host_range_start = prop->dram_user_base_address;
		host_range_end = prop->dram_end_address;
		host_huge_range_start = prop->dram_user_base_address;
		host_huge_range_end = prop->dram_end_address;
	}

	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
					host_huge_range_start,
					host_huge_range_end,
					dram_range_start,
					dram_range_end);
}

/*
 * hl_vm_ctx_fini - virtual memory teardown of context
 *
 * @ctx : pointer to the habanalabs context structure
 *
 * This function performs teardown of the following:
 * - Virtual block list of available virtual memory
 * - Virtual address to area descriptor hashtable
 * - MMU for context
 *
 * In addition this function does the following:
 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
 *   hashtable should be empty as no valid mappings should exist at this
 *   point.
 * - Frees any existing physical page list from the idr which relates to the
 *   current context asid.
 * - Checks the virtual block list for correctness. At this point the list
 *   should contain one element which describes the whole virtual memory range
 *   of the context. Otherwise, a warning is printed.
 */
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_list;
	struct hl_vm_hash_node *hnode;
	struct hlist_node *tmp_node;
	int i;

	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);

	/*
	 * Clearly something went wrong on hard reset, so no point in printing
	 * another side-effect error.
	 */
	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
		dev_notice(hdev->dev,
			"user released device without removing its memory mappings\n");

	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
		dev_dbg(hdev->dev,
			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
			hnode->vaddr, ctx->asid);
		unmap_device_va(ctx, hnode->vaddr, true);
	}

	/* invalidate the cache once after the unmapping loop */
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);

	spin_lock(&vm->idr_lock);
	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
		if (phys_pg_list->asid == ctx->asid) {
			dev_dbg(hdev->dev,
				"page list 0x%px of asid %d is still alive\n",
				phys_pg_list, ctx->asid);
			atomic64_sub(phys_pg_list->total_size,
					&hdev->dram_used_mem);
			free_phys_pg_pack(hdev, phys_pg_list);
			idr_remove(&vm->phys_pg_pack_handles, i);
		}
	spin_unlock(&vm->idr_lock);

	va_range_fini(hdev, ctx->dram_va_range);
	if (hdev->pmmu_huge_range)
		va_range_fini(hdev, ctx->host_huge_va_range);
	va_range_fini(hdev, ctx->host_va_range);

	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);
}

/*
 * hl_vm_init - initialize virtual memory module
 *
 * @hdev : pointer to the habanalabs device structure
 *
 * This function initializes the following:
 * - MMU module
 * - DRAM physical pages pool of 2MB
 * - Idr for device memory allocation handles
 */
int hl_vm_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm *vm = &hdev->vm;
	int rc;

	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
	if (!vm->dram_pg_pool) {
		dev_err(hdev->dev, "Failed to create dram page pool\n");
		return -ENOMEM;
	}

	kref_init(&vm->dram_pg_pool_refcount);

	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
			prop->dram_end_address - prop->dram_user_base_address,
			-1);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to dram page pool %d\n", rc);
		goto pool_add_err;
	}

	spin_lock_init(&vm->idr_lock);
	idr_init(&vm->phys_pg_pack_handles);

	atomic64_set(&hdev->dram_used_mem, 0);

	vm->init_done = true;

	return 0;

pool_add_err:
	gen_pool_destroy(vm->dram_pg_pool);

	return rc;
}

/*
 * hl_vm_fini - virtual memory module teardown
 *
 * @hdev : pointer to the habanalabs device structure
 *
 * This function performs teardown of the following:
 * - Idr for device memory allocation handles
 * - DRAM physical pages pool of 2MB
 * - MMU module
 */
void hl_vm_fini(struct hl_device *hdev)
{
	struct hl_vm *vm = &hdev->vm;

	if (!vm->init_done)
		return;

	/*
	 * At this point all the contexts should be freed and hence no DRAM
	 * memory should be in use. Hence the DRAM pool should be freed here.
	 */
	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				__func__);

	vm->init_done = false;
}
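
/*
 * Illustration only (not part of the driver): the intended call order of the
 * entry points in this file, as a hedged sketch. hl_vm_init()/hl_vm_fini()
 * bracket the lifetime of the device-wide DRAM pool and handle IDR, while
 * each user context gets hl_vm_ctx_init()/hl_vm_ctx_fini() around its own
 * virtual address ranges. In the real driver these calls live in the device
 * and context init/fini paths; error handling is trimmed for brevity.
 */
#if 0	/* hedged lifecycle sketch, never compiled into the driver */
static int example_vm_lifecycle(struct hl_device *hdev, struct hl_ctx *ctx)
{
	int rc;

	/* Once per device, after the MMU module is ready */
	rc = hl_vm_init(hdev);
	if (rc)
		return rc;

	/* Once per user context (e.g. when a process opens the device) */
	rc = hl_vm_ctx_init(ctx);
	if (rc)
		goto vm_fini;

	/* ... memory IOCTLs operate on ctx between init and fini ... */

	/* When the context is released, undo the per-context state */
	hl_vm_ctx_fini(ctx);

vm_fini:
	hl_vm_fini(hdev);
	return rc;
}
#endif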