162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci/* 462306a36Sopenharmony_ci * Copyright 2016-2022 HabanaLabs, Ltd. 562306a36Sopenharmony_ci * All Rights Reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/slab.h> 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include "../habanalabs.h" 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <trace/events/habanalabs.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/** 1562306a36Sopenharmony_ci * hl_mmu_get_funcs() - get MMU functions structure 1662306a36Sopenharmony_ci * @hdev: habanalabs device structure. 1762306a36Sopenharmony_ci * @pgt_residency: page table residency. 1862306a36Sopenharmony_ci * @is_dram_addr: true if we need HMMU functions 1962306a36Sopenharmony_ci * 2062306a36Sopenharmony_ci * @return appropriate MMU functions structure 2162306a36Sopenharmony_ci */ 2262306a36Sopenharmony_cistatic struct hl_mmu_funcs *hl_mmu_get_funcs(struct hl_device *hdev, int pgt_residency, 2362306a36Sopenharmony_ci bool is_dram_addr) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci return &hdev->mmu_func[pgt_residency]; 2662306a36Sopenharmony_ci} 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_cibool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, 3362306a36Sopenharmony_ci prop->dmmu.start_addr, 3462306a36Sopenharmony_ci prop->dmmu.end_addr); 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/** 3862306a36Sopenharmony_ci * hl_mmu_init() - initialize the MMU module. 3962306a36Sopenharmony_ci * @hdev: habanalabs device structure. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Return: 0 for success, non-zero for failure. 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ciint hl_mmu_init(struct hl_device *hdev) 4462306a36Sopenharmony_ci{ 4562306a36Sopenharmony_ci int rc = -EOPNOTSUPP; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci if (hdev->mmu_disable) 4862306a36Sopenharmony_ci return 0; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci mutex_init(&hdev->mmu_lock); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { 5362306a36Sopenharmony_ci rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); 5462306a36Sopenharmony_ci if (rc) 5562306a36Sopenharmony_ci return rc; 5662306a36Sopenharmony_ci } 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci if (hdev->mmu_func[MMU_HR_PGT].init != NULL) { 5962306a36Sopenharmony_ci rc = hdev->mmu_func[MMU_HR_PGT].init(hdev); 6062306a36Sopenharmony_ci if (rc) 6162306a36Sopenharmony_ci goto fini_dr_mmu; 6262306a36Sopenharmony_ci } 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci return 0; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cifini_dr_mmu: 6762306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) 6862306a36Sopenharmony_ci hdev->mmu_func[MMU_DR_PGT].fini(hdev); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci return rc; 7162306a36Sopenharmony_ci} 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci/** 7462306a36Sopenharmony_ci * hl_mmu_fini() - release the MMU module. 7562306a36Sopenharmony_ci * @hdev: habanalabs device structure. 7662306a36Sopenharmony_ci * 7762306a36Sopenharmony_ci * This function does the following: 7862306a36Sopenharmony_ci * - Disable MMU in H/W. 7962306a36Sopenharmony_ci * - Free the pgt_infos pool. 8062306a36Sopenharmony_ci * 8162306a36Sopenharmony_ci * All contexts should be freed before calling this function. 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_civoid hl_mmu_fini(struct hl_device *hdev) 8462306a36Sopenharmony_ci{ 8562306a36Sopenharmony_ci if (hdev->mmu_disable) 8662306a36Sopenharmony_ci return; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) 8962306a36Sopenharmony_ci hdev->mmu_func[MMU_DR_PGT].fini(hdev); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) 9262306a36Sopenharmony_ci hdev->mmu_func[MMU_HR_PGT].fini(hdev); 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci mutex_destroy(&hdev->mmu_lock); 9562306a36Sopenharmony_ci} 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci/** 9862306a36Sopenharmony_ci * hl_mmu_ctx_init() - initialize a context for using the MMU module. 9962306a36Sopenharmony_ci * @ctx: pointer to the context structure to initialize. 10062306a36Sopenharmony_ci * 10162306a36Sopenharmony_ci * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all 10262306a36Sopenharmony_ci * page tables hops related to this context. 10362306a36Sopenharmony_ci * Return: 0 on success, non-zero otherwise. 10462306a36Sopenharmony_ci */ 10562306a36Sopenharmony_ciint hl_mmu_ctx_init(struct hl_ctx *ctx) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 10862306a36Sopenharmony_ci int rc = -EOPNOTSUPP; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci if (hdev->mmu_disable) 11162306a36Sopenharmony_ci return 0; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { 11462306a36Sopenharmony_ci rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); 11562306a36Sopenharmony_ci if (rc) 11662306a36Sopenharmony_ci return rc; 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL) { 12062306a36Sopenharmony_ci rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx); 12162306a36Sopenharmony_ci if (rc) 12262306a36Sopenharmony_ci goto fini_dr_ctx; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci return 0; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_cifini_dr_ctx: 12862306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) 12962306a36Sopenharmony_ci hdev->mmu_func[MMU_DR_PGT].fini(hdev); 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci return rc; 13262306a36Sopenharmony_ci} 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci/* 13562306a36Sopenharmony_ci * hl_mmu_ctx_fini - disable a ctx from using the mmu module 13662306a36Sopenharmony_ci * 13762306a36Sopenharmony_ci * @ctx: pointer to the context structure 13862306a36Sopenharmony_ci * 13962306a36Sopenharmony_ci * This function does the following: 14062306a36Sopenharmony_ci * - Free any pgts which were not freed yet 14162306a36Sopenharmony_ci * - Free the mutex 14262306a36Sopenharmony_ci * - Free DRAM default page mapping hops 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_civoid hl_mmu_ctx_fini(struct hl_ctx *ctx) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci if (hdev->mmu_disable) 14962306a36Sopenharmony_ci return; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL) 15262306a36Sopenharmony_ci hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx); 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) 15562306a36Sopenharmony_ci hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci/* 15962306a36Sopenharmony_ci * hl_mmu_get_real_page_size - get real page size to use in map/unmap operation 16062306a36Sopenharmony_ci * 16162306a36Sopenharmony_ci * @hdev: pointer to device data. 16262306a36Sopenharmony_ci * @mmu_prop: MMU properties. 16362306a36Sopenharmony_ci * @page_size: page size 16462306a36Sopenharmony_ci * @real_page_size: set here the actual page size to use for the operation 16562306a36Sopenharmony_ci * @is_dram_addr: true if DRAM address, otherwise false. 16662306a36Sopenharmony_ci * 16762306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code 16862306a36Sopenharmony_ci * 16962306a36Sopenharmony_ci * note that this is general implementation that can fit most MMU arch. but as this is used as an 17062306a36Sopenharmony_ci * MMU function: 17162306a36Sopenharmony_ci * 1. it shall not be called directly- only from mmu_func structure instance 17262306a36Sopenharmony_ci * 2. each MMU may modify the implementation internally 17362306a36Sopenharmony_ci */ 17462306a36Sopenharmony_ciint hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop, 17562306a36Sopenharmony_ci u32 page_size, u32 *real_page_size, bool is_dram_addr) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci /* 17862306a36Sopenharmony_ci * The H/W handles mapping of specific page sizes. Hence if the page 17962306a36Sopenharmony_ci * size is bigger, we break it to sub-pages and map them separately. 18062306a36Sopenharmony_ci */ 18162306a36Sopenharmony_ci if ((page_size % mmu_prop->page_size) == 0) { 18262306a36Sopenharmony_ci *real_page_size = mmu_prop->page_size; 18362306a36Sopenharmony_ci return 0; 18462306a36Sopenharmony_ci } 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n", 18762306a36Sopenharmony_ci page_size, mmu_prop->page_size >> 10); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci return -EFAULT; 19062306a36Sopenharmony_ci} 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_cistatic struct hl_mmu_properties *hl_mmu_get_prop(struct hl_device *hdev, u32 page_size, 19362306a36Sopenharmony_ci bool is_dram_addr) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci if (is_dram_addr) 19862306a36Sopenharmony_ci return &prop->dmmu; 19962306a36Sopenharmony_ci else if ((page_size % prop->pmmu_huge.page_size) == 0) 20062306a36Sopenharmony_ci return &prop->pmmu_huge; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci return &prop->pmmu; 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci/* 20662306a36Sopenharmony_ci * hl_mmu_unmap_page - unmaps a virtual addr 20762306a36Sopenharmony_ci * 20862306a36Sopenharmony_ci * @ctx: pointer to the context structure 20962306a36Sopenharmony_ci * @virt_addr: virt addr to map from 21062306a36Sopenharmony_ci * @page_size: size of the page to unmap 21162306a36Sopenharmony_ci * @flush_pte: whether to do a PCI flush 21262306a36Sopenharmony_ci * 21362306a36Sopenharmony_ci * This function does the following: 21462306a36Sopenharmony_ci * - Check that the virt addr is mapped 21562306a36Sopenharmony_ci * - Unmap the virt addr and frees pgts if possible 21662306a36Sopenharmony_ci * - Returns 0 on success, -EINVAL if the given addr is not mapped 21762306a36Sopenharmony_ci * 21862306a36Sopenharmony_ci * Because this function changes the page tables in the device and because it 21962306a36Sopenharmony_ci * changes the MMU hash, it must be protected by a lock. 22062306a36Sopenharmony_ci * However, because it maps only a single page, the lock should be implemented 22162306a36Sopenharmony_ci * in a higher level in order to protect the entire mapping of the memory area 22262306a36Sopenharmony_ci * 22362306a36Sopenharmony_ci * For optimization reasons PCI flush may be requested once after unmapping of 22462306a36Sopenharmony_ci * large area. 22562306a36Sopenharmony_ci */ 22662306a36Sopenharmony_ciint hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 22962306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop; 23062306a36Sopenharmony_ci struct hl_mmu_funcs *mmu_funcs; 23162306a36Sopenharmony_ci int i, pgt_residency, rc = 0; 23262306a36Sopenharmony_ci u32 real_page_size, npages; 23362306a36Sopenharmony_ci u64 real_virt_addr; 23462306a36Sopenharmony_ci bool is_dram_addr; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci if (hdev->mmu_disable) 23762306a36Sopenharmony_ci return 0; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci is_dram_addr = hl_is_dram_va(hdev, virt_addr); 24062306a36Sopenharmony_ci mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; 24362306a36Sopenharmony_ci mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, 24662306a36Sopenharmony_ci is_dram_addr); 24762306a36Sopenharmony_ci if (rc) 24862306a36Sopenharmony_ci return rc; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci npages = page_size / real_page_size; 25162306a36Sopenharmony_ci real_virt_addr = virt_addr; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci for (i = 0 ; i < npages ; i++) { 25462306a36Sopenharmony_ci rc = mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr); 25562306a36Sopenharmony_ci if (rc) 25662306a36Sopenharmony_ci break; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci real_virt_addr += real_page_size; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci if (flush_pte) 26262306a36Sopenharmony_ci mmu_funcs->flush(ctx); 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci if (trace_habanalabs_mmu_unmap_enabled() && !rc) 26562306a36Sopenharmony_ci trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci return rc; 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci/* 27162306a36Sopenharmony_ci * hl_mmu_map_page - maps a virtual addr to physical addr 27262306a36Sopenharmony_ci * 27362306a36Sopenharmony_ci * @ctx: pointer to the context structure 27462306a36Sopenharmony_ci * @virt_addr: virt addr to map from 27562306a36Sopenharmony_ci * @phys_addr: phys addr to map to 27662306a36Sopenharmony_ci * @page_size: physical page size 27762306a36Sopenharmony_ci * @flush_pte: whether to do a PCI flush 27862306a36Sopenharmony_ci * 27962306a36Sopenharmony_ci * This function does the following: 28062306a36Sopenharmony_ci * - Check that the virt addr is not mapped 28162306a36Sopenharmony_ci * - Allocate pgts as necessary in order to map the virt addr to the phys 28262306a36Sopenharmony_ci * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM. 28362306a36Sopenharmony_ci * 28462306a36Sopenharmony_ci * Because this function changes the page tables in the device and because it 28562306a36Sopenharmony_ci * changes the MMU hash, it must be protected by a lock. 28662306a36Sopenharmony_ci * However, because it maps only a single page, the lock should be implemented 28762306a36Sopenharmony_ci * in a higher level in order to protect the entire mapping of the memory area 28862306a36Sopenharmony_ci * 28962306a36Sopenharmony_ci * For optimization reasons PCI flush may be requested once after mapping of 29062306a36Sopenharmony_ci * large area. 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_ciint hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, 29362306a36Sopenharmony_ci bool flush_pte) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci int i, rc, pgt_residency, mapped_cnt = 0; 29662306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 29762306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop; 29862306a36Sopenharmony_ci u64 real_virt_addr, real_phys_addr; 29962306a36Sopenharmony_ci struct hl_mmu_funcs *mmu_funcs; 30062306a36Sopenharmony_ci u32 real_page_size, npages; 30162306a36Sopenharmony_ci bool is_dram_addr; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci if (hdev->mmu_disable) 30562306a36Sopenharmony_ci return 0; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci is_dram_addr = hl_is_dram_va(hdev, virt_addr); 30862306a36Sopenharmony_ci mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; 31162306a36Sopenharmony_ci mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size, 31462306a36Sopenharmony_ci is_dram_addr); 31562306a36Sopenharmony_ci if (rc) 31662306a36Sopenharmony_ci return rc; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci /* 31962306a36Sopenharmony_ci * Verify that the phys and virt addresses are aligned with the 32062306a36Sopenharmony_ci * MMU page size (in dram this means checking the address and MMU 32162306a36Sopenharmony_ci * after scrambling) 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci if ((is_dram_addr && 32462306a36Sopenharmony_ci ((hdev->asic_funcs->scramble_addr(hdev, phys_addr) & 32562306a36Sopenharmony_ci (mmu_prop->page_size - 1)) || 32662306a36Sopenharmony_ci (hdev->asic_funcs->scramble_addr(hdev, virt_addr) & 32762306a36Sopenharmony_ci (mmu_prop->page_size - 1)))) || 32862306a36Sopenharmony_ci (!is_dram_addr && ((phys_addr & (real_page_size - 1)) || 32962306a36Sopenharmony_ci (virt_addr & (real_page_size - 1))))) 33062306a36Sopenharmony_ci dev_crit(hdev->dev, 33162306a36Sopenharmony_ci "Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size", 33262306a36Sopenharmony_ci phys_addr, virt_addr, real_page_size); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci npages = page_size / real_page_size; 33562306a36Sopenharmony_ci real_virt_addr = virt_addr; 33662306a36Sopenharmony_ci real_phys_addr = phys_addr; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci for (i = 0 ; i < npages ; i++) { 33962306a36Sopenharmony_ci rc = mmu_funcs->map(ctx, real_virt_addr, real_phys_addr, real_page_size, 34062306a36Sopenharmony_ci is_dram_addr); 34162306a36Sopenharmony_ci if (rc) 34262306a36Sopenharmony_ci goto err; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci real_virt_addr += real_page_size; 34562306a36Sopenharmony_ci real_phys_addr += real_page_size; 34662306a36Sopenharmony_ci mapped_cnt++; 34762306a36Sopenharmony_ci } 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci if (flush_pte) 35062306a36Sopenharmony_ci mmu_funcs->flush(ctx); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci return 0; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_cierr: 35762306a36Sopenharmony_ci real_virt_addr = virt_addr; 35862306a36Sopenharmony_ci for (i = 0 ; i < mapped_cnt ; i++) { 35962306a36Sopenharmony_ci if (mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr)) 36062306a36Sopenharmony_ci dev_warn_ratelimited(hdev->dev, 36162306a36Sopenharmony_ci "failed to unmap va: 0x%llx\n", real_virt_addr); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci real_virt_addr += real_page_size; 36462306a36Sopenharmony_ci } 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci mmu_funcs->flush(ctx); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci return rc; 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci/* 37262306a36Sopenharmony_ci * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page 37362306a36Sopenharmony_ci * for mapping contiguous physical memory 37462306a36Sopenharmony_ci * 37562306a36Sopenharmony_ci * @ctx: pointer to the context structure 37662306a36Sopenharmony_ci * @virt_addr: virt addr to map from 37762306a36Sopenharmony_ci * @phys_addr: phys addr to map to 37862306a36Sopenharmony_ci * @size: size to map 37962306a36Sopenharmony_ci * 38062306a36Sopenharmony_ci */ 38162306a36Sopenharmony_ciint hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, 38262306a36Sopenharmony_ci u64 phys_addr, u32 size) 38362306a36Sopenharmony_ci{ 38462306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 38562306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 38662306a36Sopenharmony_ci u64 curr_va, curr_pa; 38762306a36Sopenharmony_ci u32 page_size; 38862306a36Sopenharmony_ci bool flush_pte; 38962306a36Sopenharmony_ci int rc = 0, off; 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci if (hl_mem_area_inside_range(virt_addr, size, 39262306a36Sopenharmony_ci prop->dmmu.start_addr, prop->dmmu.end_addr)) 39362306a36Sopenharmony_ci page_size = prop->dmmu.page_size; 39462306a36Sopenharmony_ci else if (hl_mem_area_inside_range(virt_addr, size, 39562306a36Sopenharmony_ci prop->pmmu.start_addr, prop->pmmu.end_addr)) 39662306a36Sopenharmony_ci page_size = prop->pmmu.page_size; 39762306a36Sopenharmony_ci else if (hl_mem_area_inside_range(virt_addr, size, 39862306a36Sopenharmony_ci prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr)) 39962306a36Sopenharmony_ci page_size = prop->pmmu_huge.page_size; 40062306a36Sopenharmony_ci else 40162306a36Sopenharmony_ci return -EINVAL; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci for (off = 0 ; off < size ; off += page_size) { 40462306a36Sopenharmony_ci curr_va = virt_addr + off; 40562306a36Sopenharmony_ci curr_pa = phys_addr + off; 40662306a36Sopenharmony_ci flush_pte = (off + page_size) >= size; 40762306a36Sopenharmony_ci rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size, 40862306a36Sopenharmony_ci flush_pte); 40962306a36Sopenharmony_ci if (rc) { 41062306a36Sopenharmony_ci dev_err(hdev->dev, 41162306a36Sopenharmony_ci "Map failed for va 0x%llx to pa 0x%llx\n", 41262306a36Sopenharmony_ci curr_va, curr_pa); 41362306a36Sopenharmony_ci /* last mapping failed so don't try to unmap it - reduce off by page_size */ 41462306a36Sopenharmony_ci off -= page_size; 41562306a36Sopenharmony_ci goto unmap; 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci return rc; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ciunmap: 42262306a36Sopenharmony_ci for (; off >= 0 ; off -= page_size) { 42362306a36Sopenharmony_ci curr_va = virt_addr + off; 42462306a36Sopenharmony_ci flush_pte = (off - (s32) page_size) < 0; 42562306a36Sopenharmony_ci if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte)) 42662306a36Sopenharmony_ci dev_warn_ratelimited(hdev->dev, 42762306a36Sopenharmony_ci "failed to unmap va 0x%llx\n", curr_va); 42862306a36Sopenharmony_ci } 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci return rc; 43162306a36Sopenharmony_ci} 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci/* 43462306a36Sopenharmony_ci * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page 43562306a36Sopenharmony_ci * for unmapping contiguous physical memory 43662306a36Sopenharmony_ci * 43762306a36Sopenharmony_ci * @ctx: pointer to the context structure 43862306a36Sopenharmony_ci * @virt_addr: virt addr to unmap 43962306a36Sopenharmony_ci * @size: size to unmap 44062306a36Sopenharmony_ci * 44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_ciint hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size) 44362306a36Sopenharmony_ci{ 44462306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 44562306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 44662306a36Sopenharmony_ci u64 curr_va; 44762306a36Sopenharmony_ci u32 page_size; 44862306a36Sopenharmony_ci bool flush_pte; 44962306a36Sopenharmony_ci int rc = 0, off; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci if (hl_mem_area_inside_range(virt_addr, size, 45262306a36Sopenharmony_ci prop->dmmu.start_addr, prop->dmmu.end_addr)) 45362306a36Sopenharmony_ci page_size = prop->dmmu.page_size; 45462306a36Sopenharmony_ci else if (hl_mem_area_inside_range(virt_addr, size, 45562306a36Sopenharmony_ci prop->pmmu.start_addr, prop->pmmu.end_addr)) 45662306a36Sopenharmony_ci page_size = prop->pmmu.page_size; 45762306a36Sopenharmony_ci else if (hl_mem_area_inside_range(virt_addr, size, 45862306a36Sopenharmony_ci prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr)) 45962306a36Sopenharmony_ci page_size = prop->pmmu_huge.page_size; 46062306a36Sopenharmony_ci else 46162306a36Sopenharmony_ci return -EINVAL; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci for (off = 0 ; off < size ; off += page_size) { 46462306a36Sopenharmony_ci curr_va = virt_addr + off; 46562306a36Sopenharmony_ci flush_pte = (off + page_size) >= size; 46662306a36Sopenharmony_ci rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte); 46762306a36Sopenharmony_ci if (rc) 46862306a36Sopenharmony_ci dev_warn_ratelimited(hdev->dev, 46962306a36Sopenharmony_ci "Unmap failed for va 0x%llx\n", curr_va); 47062306a36Sopenharmony_ci } 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci return rc; 47362306a36Sopenharmony_ci} 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_cistatic void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, 47662306a36Sopenharmony_ci struct hl_mmu_hop_info *hops, 47762306a36Sopenharmony_ci u64 *phys_addr) 47862306a36Sopenharmony_ci{ 47962306a36Sopenharmony_ci struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; 48062306a36Sopenharmony_ci u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr; 48162306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci /* last hop holds the phys address and flags */ 48462306a36Sopenharmony_ci if (hops->unscrambled_paddr) 48562306a36Sopenharmony_ci tmp_phys_addr = hops->unscrambled_paddr; 48662306a36Sopenharmony_ci else 48762306a36Sopenharmony_ci tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE) 49062306a36Sopenharmony_ci mmu_prop = &prop->pmmu_huge; 49162306a36Sopenharmony_ci else if (hops->range_type == HL_VA_RANGE_TYPE_HOST) 49262306a36Sopenharmony_ci mmu_prop = &prop->pmmu; 49362306a36Sopenharmony_ci else /* HL_VA_RANGE_TYPE_DRAM */ 49462306a36Sopenharmony_ci mmu_prop = &prop->dmmu; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && 49762306a36Sopenharmony_ci !is_power_of_2(prop->dram_page_size)) { 49862306a36Sopenharmony_ci u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr, 49962306a36Sopenharmony_ci page_id, page_start; 50062306a36Sopenharmony_ci u32 page_off; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci /* 50362306a36Sopenharmony_ci * Bit arithmetic cannot be used for non power of two page 50462306a36Sopenharmony_ci * sizes. In addition, since bit arithmetic is not used, 50562306a36Sopenharmony_ci * we cannot ignore dram base. All that shall be considered. 50662306a36Sopenharmony_ci */ 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci dram_page_size = prop->dram_page_size; 50962306a36Sopenharmony_ci dram_base = prop->dram_base_address; 51062306a36Sopenharmony_ci abs_phys_addr = tmp_phys_addr - dram_base; 51162306a36Sopenharmony_ci abs_virt_addr = virt_addr - dram_base; 51262306a36Sopenharmony_ci page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size); 51362306a36Sopenharmony_ci page_start = page_id * dram_page_size; 51462306a36Sopenharmony_ci div_u64_rem(abs_virt_addr, dram_page_size, &page_off); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci *phys_addr = page_start + page_off + dram_base; 51762306a36Sopenharmony_ci } else { 51862306a36Sopenharmony_ci /* 51962306a36Sopenharmony_ci * find the correct hop shift field in hl_mmu_properties 52062306a36Sopenharmony_ci * structure in order to determine the right masks 52162306a36Sopenharmony_ci * for the page offset. 52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_ci hop_shift = mmu_prop->hop_shifts[hops->used_hops - 1]; 52462306a36Sopenharmony_ci offset_mask = (1ull << hop_shift) - 1; 52562306a36Sopenharmony_ci addr_mask = ~(offset_mask); 52662306a36Sopenharmony_ci *phys_addr = (tmp_phys_addr & addr_mask) | 52762306a36Sopenharmony_ci (virt_addr & offset_mask); 52862306a36Sopenharmony_ci } 52962306a36Sopenharmony_ci} 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ciint hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) 53262306a36Sopenharmony_ci{ 53362306a36Sopenharmony_ci struct hl_mmu_hop_info hops; 53462306a36Sopenharmony_ci int rc; 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci memset(&hops, 0, sizeof(hops)); 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops); 53962306a36Sopenharmony_ci if (rc) 54062306a36Sopenharmony_ci return rc; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops, phys_addr); 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci return 0; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ciint hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, 54862306a36Sopenharmony_ci struct hl_mmu_hop_info *hops) 54962306a36Sopenharmony_ci{ 55062306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 55162306a36Sopenharmony_ci struct asic_fixed_properties *prop; 55262306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop; 55362306a36Sopenharmony_ci struct hl_mmu_funcs *mmu_funcs; 55462306a36Sopenharmony_ci int pgt_residency, rc; 55562306a36Sopenharmony_ci bool is_dram_addr; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci if (hdev->mmu_disable) 55862306a36Sopenharmony_ci return -EOPNOTSUPP; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci prop = &hdev->asic_prop; 56162306a36Sopenharmony_ci hops->scrambled_vaddr = virt_addr; /* assume no scrambling */ 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, 56462306a36Sopenharmony_ci prop->dmmu.start_addr, 56562306a36Sopenharmony_ci prop->dmmu.end_addr); 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci /* host-residency is the same in PMMU and PMMU huge, no need to distinguish here */ 56862306a36Sopenharmony_ci mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; 56962306a36Sopenharmony_ci pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; 57062306a36Sopenharmony_ci mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci mutex_lock(&hdev->mmu_lock); 57362306a36Sopenharmony_ci rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); 57462306a36Sopenharmony_ci mutex_unlock(&hdev->mmu_lock); 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci if (rc) 57762306a36Sopenharmony_ci return rc; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci /* add page offset to physical address */ 58062306a36Sopenharmony_ci if (hops->unscrambled_paddr) 58162306a36Sopenharmony_ci hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, &hops->unscrambled_paddr); 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci return 0; 58462306a36Sopenharmony_ci} 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ciint hl_mmu_if_set_funcs(struct hl_device *hdev) 58762306a36Sopenharmony_ci{ 58862306a36Sopenharmony_ci if (hdev->mmu_disable) 58962306a36Sopenharmony_ci return 0; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci switch (hdev->asic_type) { 59262306a36Sopenharmony_ci case ASIC_GOYA: 59362306a36Sopenharmony_ci case ASIC_GAUDI: 59462306a36Sopenharmony_ci case ASIC_GAUDI_SEC: 59562306a36Sopenharmony_ci hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); 59662306a36Sopenharmony_ci break; 59762306a36Sopenharmony_ci case ASIC_GAUDI2: 59862306a36Sopenharmony_ci case ASIC_GAUDI2B: 59962306a36Sopenharmony_ci case ASIC_GAUDI2C: 60062306a36Sopenharmony_ci /* MMUs in Gaudi2 are always host resident */ 60162306a36Sopenharmony_ci hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); 60262306a36Sopenharmony_ci break; 60362306a36Sopenharmony_ci default: 60462306a36Sopenharmony_ci dev_err(hdev->dev, "Unrecognized ASIC type %d\n", 60562306a36Sopenharmony_ci hdev->asic_type); 60662306a36Sopenharmony_ci return -EOPNOTSUPP; 60762306a36Sopenharmony_ci } 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci return 0; 61062306a36Sopenharmony_ci} 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci/** 61362306a36Sopenharmony_ci * hl_mmu_scramble_addr() - The generic mmu address scrambling routine. 61462306a36Sopenharmony_ci * @hdev: pointer to device data. 61562306a36Sopenharmony_ci * @addr: The address to scramble. 61662306a36Sopenharmony_ci * 61762306a36Sopenharmony_ci * Return: The scrambled address. 61862306a36Sopenharmony_ci */ 61962306a36Sopenharmony_ciu64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr) 62062306a36Sopenharmony_ci{ 62162306a36Sopenharmony_ci return addr; 62262306a36Sopenharmony_ci} 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci/** 62562306a36Sopenharmony_ci * hl_mmu_descramble_addr() - The generic mmu address descrambling 62662306a36Sopenharmony_ci * routine. 62762306a36Sopenharmony_ci * @hdev: pointer to device data. 62862306a36Sopenharmony_ci * @addr: The address to descramble. 62962306a36Sopenharmony_ci * 63062306a36Sopenharmony_ci * Return: The un-scrambled address. 63162306a36Sopenharmony_ci */ 63262306a36Sopenharmony_ciu64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci return addr; 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ciint hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 63862306a36Sopenharmony_ci{ 63962306a36Sopenharmony_ci int rc; 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 64262306a36Sopenharmony_ci if (rc) 64362306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 64462306a36Sopenharmony_ci "%s cache invalidation failed, rc=%d\n", 64562306a36Sopenharmony_ci flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc); 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci return rc; 64862306a36Sopenharmony_ci} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ciint hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 65162306a36Sopenharmony_ci u32 flags, u32 asid, u64 va, u64 size) 65262306a36Sopenharmony_ci{ 65362306a36Sopenharmony_ci int rc; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags, 65662306a36Sopenharmony_ci asid, va, size); 65762306a36Sopenharmony_ci if (rc) 65862306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 65962306a36Sopenharmony_ci "%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d", 66062306a36Sopenharmony_ci flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc); 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci return rc; 66362306a36Sopenharmony_ci} 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_cistatic void hl_mmu_prefetch_work_function(struct work_struct *work) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, prefetch_work); 66862306a36Sopenharmony_ci struct hl_ctx *ctx = pfw->ctx; 66962306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci if (!hl_device_operational(hdev, NULL)) 67262306a36Sopenharmony_ci goto put_ctx; 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci mutex_lock(&hdev->mmu_lock); 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size); 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci mutex_unlock(&hdev->mmu_lock); 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ciput_ctx: 68162306a36Sopenharmony_ci /* 68262306a36Sopenharmony_ci * context was taken in the common mmu prefetch function- see comment there about 68362306a36Sopenharmony_ci * context handling. 68462306a36Sopenharmony_ci */ 68562306a36Sopenharmony_ci hl_ctx_put(ctx); 68662306a36Sopenharmony_ci kfree(pfw); 68762306a36Sopenharmony_ci} 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ciint hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size) 69062306a36Sopenharmony_ci{ 69162306a36Sopenharmony_ci struct hl_prefetch_work *handle_prefetch_work; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci handle_prefetch_work = kmalloc(sizeof(*handle_prefetch_work), GFP_KERNEL); 69462306a36Sopenharmony_ci if (!handle_prefetch_work) 69562306a36Sopenharmony_ci return -ENOMEM; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci INIT_WORK(&handle_prefetch_work->prefetch_work, hl_mmu_prefetch_work_function); 69862306a36Sopenharmony_ci handle_prefetch_work->ctx = ctx; 69962306a36Sopenharmony_ci handle_prefetch_work->va = va; 70062306a36Sopenharmony_ci handle_prefetch_work->size = size; 70162306a36Sopenharmony_ci handle_prefetch_work->flags = flags; 70262306a36Sopenharmony_ci handle_prefetch_work->asid = asid; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* 70562306a36Sopenharmony_ci * as actual prefetch is done in a WQ we must get the context (and put it 70662306a36Sopenharmony_ci * at the end of the work function) 70762306a36Sopenharmony_ci */ 70862306a36Sopenharmony_ci hl_ctx_get(ctx); 70962306a36Sopenharmony_ci queue_work(ctx->hdev->prefetch_wq, &handle_prefetch_work->prefetch_work); 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci return 0; 71262306a36Sopenharmony_ci} 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ciu64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX; 71762306a36Sopenharmony_ci} 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci/** 72062306a36Sopenharmony_ci * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP 72162306a36Sopenharmony_ci * @ctx: pointer to the context structure to initialize. 72262306a36Sopenharmony_ci * @mmu_prop: MMU properties. 72362306a36Sopenharmony_ci * @hop_idx: HOP index. 72462306a36Sopenharmony_ci * @hop_addr: HOP address. 72562306a36Sopenharmony_ci * @virt_addr: virtual address for the translation. 72662306a36Sopenharmony_ci * 72762306a36Sopenharmony_ci * @return the matching PTE value on success, otherwise U64_MAX. 72862306a36Sopenharmony_ci */ 72962306a36Sopenharmony_ciu64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, 73062306a36Sopenharmony_ci u8 hop_idx, u64 hop_addr, u64 virt_addr) 73162306a36Sopenharmony_ci{ 73262306a36Sopenharmony_ci u64 mask, shift; 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci if (hop_idx >= mmu_prop->num_hops) { 73562306a36Sopenharmony_ci dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx); 73662306a36Sopenharmony_ci return U64_MAX; 73762306a36Sopenharmony_ci } 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci shift = mmu_prop->hop_shifts[hop_idx]; 74062306a36Sopenharmony_ci mask = mmu_prop->hop_masks[hop_idx]; 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_cistatic void mmu_dma_mem_free_from_chunk(struct gen_pool *pool, 74662306a36Sopenharmony_ci struct gen_pool_chunk *chunk, 74762306a36Sopenharmony_ci void *data) 74862306a36Sopenharmony_ci{ 74962306a36Sopenharmony_ci struct hl_device *hdev = data; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci hl_asic_dma_free_coherent(hdev, (chunk->end_addr - chunk->start_addr) + 1, 75262306a36Sopenharmony_ci (void *)chunk->start_addr, chunk->phys_addr); 75362306a36Sopenharmony_ci} 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_civoid hl_mmu_hr_flush(struct hl_ctx *ctx) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci /* a flush operation requires memory barrier */ 75862306a36Sopenharmony_ci mb(); 75962306a36Sopenharmony_ci} 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci/** 76262306a36Sopenharmony_ci * hl_mmu_hr_pool_destroy() - destroy genpool 76362306a36Sopenharmony_ci * @hdev: habanalabs device structure. 76462306a36Sopenharmony_ci * @hr_priv: MMU HR private data. 76562306a36Sopenharmony_ci * @hop_table_size: HOP table size. 76662306a36Sopenharmony_ci * 76762306a36Sopenharmony_ci * This function does the following: 76862306a36Sopenharmony_ci * - free entries allocated for shadow HOP0 76962306a36Sopenharmony_ci * - free pool chunks 77062306a36Sopenharmony_ci * - free pool 77162306a36Sopenharmony_ci */ 77262306a36Sopenharmony_cistatic void hl_mmu_hr_pool_destroy(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, 77362306a36Sopenharmony_ci u32 hop_table_size) 77462306a36Sopenharmony_ci{ 77562306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 77662306a36Sopenharmony_ci struct gen_pool **pool = &hr_priv->mmu_pgt_pool; 77762306a36Sopenharmony_ci struct pgt_info *hop0_pgt; 77862306a36Sopenharmony_ci int asid; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(*pool)) 78162306a36Sopenharmony_ci return; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci /* Free the Fixed allocation of HOPs0 */ 78462306a36Sopenharmony_ci if (hr_priv->mmu_asid_hop0) { 78562306a36Sopenharmony_ci for (asid = 0 ; asid < prop->max_asid ; asid++) { 78662306a36Sopenharmony_ci hop0_pgt = &hr_priv->mmu_asid_hop0[asid]; 78762306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(hop0_pgt->virt_addr)) 78862306a36Sopenharmony_ci continue; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci gen_pool_free(*pool, (uintptr_t) hop0_pgt->virt_addr, hop_table_size); 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci } 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci gen_pool_for_each_chunk(*pool, mmu_dma_mem_free_from_chunk, hdev); 79562306a36Sopenharmony_ci gen_pool_destroy(*pool); 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci /* Make sure that if we arrive here again without init was called we 79862306a36Sopenharmony_ci * won't cause kernel panic. This can happen for example if we fail 79962306a36Sopenharmony_ci * during hard reset code at certain points 80062306a36Sopenharmony_ci */ 80162306a36Sopenharmony_ci *pool = NULL; 80262306a36Sopenharmony_ci} 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci/** 80562306a36Sopenharmony_ci * hl_mmu_hr_init() - initialize the MMU module. 80662306a36Sopenharmony_ci * @hdev: habanalabs device structure. 80762306a36Sopenharmony_ci * @hr_priv: MMU HR private data. 80862306a36Sopenharmony_ci * @hop_table_size: HOP table size. 80962306a36Sopenharmony_ci * @pgt_size: memory size allocated for the page table 81062306a36Sopenharmony_ci * 81162306a36Sopenharmony_ci * @return 0 on success otherwise non-zero error code 81262306a36Sopenharmony_ci * 81362306a36Sopenharmony_ci * This function does the following: 81462306a36Sopenharmony_ci * - Create a pool of pages for pgt_infos. 81562306a36Sopenharmony_ci * - Create a shadow table for pgt 81662306a36Sopenharmony_ci */ 81762306a36Sopenharmony_ciint hl_mmu_hr_init(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size, 81862306a36Sopenharmony_ci u64 pgt_size) 81962306a36Sopenharmony_ci{ 82062306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 82162306a36Sopenharmony_ci size_t pool_chunk_size = SZ_4M; 82262306a36Sopenharmony_ci struct pgt_info *hop0_pgt; 82362306a36Sopenharmony_ci dma_addr_t dma_addr; 82462306a36Sopenharmony_ci u64 virt_addr; 82562306a36Sopenharmony_ci int i, rc; 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci /* 82862306a36Sopenharmony_ci * we set alloc size as PAGE_SIZE (sine dma_alloc_coherent allocation order/size is 82962306a36Sopenharmony_ci * PAGE_SHIFT/PAGE_SIZE) in order to be able to control the allocations alignment. 83062306a36Sopenharmony_ci * This way we can call "DMA alloc align" according to dma_alloc granularity and supply 83162306a36Sopenharmony_ci * allocations with higher-order alignment restrictions 83262306a36Sopenharmony_ci */ 83362306a36Sopenharmony_ci hr_priv->mmu_pgt_pool = gen_pool_create(PAGE_SHIFT, -1); 83462306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(hr_priv->mmu_pgt_pool)) { 83562306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to create hr page pool\n"); 83662306a36Sopenharmony_ci return -ENOMEM; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci hr_priv->mmu_asid_hop0 = kvcalloc(prop->max_asid, sizeof(struct pgt_info), GFP_KERNEL); 84062306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) { 84162306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to allocate hr-mmu hop0 table\n"); 84262306a36Sopenharmony_ci rc = -ENOMEM; 84362306a36Sopenharmony_ci goto destroy_mmu_pgt_pool; 84462306a36Sopenharmony_ci } 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci for (i = 0 ; i < pgt_size ; i += pool_chunk_size) { 84762306a36Sopenharmony_ci virt_addr = (uintptr_t) hl_asic_dma_alloc_coherent(hdev, pool_chunk_size, 84862306a36Sopenharmony_ci &dma_addr, 84962306a36Sopenharmony_ci GFP_KERNEL | __GFP_ZERO); 85062306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(virt_addr)) { 85162306a36Sopenharmony_ci dev_err(hdev->dev, 85262306a36Sopenharmony_ci "Failed to allocate memory for host-resident page pool\n"); 85362306a36Sopenharmony_ci rc = -ENOMEM; 85462306a36Sopenharmony_ci goto destroy_mmu_pgt_pool; 85562306a36Sopenharmony_ci } 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci rc = gen_pool_add_virt(hr_priv->mmu_pgt_pool, virt_addr, (phys_addr_t) dma_addr, 85862306a36Sopenharmony_ci pool_chunk_size, -1); 85962306a36Sopenharmony_ci if (rc) { 86062306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to fill host-resident page pool\n"); 86162306a36Sopenharmony_ci goto destroy_mmu_pgt_pool; 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci } 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci for (i = 0 ; i < prop->max_asid ; i++) { 86662306a36Sopenharmony_ci hop0_pgt = &hr_priv->mmu_asid_hop0[i]; 86762306a36Sopenharmony_ci hop0_pgt->virt_addr = (uintptr_t) 86862306a36Sopenharmony_ci gen_pool_dma_zalloc_align(hr_priv->mmu_pgt_pool, 86962306a36Sopenharmony_ci hop_table_size, 87062306a36Sopenharmony_ci (dma_addr_t *) &hop0_pgt->phys_addr, 87162306a36Sopenharmony_ci hop_table_size); 87262306a36Sopenharmony_ci if (!hop0_pgt->virt_addr) { 87362306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to allocate HOP from pgt pool\n"); 87462306a36Sopenharmony_ci rc = -ENOMEM; 87562306a36Sopenharmony_ci goto destroy_mmu_pgt_pool; 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci /* MMU H/W init will be done in device hw_init() */ 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci return 0; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_cidestroy_mmu_pgt_pool: 88462306a36Sopenharmony_ci hl_mmu_hr_pool_destroy(hdev, hr_priv, hop_table_size); 88562306a36Sopenharmony_ci if (!ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) 88662306a36Sopenharmony_ci kvfree(hr_priv->mmu_asid_hop0); 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci return rc; 88962306a36Sopenharmony_ci} 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci/** 89262306a36Sopenharmony_ci * hl_mmu_hr_fini() - release the MMU module. 89362306a36Sopenharmony_ci * @hdev: habanalabs device structure. 89462306a36Sopenharmony_ci * @hr_priv: MMU host resident private info. 89562306a36Sopenharmony_ci * @hop_table_size: HOP table size 89662306a36Sopenharmony_ci * 89762306a36Sopenharmony_ci * This function does the following: 89862306a36Sopenharmony_ci * - Disable MMU in H/W. 89962306a36Sopenharmony_ci * - Free the pgt_infos pool. 90062306a36Sopenharmony_ci * 90162306a36Sopenharmony_ci * All contexts should be freed before calling this function. 90262306a36Sopenharmony_ci */ 90362306a36Sopenharmony_civoid hl_mmu_hr_fini(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size) 90462306a36Sopenharmony_ci{ 90562306a36Sopenharmony_ci /* MMU H/W fini was already done in device hw_fini() */ 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci hl_mmu_hr_pool_destroy(hdev, hr_priv, hop_table_size); 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci if (!ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) { 91062306a36Sopenharmony_ci kvfree(hr_priv->mmu_asid_hop0); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci /* Make sure that if we arrive here again without init was 91362306a36Sopenharmony_ci * called we won't cause kernel panic. This can happen for 91462306a36Sopenharmony_ci * example if we fail during hard reset code at certain points 91562306a36Sopenharmony_ci */ 91662306a36Sopenharmony_ci hr_priv->mmu_asid_hop0 = NULL; 91762306a36Sopenharmony_ci } 91862306a36Sopenharmony_ci} 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci/** 92162306a36Sopenharmony_ci * hl_mmu_hr_free_hop_remove_pgt() - free HOP and remove PGT from hash 92262306a36Sopenharmony_ci * @pgt_info: page table info structure. 92362306a36Sopenharmony_ci * @hr_priv: MMU HR private data. 92462306a36Sopenharmony_ci * @hop_table_size: HOP table size. 92562306a36Sopenharmony_ci */ 92662306a36Sopenharmony_civoid hl_mmu_hr_free_hop_remove_pgt(struct pgt_info *pgt_info, struct hl_mmu_hr_priv *hr_priv, 92762306a36Sopenharmony_ci u32 hop_table_size) 92862306a36Sopenharmony_ci{ 92962306a36Sopenharmony_ci gen_pool_free(hr_priv->mmu_pgt_pool, pgt_info->virt_addr, hop_table_size); 93062306a36Sopenharmony_ci hash_del(&pgt_info->node); 93162306a36Sopenharmony_ci kfree(pgt_info); 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci/** 93562306a36Sopenharmony_ci * hl_mmu_hr_pte_phys_to_virt() - translate PTE phys addr to virt addr 93662306a36Sopenharmony_ci * @ctx: pointer to the context structure 93762306a36Sopenharmony_ci * @pgt: pgt_info for the HOP hosting the PTE 93862306a36Sopenharmony_ci * @phys_pte_addr: phys address of the PTE 93962306a36Sopenharmony_ci * @hop_table_size: HOP table size 94062306a36Sopenharmony_ci * 94162306a36Sopenharmony_ci * @return PTE virtual address 94262306a36Sopenharmony_ci * 94362306a36Sopenharmony_ci * The function use the pgt_info to get HOP base virt addr and obtain the PTE's virt addr 94462306a36Sopenharmony_ci * by adding the PTE offset. 94562306a36Sopenharmony_ci */ 94662306a36Sopenharmony_ciu64 hl_mmu_hr_pte_phys_to_virt(struct hl_ctx *ctx, struct pgt_info *pgt, 94762306a36Sopenharmony_ci u64 phys_pte_addr, u32 hop_table_size) 94862306a36Sopenharmony_ci{ 94962306a36Sopenharmony_ci u64 page_mask = (hop_table_size - 1); 95062306a36Sopenharmony_ci u64 pte_offset = phys_pte_addr & page_mask; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci return pgt->virt_addr + pte_offset; 95362306a36Sopenharmony_ci} 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci/** 95662306a36Sopenharmony_ci * hl_mmu_hr_write_pte() - write HR PTE 95762306a36Sopenharmony_ci * @ctx: pointer to the context structure 95862306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure 95962306a36Sopenharmony_ci * @phys_pte_addr: phys PTE address 96062306a36Sopenharmony_ci * @val: raw PTE data 96162306a36Sopenharmony_ci * @hop_table_size: HOP table size 96262306a36Sopenharmony_ci */ 96362306a36Sopenharmony_civoid hl_mmu_hr_write_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr, 96462306a36Sopenharmony_ci u64 val, u32 hop_table_size) 96562306a36Sopenharmony_ci{ 96662306a36Sopenharmony_ci /* 96762306a36Sopenharmony_ci * The value to write is the phys address of the next hop + 96862306a36Sopenharmony_ci * flags at the 12 LSBs. 96962306a36Sopenharmony_ci */ 97062306a36Sopenharmony_ci u64 virt_addr = hl_mmu_hr_pte_phys_to_virt(ctx, pgt_info, phys_pte_addr, hop_table_size); 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci *((u64 *) (uintptr_t) virt_addr) = val; 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci/** 97662306a36Sopenharmony_ci * hl_mmu_hr_clear_pte() - clear HR PTE 97762306a36Sopenharmony_ci * @ctx: pointer to the context structure 97862306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure 97962306a36Sopenharmony_ci * @phys_pte_addr: phys PTE address 98062306a36Sopenharmony_ci * @hop_table_size: HOP table size 98162306a36Sopenharmony_ci */ 98262306a36Sopenharmony_civoid hl_mmu_hr_clear_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr, 98362306a36Sopenharmony_ci u32 hop_table_size) 98462306a36Sopenharmony_ci{ 98562306a36Sopenharmony_ci /* no need to transform the value to physical address */ 98662306a36Sopenharmony_ci hl_mmu_hr_write_pte(ctx, pgt_info, phys_pte_addr, 0, hop_table_size); 98762306a36Sopenharmony_ci} 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci/** 99062306a36Sopenharmony_ci * hl_mmu_hr_put_pte() - put HR PTE and remove it if necessary (no more PTEs) 99162306a36Sopenharmony_ci * @ctx: pointer to the context structure 99262306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure 99362306a36Sopenharmony_ci * @hr_priv: HR MMU private info 99462306a36Sopenharmony_ci * @hop_table_size: HOP table size 99562306a36Sopenharmony_ci * 99662306a36Sopenharmony_ci * @return number of PTEs still in the HOP 99762306a36Sopenharmony_ci */ 99862306a36Sopenharmony_ciint hl_mmu_hr_put_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, 99962306a36Sopenharmony_ci struct hl_mmu_hr_priv *hr_priv, 100062306a36Sopenharmony_ci u32 hop_table_size) 100162306a36Sopenharmony_ci{ 100262306a36Sopenharmony_ci int num_of_ptes_left; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci pgt_info->num_of_ptes--; 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci /* 100762306a36Sopenharmony_ci * Need to save the number of ptes left because free_hop might free 100862306a36Sopenharmony_ci * the pgt_info 100962306a36Sopenharmony_ci */ 101062306a36Sopenharmony_ci num_of_ptes_left = pgt_info->num_of_ptes; 101162306a36Sopenharmony_ci if (!num_of_ptes_left) 101262306a36Sopenharmony_ci hl_mmu_hr_free_hop_remove_pgt(pgt_info, hr_priv, hop_table_size); 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci return num_of_ptes_left; 101562306a36Sopenharmony_ci} 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci/** 101862306a36Sopenharmony_ci * hl_mmu_hr_get_pte() - increase PGT PTE count 101962306a36Sopenharmony_ci * @ctx: pointer to the context structure 102062306a36Sopenharmony_ci * @hr_func: host resident functions 102162306a36Sopenharmony_ci * @phys_hop_addr: HOP phys address 102262306a36Sopenharmony_ci */ 102362306a36Sopenharmony_civoid hl_mmu_hr_get_pte(struct hl_ctx *ctx, struct hl_hr_mmu_funcs *hr_func, u64 phys_hop_addr) 102462306a36Sopenharmony_ci{ 102562306a36Sopenharmony_ci hr_func->get_pgt_info(ctx, phys_hop_addr)->num_of_ptes++; 102662306a36Sopenharmony_ci} 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci/** 102962306a36Sopenharmony_ci * hl_mmu_hr_get_next_hop_pgt_info() - get pgt_info structure for the next HOP 103062306a36Sopenharmony_ci * @ctx: pointer to the context structure. 103162306a36Sopenharmony_ci * @hr_func: host resident functions. 103262306a36Sopenharmony_ci * @curr_pte: current PTE value. 103362306a36Sopenharmony_ci * 103462306a36Sopenharmony_ci * @return pgt_info structure on success, otherwise NULL. 103562306a36Sopenharmony_ci */ 103662306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_get_next_hop_pgt_info(struct hl_ctx *ctx, 103762306a36Sopenharmony_ci struct hl_hr_mmu_funcs *hr_func, 103862306a36Sopenharmony_ci u64 curr_pte) 103962306a36Sopenharmony_ci{ 104062306a36Sopenharmony_ci u64 next_hop_phys_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci if (next_hop_phys_addr == ULLONG_MAX) 104362306a36Sopenharmony_ci return NULL; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci return hr_func->get_pgt_info(ctx, next_hop_phys_addr); 104662306a36Sopenharmony_ci} 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci/** 104962306a36Sopenharmony_ci * hl_mmu_hr_alloc_hop() - allocate HOP 105062306a36Sopenharmony_ci * @ctx: pointer to the context structure. 105162306a36Sopenharmony_ci * @hr_priv: host resident private info structure. 105262306a36Sopenharmony_ci * @hr_func: host resident functions. 105362306a36Sopenharmony_ci * @mmu_prop: MMU properties. 105462306a36Sopenharmony_ci * 105562306a36Sopenharmony_ci * @return pgt_info structure associated with the allocated HOP on success, otherwise NULL. 105662306a36Sopenharmony_ci */ 105762306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_alloc_hop(struct hl_ctx *ctx, struct hl_mmu_hr_priv *hr_priv, 105862306a36Sopenharmony_ci struct hl_hr_mmu_funcs *hr_func, 105962306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop) 106062306a36Sopenharmony_ci{ 106162306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 106262306a36Sopenharmony_ci struct pgt_info *pgt_info; 106362306a36Sopenharmony_ci dma_addr_t phys_addr; 106462306a36Sopenharmony_ci void *virt_addr; 106562306a36Sopenharmony_ci int i, retry = 1; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); 106862306a36Sopenharmony_ci if (!pgt_info) 106962306a36Sopenharmony_ci return NULL; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci for (i = 0; i <= retry; i++) { 107262306a36Sopenharmony_ci virt_addr = gen_pool_dma_zalloc_align(hr_priv->mmu_pgt_pool, 107362306a36Sopenharmony_ci mmu_prop->hop_table_size, 107462306a36Sopenharmony_ci &phys_addr, 107562306a36Sopenharmony_ci mmu_prop->hop_table_size); 107662306a36Sopenharmony_ci if (virt_addr) 107762306a36Sopenharmony_ci break; 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci /* No memory in pool - get some and try again */ 108062306a36Sopenharmony_ci virt_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &phys_addr, 108162306a36Sopenharmony_ci GFP_KERNEL | __GFP_ZERO); 108262306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(virt_addr)) 108362306a36Sopenharmony_ci break; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci if (gen_pool_add_virt(hr_priv->mmu_pgt_pool, (unsigned long)virt_addr, 108662306a36Sopenharmony_ci phys_addr, SZ_2M, -1)) { 108762306a36Sopenharmony_ci hl_asic_dma_free_coherent(hdev, SZ_2M, virt_addr, phys_addr); 108862306a36Sopenharmony_ci virt_addr = NULL; 108962306a36Sopenharmony_ci break; 109062306a36Sopenharmony_ci } 109162306a36Sopenharmony_ci } 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci if (ZERO_OR_NULL_PTR(virt_addr)) { 109462306a36Sopenharmony_ci dev_err(hdev->dev, "failed to allocate page\n"); 109562306a36Sopenharmony_ci goto pool_alloc_err; 109662306a36Sopenharmony_ci } 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci pgt_info->phys_addr = phys_addr; 109962306a36Sopenharmony_ci pgt_info->shadow_addr = (unsigned long) NULL; 110062306a36Sopenharmony_ci pgt_info->virt_addr = (unsigned long)virt_addr; 110162306a36Sopenharmony_ci pgt_info->ctx = ctx; 110262306a36Sopenharmony_ci pgt_info->num_of_ptes = 0; 110362306a36Sopenharmony_ci hr_func->add_pgt_info(ctx, pgt_info, phys_addr); 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci return pgt_info; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_cipool_alloc_err: 110862306a36Sopenharmony_ci kfree(pgt_info); 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci return NULL; 111162306a36Sopenharmony_ci} 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci/** 111462306a36Sopenharmony_ci * hl_mmu_hr_get_alloc_next_hop() - get the next HOP, allocate it if it does not exist 111562306a36Sopenharmony_ci * @ctx: pointer to the context structure. 111662306a36Sopenharmony_ci * @hr_priv: host resident private info structure. 111762306a36Sopenharmony_ci * @hr_func: host resident functions. 111862306a36Sopenharmony_ci * @mmu_prop: MMU properties. 111962306a36Sopenharmony_ci * @curr_pte: current PTE value. 112062306a36Sopenharmony_ci * @is_new_hop: set to true if HOP is new (caller responsibility to set it to false). 112162306a36Sopenharmony_ci * 112262306a36Sopenharmony_ci * @return pgt_info structure associated with the allocated HOP on success, otherwise NULL. 112362306a36Sopenharmony_ci */ 112462306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_get_alloc_next_hop(struct hl_ctx *ctx, 112562306a36Sopenharmony_ci struct hl_mmu_hr_priv *hr_priv, 112662306a36Sopenharmony_ci struct hl_hr_mmu_funcs *hr_func, 112762306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop, 112862306a36Sopenharmony_ci u64 curr_pte, bool *is_new_hop) 112962306a36Sopenharmony_ci{ 113062306a36Sopenharmony_ci u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci if (hop_addr != ULLONG_MAX) 113362306a36Sopenharmony_ci return hr_func->get_pgt_info(ctx, hop_addr); 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci *is_new_hop = true; 113662306a36Sopenharmony_ci return hl_mmu_hr_alloc_hop(ctx, hr_priv, hr_func, mmu_prop); 113762306a36Sopenharmony_ci} 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci/** 114062306a36Sopenharmony_ci * hl_mmu_hr_get_tlb_info() - get the TLB info (info for a specific mapping) 114162306a36Sopenharmony_ci * @ctx: pointer to the context structure. 114262306a36Sopenharmony_ci * @virt_addr: the virt address for which to get info. 114362306a36Sopenharmony_ci * @hops: HOPs info structure. 114462306a36Sopenharmony_ci * @hr_func: host resident functions. 114562306a36Sopenharmony_ci * 114662306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code.. 114762306a36Sopenharmony_ci */ 114862306a36Sopenharmony_ciint hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops, 114962306a36Sopenharmony_ci struct hl_hr_mmu_funcs *hr_func) 115062306a36Sopenharmony_ci{ 115162306a36Sopenharmony_ci /* using 6 HOPs as this is the maximum number of HOPs */ 115262306a36Sopenharmony_ci struct pgt_info *hops_pgt_info[MMU_ARCH_6_HOPS] = { NULL }; 115362306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 115462306a36Sopenharmony_ci struct hl_mmu_properties *mmu_prop; 115562306a36Sopenharmony_ci int rc, i, used_hops; 115662306a36Sopenharmony_ci bool is_huge; 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci rc = hr_func->get_tlb_mapping_params(hdev, &mmu_prop, hops, virt_addr, &is_huge); 115962306a36Sopenharmony_ci if (rc) 116062306a36Sopenharmony_ci return rc; 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci used_hops = mmu_prop->num_hops; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci /* huge pages use one less hop */ 116562306a36Sopenharmony_ci if (is_huge) 116662306a36Sopenharmony_ci used_hops--; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci for (i = 0 ; i < used_hops ; i++) { 117162306a36Sopenharmony_ci if (i == 0) 117262306a36Sopenharmony_ci hops_pgt_info[i] = hr_func->get_hop0_pgt_info(ctx); 117362306a36Sopenharmony_ci else 117462306a36Sopenharmony_ci hops_pgt_info[i] = hl_mmu_hr_get_next_hop_pgt_info(ctx, hr_func, 117562306a36Sopenharmony_ci hops->hop_info[i - 1].hop_pte_val); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci if (!hops_pgt_info[i]) 117862306a36Sopenharmony_ci return -EFAULT; 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci hops->hop_info[i].hop_addr = hops_pgt_info[i]->phys_addr; 118162306a36Sopenharmony_ci hops->hop_info[i].hop_pte_addr = 118262306a36Sopenharmony_ci hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i, 118362306a36Sopenharmony_ci hops->hop_info[i].hop_addr, 118462306a36Sopenharmony_ci hops->scrambled_vaddr); 118562306a36Sopenharmony_ci hops->hop_info[i].hop_pte_val = *(u64 *) (uintptr_t) 118662306a36Sopenharmony_ci hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i], 118762306a36Sopenharmony_ci hops->hop_info[i].hop_pte_addr, 118862306a36Sopenharmony_ci mmu_prop->hop_table_size); 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) 119162306a36Sopenharmony_ci return -EFAULT; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask) 119462306a36Sopenharmony_ci break; 119562306a36Sopenharmony_ci } 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci /* if passed over all hops then no last hop was found */ 119862306a36Sopenharmony_ci if (i == mmu_prop->num_hops) 119962306a36Sopenharmony_ci return -EFAULT; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci if (hops->scrambled_vaddr != virt_addr) 120262306a36Sopenharmony_ci hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr 120362306a36Sopenharmony_ci (hdev, hops->hop_info[i].hop_pte_val); 120462306a36Sopenharmony_ci else 120562306a36Sopenharmony_ci hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci hops->used_hops = i + 1; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci return 0; 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 1212