18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci/*
48c2ecf20Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd.
58c2ecf20Sopenharmony_ci * All Rights Reserved.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "habanalabs.h"
98c2ecf20Sopenharmony_ci#include "../include/hw_ip/mmu/mmu_general.h"
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <linux/genalloc.h>
128c2ecf20Sopenharmony_ci#include <linux/slab.h>
138c2ecf20Sopenharmony_ci
/*
 * Forward declaration: write_pte() and write_final_pte() below call
 * get_phys_addr() before its definition further down in this file.
 */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_cistatic struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
178c2ecf20Sopenharmony_ci{
188c2ecf20Sopenharmony_ci	struct pgt_info *pgt_info = NULL;
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
218c2ecf20Sopenharmony_ci				(unsigned long) hop_addr)
228c2ecf20Sopenharmony_ci		if (hop_addr == pgt_info->shadow_addr)
238c2ecf20Sopenharmony_ci			break;
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci	return pgt_info;
268c2ecf20Sopenharmony_ci}
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_cistatic void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
338c2ecf20Sopenharmony_ci			hdev->asic_prop.mmu_hop_table_size);
348c2ecf20Sopenharmony_ci	hash_del(&pgt_info->node);
358c2ecf20Sopenharmony_ci	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
368c2ecf20Sopenharmony_ci	kfree(pgt_info);
378c2ecf20Sopenharmony_ci}
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_cistatic void free_hop(struct hl_ctx *ctx, u64 hop_addr)
408c2ecf20Sopenharmony_ci{
418c2ecf20Sopenharmony_ci	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	_free_hop(ctx, pgt_info);
448c2ecf20Sopenharmony_ci}
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_cistatic u64 alloc_hop(struct hl_ctx *ctx)
478c2ecf20Sopenharmony_ci{
488c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
498c2ecf20Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
508c2ecf20Sopenharmony_ci	struct pgt_info *pgt_info;
518c2ecf20Sopenharmony_ci	u64 phys_addr, shadow_addr;
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
548c2ecf20Sopenharmony_ci	if (!pgt_info)
558c2ecf20Sopenharmony_ci		return ULLONG_MAX;
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
588c2ecf20Sopenharmony_ci					prop->mmu_hop_table_size);
598c2ecf20Sopenharmony_ci	if (!phys_addr) {
608c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "failed to allocate page\n");
618c2ecf20Sopenharmony_ci		goto pool_add_err;
628c2ecf20Sopenharmony_ci	}
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
658c2ecf20Sopenharmony_ci						GFP_KERNEL);
668c2ecf20Sopenharmony_ci	if (!shadow_addr)
678c2ecf20Sopenharmony_ci		goto shadow_err;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	pgt_info->phys_addr = phys_addr;
708c2ecf20Sopenharmony_ci	pgt_info->shadow_addr = shadow_addr;
718c2ecf20Sopenharmony_ci	pgt_info->ctx = ctx;
728c2ecf20Sopenharmony_ci	pgt_info->num_of_ptes = 0;
738c2ecf20Sopenharmony_ci	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	return shadow_addr;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_cishadow_err:
788c2ecf20Sopenharmony_ci	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
798c2ecf20Sopenharmony_ci			prop->mmu_hop_table_size);
808c2ecf20Sopenharmony_cipool_add_err:
818c2ecf20Sopenharmony_ci	kfree(pgt_info);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	return ULLONG_MAX;
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_cistatic inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
878c2ecf20Sopenharmony_ci{
888c2ecf20Sopenharmony_ci	return ctx->hdev->asic_prop.mmu_pgt_addr +
898c2ecf20Sopenharmony_ci			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_cistatic inline u64 get_hop0_addr(struct hl_ctx *ctx)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
958c2ecf20Sopenharmony_ci			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
968c2ecf20Sopenharmony_ci}
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_cistatic void flush(struct hl_ctx *ctx)
998c2ecf20Sopenharmony_ci{
1008c2ecf20Sopenharmony_ci	/* flush all writes from all cores to reach PCI */
1018c2ecf20Sopenharmony_ci	mb();
1028c2ecf20Sopenharmony_ci	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
1038c2ecf20Sopenharmony_ci}
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci/* transform the value to physical address when writing to H/W */
1068c2ecf20Sopenharmony_cistatic inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
1078c2ecf20Sopenharmony_ci{
1088c2ecf20Sopenharmony_ci	/*
1098c2ecf20Sopenharmony_ci	 * The value to write is actually the address of the next shadow hop +
1108c2ecf20Sopenharmony_ci	 * flags at the 12 LSBs.
1118c2ecf20Sopenharmony_ci	 * Hence in order to get the value to write to the physical PTE, we
1128c2ecf20Sopenharmony_ci	 * clear the 12 LSBs and translate the shadow hop to its associated
1138c2ecf20Sopenharmony_ci	 * physical hop, and add back the original 12 LSBs.
1148c2ecf20Sopenharmony_ci	 */
1158c2ecf20Sopenharmony_ci	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
1168c2ecf20Sopenharmony_ci				(val & FLAGS_MASK);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
1198c2ecf20Sopenharmony_ci					get_phys_addr(ctx, shadow_pte_addr),
1208c2ecf20Sopenharmony_ci					phys_val);
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	*(u64 *) (uintptr_t) shadow_pte_addr = val;
1238c2ecf20Sopenharmony_ci}
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci/* do not transform the value to physical address when writing to H/W */
1268c2ecf20Sopenharmony_cistatic inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
1278c2ecf20Sopenharmony_ci					u64 val)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
1308c2ecf20Sopenharmony_ci					get_phys_addr(ctx, shadow_pte_addr),
1318c2ecf20Sopenharmony_ci					val);
1328c2ecf20Sopenharmony_ci	*(u64 *) (uintptr_t) shadow_pte_addr = val;
1338c2ecf20Sopenharmony_ci}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci/* clear the last and present bits */
1368c2ecf20Sopenharmony_cistatic inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
1378c2ecf20Sopenharmony_ci{
1388c2ecf20Sopenharmony_ci	/* no need to transform the value to physical address */
1398c2ecf20Sopenharmony_ci	write_final_pte(ctx, pte_addr, 0);
1408c2ecf20Sopenharmony_ci}
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cistatic inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
1438c2ecf20Sopenharmony_ci{
1448c2ecf20Sopenharmony_ci	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
1458c2ecf20Sopenharmony_ci}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci/*
1488c2ecf20Sopenharmony_ci * put_pte - decrement the num of ptes and free the hop if possible
1498c2ecf20Sopenharmony_ci *
1508c2ecf20Sopenharmony_ci * @ctx: pointer to the context structure
1518c2ecf20Sopenharmony_ci * @hop_addr: addr of the hop
1528c2ecf20Sopenharmony_ci *
1538c2ecf20Sopenharmony_ci * This function returns the number of ptes left on this hop. If the number is
1548c2ecf20Sopenharmony_ci * 0, it means the pte was freed.
1558c2ecf20Sopenharmony_ci */
1568c2ecf20Sopenharmony_cistatic inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
1578c2ecf20Sopenharmony_ci{
1588c2ecf20Sopenharmony_ci	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
1598c2ecf20Sopenharmony_ci	int num_of_ptes_left;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	pgt_info->num_of_ptes--;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	/*
1648c2ecf20Sopenharmony_ci	 * Need to save the number of ptes left because free_hop might free
1658c2ecf20Sopenharmony_ci	 * the pgt_info
1668c2ecf20Sopenharmony_ci	 */
1678c2ecf20Sopenharmony_ci	num_of_ptes_left = pgt_info->num_of_ptes;
1688c2ecf20Sopenharmony_ci	if (!num_of_ptes_left)
1698c2ecf20Sopenharmony_ci		_free_hop(ctx, pgt_info);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	return num_of_ptes_left;
1728c2ecf20Sopenharmony_ci}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
1758c2ecf20Sopenharmony_ci					u64 virt_addr, u64 mask, u64 shift)
1768c2ecf20Sopenharmony_ci{
1778c2ecf20Sopenharmony_ci	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
1788c2ecf20Sopenharmony_ci			((virt_addr & mask) >> shift);
1798c2ecf20Sopenharmony_ci}
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_cistatic inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
1828c2ecf20Sopenharmony_ci					struct hl_mmu_properties *mmu_prop,
1838c2ecf20Sopenharmony_ci					u64 hop_addr, u64 vaddr)
1848c2ecf20Sopenharmony_ci{
1858c2ecf20Sopenharmony_ci	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
1868c2ecf20Sopenharmony_ci					mmu_prop->hop0_shift);
1878c2ecf20Sopenharmony_ci}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
1908c2ecf20Sopenharmony_ci					struct hl_mmu_properties *mmu_prop,
1918c2ecf20Sopenharmony_ci					u64 hop_addr, u64 vaddr)
1928c2ecf20Sopenharmony_ci{
1938c2ecf20Sopenharmony_ci	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
1948c2ecf20Sopenharmony_ci					mmu_prop->hop1_shift);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cistatic inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
1988c2ecf20Sopenharmony_ci					struct hl_mmu_properties *mmu_prop,
1998c2ecf20Sopenharmony_ci					u64 hop_addr, u64 vaddr)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
2028c2ecf20Sopenharmony_ci					mmu_prop->hop2_shift);
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_cistatic inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
2068c2ecf20Sopenharmony_ci					struct hl_mmu_properties *mmu_prop,
2078c2ecf20Sopenharmony_ci					u64 hop_addr, u64 vaddr)
2088c2ecf20Sopenharmony_ci{
2098c2ecf20Sopenharmony_ci	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
2108c2ecf20Sopenharmony_ci					mmu_prop->hop3_shift);
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cistatic inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
2148c2ecf20Sopenharmony_ci					struct hl_mmu_properties *mmu_prop,
2158c2ecf20Sopenharmony_ci					u64 hop_addr, u64 vaddr)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
2188c2ecf20Sopenharmony_ci					mmu_prop->hop4_shift);
2198c2ecf20Sopenharmony_ci}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_cistatic inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
2228c2ecf20Sopenharmony_ci{
2238c2ecf20Sopenharmony_ci	if (curr_pte & PAGE_PRESENT_MASK)
2248c2ecf20Sopenharmony_ci		return curr_pte & HOP_PHYS_ADDR_MASK;
2258c2ecf20Sopenharmony_ci	else
2268c2ecf20Sopenharmony_ci		return ULLONG_MAX;
2278c2ecf20Sopenharmony_ci}
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_cistatic inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
2308c2ecf20Sopenharmony_ci						bool *is_new_hop)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	if (hop_addr == ULLONG_MAX) {
2358c2ecf20Sopenharmony_ci		hop_addr = alloc_hop(ctx);
2368c2ecf20Sopenharmony_ci		*is_new_hop = (hop_addr != ULLONG_MAX);
2378c2ecf20Sopenharmony_ci	}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	return hop_addr;
2408c2ecf20Sopenharmony_ci}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci/* translates shadow address inside hop to a physical address */
2438c2ecf20Sopenharmony_cistatic inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
2448c2ecf20Sopenharmony_ci{
2458c2ecf20Sopenharmony_ci	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
2468c2ecf20Sopenharmony_ci	u64 shadow_hop_addr = shadow_addr & ~page_mask;
2478c2ecf20Sopenharmony_ci	u64 pte_offset = shadow_addr & page_mask;
2488c2ecf20Sopenharmony_ci	u64 phys_hop_addr;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	if (shadow_hop_addr != get_hop0_addr(ctx))
2518c2ecf20Sopenharmony_ci		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
2528c2ecf20Sopenharmony_ci	else
2538c2ecf20Sopenharmony_ci		phys_hop_addr = get_phys_hop0_addr(ctx);
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	return phys_hop_addr + pte_offset;
2568c2ecf20Sopenharmony_ci}
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_cistatic int dram_default_mapping_init(struct hl_ctx *ctx)
2598c2ecf20Sopenharmony_ci{
2608c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
2618c2ecf20Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
2628c2ecf20Sopenharmony_ci	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
2638c2ecf20Sopenharmony_ci		hop2_pte_addr, hop3_pte_addr, pte_val;
2648c2ecf20Sopenharmony_ci	int rc, i, j, hop3_allocated = 0;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	if ((!hdev->dram_supports_virtual_memory) ||
2678c2ecf20Sopenharmony_ci			(!hdev->dram_default_page_mapping) ||
2688c2ecf20Sopenharmony_ci			(ctx->asid == HL_KERNEL_ASID_ID))
2698c2ecf20Sopenharmony_ci		return 0;
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	num_of_hop3 = prop->dram_size_for_default_page_mapping;
2728c2ecf20Sopenharmony_ci	do_div(num_of_hop3, prop->dram_page_size);
2738c2ecf20Sopenharmony_ci	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci	/* add hop1 and hop2 */
2768c2ecf20Sopenharmony_ci	total_hops = num_of_hop3 + 2;
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
2798c2ecf20Sopenharmony_ci	if (!ctx->dram_default_hops)
2808c2ecf20Sopenharmony_ci		return -ENOMEM;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	hop0_addr = get_hop0_addr(ctx);
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	hop1_addr = alloc_hop(ctx);
2858c2ecf20Sopenharmony_ci	if (hop1_addr == ULLONG_MAX) {
2868c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "failed to alloc hop 1\n");
2878c2ecf20Sopenharmony_ci		rc = -ENOMEM;
2888c2ecf20Sopenharmony_ci		goto hop1_err;
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	ctx->dram_default_hops[total_hops - 1] = hop1_addr;
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	hop2_addr = alloc_hop(ctx);
2948c2ecf20Sopenharmony_ci	if (hop2_addr == ULLONG_MAX) {
2958c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "failed to alloc hop 2\n");
2968c2ecf20Sopenharmony_ci		rc = -ENOMEM;
2978c2ecf20Sopenharmony_ci		goto hop2_err;
2988c2ecf20Sopenharmony_ci	}
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	ctx->dram_default_hops[total_hops - 2] = hop2_addr;
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
3038c2ecf20Sopenharmony_ci		ctx->dram_default_hops[i] = alloc_hop(ctx);
3048c2ecf20Sopenharmony_ci		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
3058c2ecf20Sopenharmony_ci			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
3068c2ecf20Sopenharmony_ci			rc = -ENOMEM;
3078c2ecf20Sopenharmony_ci			goto hop3_err;
3088c2ecf20Sopenharmony_ci		}
3098c2ecf20Sopenharmony_ci		hop3_allocated++;
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	/* need only pte 0 in hops 0 and 1 */
3138c2ecf20Sopenharmony_ci	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
3148c2ecf20Sopenharmony_ci	write_pte(ctx, hop0_addr, pte_val);
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
3178c2ecf20Sopenharmony_ci	write_pte(ctx, hop1_addr, pte_val);
3188c2ecf20Sopenharmony_ci	get_pte(ctx, hop1_addr);
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci	hop2_pte_addr = hop2_addr;
3218c2ecf20Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
3228c2ecf20Sopenharmony_ci		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
3238c2ecf20Sopenharmony_ci				PAGE_PRESENT_MASK;
3248c2ecf20Sopenharmony_ci		write_pte(ctx, hop2_pte_addr, pte_val);
3258c2ecf20Sopenharmony_ci		get_pte(ctx, hop2_addr);
3268c2ecf20Sopenharmony_ci		hop2_pte_addr += HL_PTE_SIZE;
3278c2ecf20Sopenharmony_ci	}
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
3308c2ecf20Sopenharmony_ci			LAST_MASK | PAGE_PRESENT_MASK;
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
3338c2ecf20Sopenharmony_ci		hop3_pte_addr = ctx->dram_default_hops[i];
3348c2ecf20Sopenharmony_ci		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
3358c2ecf20Sopenharmony_ci			write_final_pte(ctx, hop3_pte_addr, pte_val);
3368c2ecf20Sopenharmony_ci			get_pte(ctx, ctx->dram_default_hops[i]);
3378c2ecf20Sopenharmony_ci			hop3_pte_addr += HL_PTE_SIZE;
3388c2ecf20Sopenharmony_ci		}
3398c2ecf20Sopenharmony_ci	}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	flush(ctx);
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	return 0;
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_cihop3_err:
3468c2ecf20Sopenharmony_ci	for (i = 0 ; i < hop3_allocated ; i++)
3478c2ecf20Sopenharmony_ci		free_hop(ctx, ctx->dram_default_hops[i]);
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	free_hop(ctx, hop2_addr);
3508c2ecf20Sopenharmony_cihop2_err:
3518c2ecf20Sopenharmony_ci	free_hop(ctx, hop1_addr);
3528c2ecf20Sopenharmony_cihop1_err:
3538c2ecf20Sopenharmony_ci	kfree(ctx->dram_default_hops);
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	return rc;
3568c2ecf20Sopenharmony_ci}
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_cistatic void dram_default_mapping_fini(struct hl_ctx *ctx)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
3618c2ecf20Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
3628c2ecf20Sopenharmony_ci	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
3638c2ecf20Sopenharmony_ci		hop2_pte_addr, hop3_pte_addr;
3648c2ecf20Sopenharmony_ci	int i, j;
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	if ((!hdev->dram_supports_virtual_memory) ||
3678c2ecf20Sopenharmony_ci			(!hdev->dram_default_page_mapping) ||
3688c2ecf20Sopenharmony_ci			(ctx->asid == HL_KERNEL_ASID_ID))
3698c2ecf20Sopenharmony_ci		return;
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	num_of_hop3 = prop->dram_size_for_default_page_mapping;
3728c2ecf20Sopenharmony_ci	do_div(num_of_hop3, prop->dram_page_size);
3738c2ecf20Sopenharmony_ci	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	hop0_addr = get_hop0_addr(ctx);
3768c2ecf20Sopenharmony_ci	/* add hop1 and hop2 */
3778c2ecf20Sopenharmony_ci	total_hops = num_of_hop3 + 2;
3788c2ecf20Sopenharmony_ci	hop1_addr = ctx->dram_default_hops[total_hops - 1];
3798c2ecf20Sopenharmony_ci	hop2_addr = ctx->dram_default_hops[total_hops - 2];
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
3828c2ecf20Sopenharmony_ci		hop3_pte_addr = ctx->dram_default_hops[i];
3838c2ecf20Sopenharmony_ci		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
3848c2ecf20Sopenharmony_ci			clear_pte(ctx, hop3_pte_addr);
3858c2ecf20Sopenharmony_ci			put_pte(ctx, ctx->dram_default_hops[i]);
3868c2ecf20Sopenharmony_ci			hop3_pte_addr += HL_PTE_SIZE;
3878c2ecf20Sopenharmony_ci		}
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	hop2_pte_addr = hop2_addr;
3918c2ecf20Sopenharmony_ci	hop2_pte_addr = hop2_addr;
3928c2ecf20Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
3938c2ecf20Sopenharmony_ci		clear_pte(ctx, hop2_pte_addr);
3948c2ecf20Sopenharmony_ci		put_pte(ctx, hop2_addr);
3958c2ecf20Sopenharmony_ci		hop2_pte_addr += HL_PTE_SIZE;
3968c2ecf20Sopenharmony_ci	}
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	clear_pte(ctx, hop1_addr);
3998c2ecf20Sopenharmony_ci	put_pte(ctx, hop1_addr);
4008c2ecf20Sopenharmony_ci	clear_pte(ctx, hop0_addr);
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	kfree(ctx->dram_default_hops);
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	flush(ctx);
4058c2ecf20Sopenharmony_ci}
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci/**
4088c2ecf20Sopenharmony_ci * hl_mmu_v1_init() - initialize the MMU module.
4098c2ecf20Sopenharmony_ci * @hdev: habanalabs device structure.
4108c2ecf20Sopenharmony_ci *
4118c2ecf20Sopenharmony_ci * This function does the following:
4128c2ecf20Sopenharmony_ci * - Create a pool of pages for pgt_infos.
4138c2ecf20Sopenharmony_ci * - Create a shadow table for pgt
4148c2ecf20Sopenharmony_ci *
4158c2ecf20Sopenharmony_ci * Return: 0 for success, non-zero for failure.
4168c2ecf20Sopenharmony_ci */
4178c2ecf20Sopenharmony_cistatic int hl_mmu_v1_init(struct hl_device *hdev)
4188c2ecf20Sopenharmony_ci{
4198c2ecf20Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
4208c2ecf20Sopenharmony_ci	int rc;
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	hdev->mmu_priv.mmu_pgt_pool =
4238c2ecf20Sopenharmony_ci			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	if (!hdev->mmu_priv.mmu_pgt_pool) {
4268c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "Failed to create page gen pool\n");
4278c2ecf20Sopenharmony_ci		return -ENOMEM;
4288c2ecf20Sopenharmony_ci	}
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
4318c2ecf20Sopenharmony_ci			prop->mmu_hop0_tables_total_size,
4328c2ecf20Sopenharmony_ci			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
4338c2ecf20Sopenharmony_ci			-1);
4348c2ecf20Sopenharmony_ci	if (rc) {
4358c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
4368c2ecf20Sopenharmony_ci		goto err_pool_add;
4378c2ecf20Sopenharmony_ci	}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
4408c2ecf20Sopenharmony_ci						prop->mmu_hop_table_size,
4418c2ecf20Sopenharmony_ci						GFP_KERNEL | __GFP_ZERO);
4428c2ecf20Sopenharmony_ci	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
4438c2ecf20Sopenharmony_ci		rc = -ENOMEM;
4448c2ecf20Sopenharmony_ci		goto err_pool_add;
4458c2ecf20Sopenharmony_ci	}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	/* MMU H/W init will be done in device hw_init() */
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	return 0;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_cierr_pool_add:
4528c2ecf20Sopenharmony_ci	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	return rc;
4558c2ecf20Sopenharmony_ci}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci/**
4588c2ecf20Sopenharmony_ci * hl_mmu_fini() - release the MMU module.
4598c2ecf20Sopenharmony_ci * @hdev: habanalabs device structure.
4608c2ecf20Sopenharmony_ci *
4618c2ecf20Sopenharmony_ci * This function does the following:
4628c2ecf20Sopenharmony_ci * - Disable MMU in H/W.
4638c2ecf20Sopenharmony_ci * - Free the pgt_infos pool.
4648c2ecf20Sopenharmony_ci *
4658c2ecf20Sopenharmony_ci * All contexts should be freed before calling this function.
4668c2ecf20Sopenharmony_ci */
4678c2ecf20Sopenharmony_cistatic void hl_mmu_v1_fini(struct hl_device *hdev)
4688c2ecf20Sopenharmony_ci{
4698c2ecf20Sopenharmony_ci	/* MMU H/W fini was already done in device hw_fini() */
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
4728c2ecf20Sopenharmony_ci	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
4738c2ecf20Sopenharmony_ci}
4748c2ecf20Sopenharmony_ci
4758c2ecf20Sopenharmony_ci/**
4768c2ecf20Sopenharmony_ci * hl_mmu_ctx_init() - initialize a context for using the MMU module.
4778c2ecf20Sopenharmony_ci * @ctx: pointer to the context structure to initialize.
4788c2ecf20Sopenharmony_ci *
4798c2ecf20Sopenharmony_ci * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
4808c2ecf20Sopenharmony_ci * page tables hops related to this context.
4818c2ecf20Sopenharmony_ci * Return: 0 on success, non-zero otherwise.
4828c2ecf20Sopenharmony_ci */
4838c2ecf20Sopenharmony_cistatic int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
4848c2ecf20Sopenharmony_ci{
4858c2ecf20Sopenharmony_ci	mutex_init(&ctx->mmu_lock);
4868c2ecf20Sopenharmony_ci	hash_init(ctx->mmu_shadow_hash);
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	return dram_default_mapping_init(ctx);
4898c2ecf20Sopenharmony_ci}
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci/*
4928c2ecf20Sopenharmony_ci * hl_mmu_ctx_fini - disable a ctx from using the mmu module
4938c2ecf20Sopenharmony_ci *
4948c2ecf20Sopenharmony_ci * @ctx: pointer to the context structure
4958c2ecf20Sopenharmony_ci *
4968c2ecf20Sopenharmony_ci * This function does the following:
4978c2ecf20Sopenharmony_ci * - Free any pgts which were not freed yet
4988c2ecf20Sopenharmony_ci * - Free the mutex
4998c2ecf20Sopenharmony_ci * - Free DRAM default page mapping hops
5008c2ecf20Sopenharmony_ci */
5018c2ecf20Sopenharmony_cistatic void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
5028c2ecf20Sopenharmony_ci{
5038c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
5048c2ecf20Sopenharmony_ci	struct pgt_info *pgt_info;
5058c2ecf20Sopenharmony_ci	struct hlist_node *tmp;
5068c2ecf20Sopenharmony_ci	int i;
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	dram_default_mapping_fini(ctx);
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	if (!hash_empty(ctx->mmu_shadow_hash))
5118c2ecf20Sopenharmony_ci		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
5128c2ecf20Sopenharmony_ci			ctx->asid);
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
5158c2ecf20Sopenharmony_ci		dev_err_ratelimited(hdev->dev,
5168c2ecf20Sopenharmony_ci			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
5178c2ecf20Sopenharmony_ci			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
5188c2ecf20Sopenharmony_ci		_free_hop(ctx, pgt_info);
5198c2ecf20Sopenharmony_ci	}
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	mutex_destroy(&ctx->mmu_lock);
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
/*
 * _hl_mmu_v1_unmap - remove the mapping of a single virtual address
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virtual address to unmap
 * @is_dram_addr: true to walk with the DMMU properties, false for the PMMU ones
 *
 * Walks the shadow page tables from hop0 down to the PTE that maps virt_addr.
 * A hop3 PTE with LAST_MASK set ends the walk at hop3; otherwise the walk
 * continues to hop4.
 *
 * When DRAM default page mapping is enabled and the address is a DRAM address,
 * the hop3 PTE is rewritten to point at the default page rather than cleared.
 * In every other case the final PTE is cleared and the PTE references are
 * dropped hop by hop towards hop0; each hop that becomes empty is freed and
 * the PTE that pointed to it is cleared as well.
 *
 * Return: 0 on success, -EINVAL if the address is not mapped, -EFAULT if a
 * DRAM address reaches a hop3 PTE that does not have LAST_MASK set.
 */
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	/*
	 * Walk down the shadow tables. All addresses below are shadow (host)
	 * addresses, so the PTEs can be read by dereferencing them directly.
	 */
	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	/* ULLONG_MAX means the PTE is not present, i.e. no next hop */
	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	/* LAST_MASK on the hop3 PTE means the mapping ends here (huge page) */
	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		/* hop3 PTE is cleared only if hop4 becomes empty, see below */
		clear_hop3 = false;
	}

	/* from here on curr_pte is the final PTE of the walk (hop3 or hop4) */
	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		/* restore the default mapping instead of clearing the PTE */
		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		/* hop4_addr is non-zero only if the walk went down to hop4 */
		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		/* put_pte() returning 0 means the hop was freed */
		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		/*
		 * Unwind towards hop0: clear the PTE that points to the hop
		 * below and stop at the first hop that still has PTEs in use.
		 * In the huge-page case the hop3 PTE was already cleared above,
		 * so it is simply cleared a second time here.
		 */
		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_cistatic int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
6598c2ecf20Sopenharmony_ci			u32 page_size, bool is_dram_addr)
6608c2ecf20Sopenharmony_ci{
6618c2ecf20Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
6628c2ecf20Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
6638c2ecf20Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
6648c2ecf20Sopenharmony_ci	u64 hop0_addr = 0, hop0_pte_addr = 0,
6658c2ecf20Sopenharmony_ci		hop1_addr = 0, hop1_pte_addr = 0,
6668c2ecf20Sopenharmony_ci		hop2_addr = 0, hop2_pte_addr = 0,
6678c2ecf20Sopenharmony_ci		hop3_addr = 0, hop3_pte_addr = 0,
6688c2ecf20Sopenharmony_ci		hop4_addr = 0, hop4_pte_addr = 0,
6698c2ecf20Sopenharmony_ci		curr_pte = 0;
6708c2ecf20Sopenharmony_ci	bool hop1_new = false, hop2_new = false, hop3_new = false,
6718c2ecf20Sopenharmony_ci		hop4_new = false, is_huge;
6728c2ecf20Sopenharmony_ci	int rc = -ENOMEM;
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	/*
6758c2ecf20Sopenharmony_ci	 * This mapping function can map a page or a huge page. For huge page
6768c2ecf20Sopenharmony_ci	 * there are only 3 hops rather than 4. Currently the DRAM allocation
6778c2ecf20Sopenharmony_ci	 * uses huge pages only but user memory could have been allocated with
6788c2ecf20Sopenharmony_ci	 * one of the two page sizes. Since this is a common code for all the
6798c2ecf20Sopenharmony_ci	 * three cases, we need this hugs page check.
6808c2ecf20Sopenharmony_ci	 */
6818c2ecf20Sopenharmony_ci	if (is_dram_addr) {
6828c2ecf20Sopenharmony_ci		mmu_prop = &prop->dmmu;
6838c2ecf20Sopenharmony_ci		is_huge = true;
6848c2ecf20Sopenharmony_ci	} else if (page_size == prop->pmmu_huge.page_size) {
6858c2ecf20Sopenharmony_ci		mmu_prop = &prop->pmmu_huge;
6868c2ecf20Sopenharmony_ci		is_huge = true;
6878c2ecf20Sopenharmony_ci	} else {
6888c2ecf20Sopenharmony_ci		mmu_prop = &prop->pmmu;
6898c2ecf20Sopenharmony_ci		is_huge = false;
6908c2ecf20Sopenharmony_ci	}
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci	hop0_addr = get_hop0_addr(ctx);
6938c2ecf20Sopenharmony_ci	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
6948c2ecf20Sopenharmony_ci	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_ci	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
6978c2ecf20Sopenharmony_ci	if (hop1_addr == ULLONG_MAX)
6988c2ecf20Sopenharmony_ci		goto err;
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
7018c2ecf20Sopenharmony_ci	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
7048c2ecf20Sopenharmony_ci	if (hop2_addr == ULLONG_MAX)
7058c2ecf20Sopenharmony_ci		goto err;
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
7088c2ecf20Sopenharmony_ci	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
7118c2ecf20Sopenharmony_ci	if (hop3_addr == ULLONG_MAX)
7128c2ecf20Sopenharmony_ci		goto err;
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
7158c2ecf20Sopenharmony_ci	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_ci	if (!is_huge) {
7188c2ecf20Sopenharmony_ci		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
7198c2ecf20Sopenharmony_ci		if (hop4_addr == ULLONG_MAX)
7208c2ecf20Sopenharmony_ci			goto err;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
7238c2ecf20Sopenharmony_ci							virt_addr);
7248c2ecf20Sopenharmony_ci		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
7258c2ecf20Sopenharmony_ci	}
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	if (hdev->dram_default_page_mapping && is_dram_addr) {
7288c2ecf20Sopenharmony_ci		u64 default_pte = (prop->mmu_dram_default_page_addr &
7298c2ecf20Sopenharmony_ci					HOP_PHYS_ADDR_MASK) | LAST_MASK |
7308c2ecf20Sopenharmony_ci						PAGE_PRESENT_MASK;
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci		if (curr_pte != default_pte) {
7338c2ecf20Sopenharmony_ci			dev_err(hdev->dev,
7348c2ecf20Sopenharmony_ci				"DRAM: mapping already exists for virt_addr 0x%llx\n",
7358c2ecf20Sopenharmony_ci					virt_addr);
7368c2ecf20Sopenharmony_ci			rc = -EINVAL;
7378c2ecf20Sopenharmony_ci			goto err;
7388c2ecf20Sopenharmony_ci		}
7398c2ecf20Sopenharmony_ci
7408c2ecf20Sopenharmony_ci		if (hop1_new || hop2_new || hop3_new || hop4_new) {
7418c2ecf20Sopenharmony_ci			dev_err(hdev->dev,
7428c2ecf20Sopenharmony_ci				"DRAM mapping should not allocate more hops\n");
7438c2ecf20Sopenharmony_ci			rc = -EFAULT;
7448c2ecf20Sopenharmony_ci			goto err;
7458c2ecf20Sopenharmony_ci		}
7468c2ecf20Sopenharmony_ci	} else if (curr_pte & PAGE_PRESENT_MASK) {
7478c2ecf20Sopenharmony_ci		dev_err(hdev->dev,
7488c2ecf20Sopenharmony_ci			"mapping already exists for virt_addr 0x%llx\n",
7498c2ecf20Sopenharmony_ci				virt_addr);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
7528c2ecf20Sopenharmony_ci			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
7538c2ecf20Sopenharmony_ci		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
7548c2ecf20Sopenharmony_ci			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
7558c2ecf20Sopenharmony_ci		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
7568c2ecf20Sopenharmony_ci			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
7578c2ecf20Sopenharmony_ci		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
7588c2ecf20Sopenharmony_ci			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_ci		if (!is_huge)
7618c2ecf20Sopenharmony_ci			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
7628c2ecf20Sopenharmony_ci				*(u64 *) (uintptr_t) hop4_pte_addr,
7638c2ecf20Sopenharmony_ci				hop4_pte_addr);
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci		rc = -EINVAL;
7668c2ecf20Sopenharmony_ci		goto err;
7678c2ecf20Sopenharmony_ci	}
7688c2ecf20Sopenharmony_ci
7698c2ecf20Sopenharmony_ci	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
7708c2ecf20Sopenharmony_ci			| PAGE_PRESENT_MASK;
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	if (is_huge)
7738c2ecf20Sopenharmony_ci		write_final_pte(ctx, hop3_pte_addr, curr_pte);
7748c2ecf20Sopenharmony_ci	else
7758c2ecf20Sopenharmony_ci		write_final_pte(ctx, hop4_pte_addr, curr_pte);
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci	if (hop1_new) {
7788c2ecf20Sopenharmony_ci		curr_pte =
7798c2ecf20Sopenharmony_ci			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
7808c2ecf20Sopenharmony_ci		write_pte(ctx, hop0_pte_addr, curr_pte);
7818c2ecf20Sopenharmony_ci	}
7828c2ecf20Sopenharmony_ci	if (hop2_new) {
7838c2ecf20Sopenharmony_ci		curr_pte =
7848c2ecf20Sopenharmony_ci			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
7858c2ecf20Sopenharmony_ci		write_pte(ctx, hop1_pte_addr, curr_pte);
7868c2ecf20Sopenharmony_ci		get_pte(ctx, hop1_addr);
7878c2ecf20Sopenharmony_ci	}
7888c2ecf20Sopenharmony_ci	if (hop3_new) {
7898c2ecf20Sopenharmony_ci		curr_pte =
7908c2ecf20Sopenharmony_ci			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
7918c2ecf20Sopenharmony_ci		write_pte(ctx, hop2_pte_addr, curr_pte);
7928c2ecf20Sopenharmony_ci		get_pte(ctx, hop2_addr);
7938c2ecf20Sopenharmony_ci	}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	if (!is_huge) {
7968c2ecf20Sopenharmony_ci		if (hop4_new) {
7978c2ecf20Sopenharmony_ci			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
7988c2ecf20Sopenharmony_ci					PAGE_PRESENT_MASK;
7998c2ecf20Sopenharmony_ci			write_pte(ctx, hop3_pte_addr, curr_pte);
8008c2ecf20Sopenharmony_ci			get_pte(ctx, hop3_addr);
8018c2ecf20Sopenharmony_ci		}
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci		get_pte(ctx, hop4_addr);
8048c2ecf20Sopenharmony_ci	} else {
8058c2ecf20Sopenharmony_ci		get_pte(ctx, hop3_addr);
8068c2ecf20Sopenharmony_ci	}
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	return 0;
8098c2ecf20Sopenharmony_ci
8108c2ecf20Sopenharmony_cierr:
8118c2ecf20Sopenharmony_ci	if (hop4_new)
8128c2ecf20Sopenharmony_ci		free_hop(ctx, hop4_addr);
8138c2ecf20Sopenharmony_ci	if (hop3_new)
8148c2ecf20Sopenharmony_ci		free_hop(ctx, hop3_addr);
8158c2ecf20Sopenharmony_ci	if (hop2_new)
8168c2ecf20Sopenharmony_ci		free_hop(ctx, hop2_addr);
8178c2ecf20Sopenharmony_ci	if (hop1_new)
8188c2ecf20Sopenharmony_ci		free_hop(ctx, hop1_addr);
8198c2ecf20Sopenharmony_ci
8208c2ecf20Sopenharmony_ci	return rc;
8218c2ecf20Sopenharmony_ci}
8228c2ecf20Sopenharmony_ci
/*
 * hl_mmu_v1_swap_out - MMU v1 hook for marking all mappings of the given ctx
 *                      as swapped out
 *
 * @ctx: pointer to the context structure
 *
 * The body is empty: no action is performed for MMU v1.
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{
	/* nothing to do */
}
8338c2ecf20Sopenharmony_ci
/*
 * hl_mmu_v1_swap_in - MMU v1 hook for marking all mappings of the given ctx
 *                     as swapped in
 *
 * @ctx: pointer to the context structure
 *
 * The body is empty: no action is performed for MMU v1.
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{
	/* nothing to do */
}
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci/*
8468c2ecf20Sopenharmony_ci * hl_mmu_v1_prepare - prepare mmu  for working with mmu v1
8478c2ecf20Sopenharmony_ci *
8488c2ecf20Sopenharmony_ci * @hdev: pointer to the device structure
8498c2ecf20Sopenharmony_ci */
8508c2ecf20Sopenharmony_civoid hl_mmu_v1_set_funcs(struct hl_device *hdev)
8518c2ecf20Sopenharmony_ci{
8528c2ecf20Sopenharmony_ci	struct hl_mmu_funcs *mmu = &hdev->mmu_func;
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci	mmu->init = hl_mmu_v1_init;
8558c2ecf20Sopenharmony_ci	mmu->fini = hl_mmu_v1_fini;
8568c2ecf20Sopenharmony_ci	mmu->ctx_init = hl_mmu_v1_ctx_init;
8578c2ecf20Sopenharmony_ci	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
8588c2ecf20Sopenharmony_ci	mmu->map = _hl_mmu_v1_map;
8598c2ecf20Sopenharmony_ci	mmu->unmap = _hl_mmu_v1_unmap;
8608c2ecf20Sopenharmony_ci	mmu->flush = flush;
8618c2ecf20Sopenharmony_ci	mmu->swap_out = hl_mmu_v1_swap_out;
8628c2ecf20Sopenharmony_ci	mmu->swap_in = hl_mmu_v1_swap_in;
8638c2ecf20Sopenharmony_ci}
864