162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci/*
462306a36Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd.
562306a36Sopenharmony_ci * All Rights Reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "../habanalabs.h"
962306a36Sopenharmony_ci#include "../../include/hw_ip/mmu/mmu_general.h"
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_cistatic struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
1862306a36Sopenharmony_ci{
1962306a36Sopenharmony_ci	struct pgt_info *pgt_info = NULL;
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
2262306a36Sopenharmony_ci				(unsigned long) hop_addr)
2362306a36Sopenharmony_ci		if (hop_addr == pgt_info->shadow_addr)
2462306a36Sopenharmony_ci			break;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	return pgt_info;
2762306a36Sopenharmony_ci}
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cistatic void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
3462306a36Sopenharmony_ci			hdev->asic_prop.mmu_hop_table_size);
3562306a36Sopenharmony_ci	hash_del(&pgt_info->node);
3662306a36Sopenharmony_ci	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
3762306a36Sopenharmony_ci	kfree(pgt_info);
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistatic void free_hop(struct hl_ctx *ctx, u64 hop_addr)
4162306a36Sopenharmony_ci{
4262306a36Sopenharmony_ci	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	_free_hop(ctx, pgt_info);
4562306a36Sopenharmony_ci}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic u64 alloc_hop(struct hl_ctx *ctx)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
5062306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
5162306a36Sopenharmony_ci	struct pgt_info *pgt_info;
5262306a36Sopenharmony_ci	u64 phys_addr, shadow_addr;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
5562306a36Sopenharmony_ci	if (!pgt_info)
5662306a36Sopenharmony_ci		return ULLONG_MAX;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
5962306a36Sopenharmony_ci					prop->mmu_hop_table_size);
6062306a36Sopenharmony_ci	if (!phys_addr) {
6162306a36Sopenharmony_ci		dev_err(hdev->dev, "failed to allocate page\n");
6262306a36Sopenharmony_ci		goto pool_add_err;
6362306a36Sopenharmony_ci	}
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
6662306a36Sopenharmony_ci						GFP_KERNEL);
6762306a36Sopenharmony_ci	if (!shadow_addr)
6862306a36Sopenharmony_ci		goto shadow_err;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	pgt_info->phys_addr = phys_addr;
7162306a36Sopenharmony_ci	pgt_info->shadow_addr = shadow_addr;
7262306a36Sopenharmony_ci	pgt_info->ctx = ctx;
7362306a36Sopenharmony_ci	pgt_info->num_of_ptes = 0;
7462306a36Sopenharmony_ci	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	return shadow_addr;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_cishadow_err:
7962306a36Sopenharmony_ci	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
8062306a36Sopenharmony_ci			prop->mmu_hop_table_size);
8162306a36Sopenharmony_cipool_add_err:
8262306a36Sopenharmony_ci	kfree(pgt_info);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	return ULLONG_MAX;
8562306a36Sopenharmony_ci}
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistatic inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
8862306a36Sopenharmony_ci{
8962306a36Sopenharmony_ci	return ctx->hdev->asic_prop.mmu_pgt_addr +
9062306a36Sopenharmony_ci			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic inline u64 get_hop0_addr(struct hl_ctx *ctx)
9462306a36Sopenharmony_ci{
9562306a36Sopenharmony_ci	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
9662306a36Sopenharmony_ci			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
9762306a36Sopenharmony_ci}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_cistatic void flush(struct hl_ctx *ctx)
10062306a36Sopenharmony_ci{
10162306a36Sopenharmony_ci	/* flush all writes from all cores to reach PCI */
10262306a36Sopenharmony_ci	mb();
10362306a36Sopenharmony_ci	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci/* transform the value to physical address when writing to H/W */
10762306a36Sopenharmony_cistatic inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	/*
11062306a36Sopenharmony_ci	 * The value to write is actually the address of the next shadow hop +
11162306a36Sopenharmony_ci	 * flags at the 12 LSBs.
11262306a36Sopenharmony_ci	 * Hence in order to get the value to write to the physical PTE, we
11362306a36Sopenharmony_ci	 * clear the 12 LSBs and translate the shadow hop to its associated
11462306a36Sopenharmony_ci	 * physical hop, and add back the original 12 LSBs.
11562306a36Sopenharmony_ci	 */
11662306a36Sopenharmony_ci	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
11762306a36Sopenharmony_ci				(val & FLAGS_MASK);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
12062306a36Sopenharmony_ci					get_phys_addr(ctx, shadow_pte_addr),
12162306a36Sopenharmony_ci					phys_val);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	*(u64 *) (uintptr_t) shadow_pte_addr = val;
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci/* do not transform the value to physical address when writing to H/W */
12762306a36Sopenharmony_cistatic inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
12862306a36Sopenharmony_ci					u64 val)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
13162306a36Sopenharmony_ci					get_phys_addr(ctx, shadow_pte_addr),
13262306a36Sopenharmony_ci					val);
13362306a36Sopenharmony_ci	*(u64 *) (uintptr_t) shadow_pte_addr = val;
13462306a36Sopenharmony_ci}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci/* clear the last and present bits */
13762306a36Sopenharmony_cistatic inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	/* no need to transform the value to physical address */
14062306a36Sopenharmony_ci	write_final_pte(ctx, pte_addr, 0);
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_cistatic inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
14662306a36Sopenharmony_ci}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci/*
14962306a36Sopenharmony_ci * put_pte - decrement the num of ptes and free the hop if possible
15062306a36Sopenharmony_ci *
15162306a36Sopenharmony_ci * @ctx: pointer to the context structure
15262306a36Sopenharmony_ci * @hop_addr: addr of the hop
15362306a36Sopenharmony_ci *
15462306a36Sopenharmony_ci * This function returns the number of ptes left on this hop. If the number is
15562306a36Sopenharmony_ci * 0, it means the pte was freed.
15662306a36Sopenharmony_ci */
15762306a36Sopenharmony_cistatic inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
16062306a36Sopenharmony_ci	int num_of_ptes_left;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	pgt_info->num_of_ptes--;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	/*
16562306a36Sopenharmony_ci	 * Need to save the number of ptes left because free_hop might free
16662306a36Sopenharmony_ci	 * the pgt_info
16762306a36Sopenharmony_ci	 */
16862306a36Sopenharmony_ci	num_of_ptes_left = pgt_info->num_of_ptes;
16962306a36Sopenharmony_ci	if (!num_of_ptes_left)
17062306a36Sopenharmony_ci		_free_hop(ctx, pgt_info);
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	return num_of_ptes_left;
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
17662306a36Sopenharmony_ci					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	u64 mask, shift;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	mask = mmu_prop->hop_masks[hop_idx];
18162306a36Sopenharmony_ci	shift = mmu_prop->hop_shifts[hop_idx];
18262306a36Sopenharmony_ci	return hop_addr_arr[hop_idx] +
18362306a36Sopenharmony_ci			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
18462306a36Sopenharmony_ci}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_cistatic inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
18762306a36Sopenharmony_ci						bool *is_new_hop)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	if (hop_addr == ULLONG_MAX) {
19262306a36Sopenharmony_ci		hop_addr = alloc_hop(ctx);
19362306a36Sopenharmony_ci		*is_new_hop = (hop_addr != ULLONG_MAX);
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	return hop_addr;
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci/* translates shadow address inside hop to a physical address */
20062306a36Sopenharmony_cistatic inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
20362306a36Sopenharmony_ci	u64 shadow_hop_addr = shadow_addr & ~page_mask;
20462306a36Sopenharmony_ci	u64 pte_offset = shadow_addr & page_mask;
20562306a36Sopenharmony_ci	u64 phys_hop_addr;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	if (shadow_hop_addr != get_hop0_addr(ctx))
20862306a36Sopenharmony_ci		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
20962306a36Sopenharmony_ci	else
21062306a36Sopenharmony_ci		phys_hop_addr = get_phys_hop0_addr(ctx);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	return phys_hop_addr + pte_offset;
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic int dram_default_mapping_init(struct hl_ctx *ctx)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
21862306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
21962306a36Sopenharmony_ci	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
22062306a36Sopenharmony_ci		hop2_pte_addr, hop3_pte_addr, pte_val;
22162306a36Sopenharmony_ci	int rc, i, j, hop3_allocated = 0;
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	if ((!prop->dram_supports_virtual_memory) ||
22462306a36Sopenharmony_ci			(!hdev->dram_default_page_mapping) ||
22562306a36Sopenharmony_ci			(ctx->asid == HL_KERNEL_ASID_ID))
22662306a36Sopenharmony_ci		return 0;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	num_of_hop3 = prop->dram_size_for_default_page_mapping;
22962306a36Sopenharmony_ci	do_div(num_of_hop3, prop->dram_page_size);
23062306a36Sopenharmony_ci	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	/* add hop1 and hop2 */
23362306a36Sopenharmony_ci	total_hops = num_of_hop3 + 2;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
23662306a36Sopenharmony_ci	if (!ctx->dram_default_hops)
23762306a36Sopenharmony_ci		return -ENOMEM;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	hop0_addr = get_hop0_addr(ctx);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	hop1_addr = alloc_hop(ctx);
24262306a36Sopenharmony_ci	if (hop1_addr == ULLONG_MAX) {
24362306a36Sopenharmony_ci		dev_err(hdev->dev, "failed to alloc hop 1\n");
24462306a36Sopenharmony_ci		rc = -ENOMEM;
24562306a36Sopenharmony_ci		goto hop1_err;
24662306a36Sopenharmony_ci	}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	ctx->dram_default_hops[total_hops - 1] = hop1_addr;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	hop2_addr = alloc_hop(ctx);
25162306a36Sopenharmony_ci	if (hop2_addr == ULLONG_MAX) {
25262306a36Sopenharmony_ci		dev_err(hdev->dev, "failed to alloc hop 2\n");
25362306a36Sopenharmony_ci		rc = -ENOMEM;
25462306a36Sopenharmony_ci		goto hop2_err;
25562306a36Sopenharmony_ci	}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	ctx->dram_default_hops[total_hops - 2] = hop2_addr;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
26062306a36Sopenharmony_ci		ctx->dram_default_hops[i] = alloc_hop(ctx);
26162306a36Sopenharmony_ci		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
26262306a36Sopenharmony_ci			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
26362306a36Sopenharmony_ci			rc = -ENOMEM;
26462306a36Sopenharmony_ci			goto hop3_err;
26562306a36Sopenharmony_ci		}
26662306a36Sopenharmony_ci		hop3_allocated++;
26762306a36Sopenharmony_ci	}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	/* need only pte 0 in hops 0 and 1 */
27062306a36Sopenharmony_ci	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
27162306a36Sopenharmony_ci	write_pte(ctx, hop0_addr, pte_val);
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
27462306a36Sopenharmony_ci	write_pte(ctx, hop1_addr, pte_val);
27562306a36Sopenharmony_ci	get_pte(ctx, hop1_addr);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	hop2_pte_addr = hop2_addr;
27862306a36Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
27962306a36Sopenharmony_ci		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
28062306a36Sopenharmony_ci				PAGE_PRESENT_MASK;
28162306a36Sopenharmony_ci		write_pte(ctx, hop2_pte_addr, pte_val);
28262306a36Sopenharmony_ci		get_pte(ctx, hop2_addr);
28362306a36Sopenharmony_ci		hop2_pte_addr += HL_PTE_SIZE;
28462306a36Sopenharmony_ci	}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
28762306a36Sopenharmony_ci			LAST_MASK | PAGE_PRESENT_MASK;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
29062306a36Sopenharmony_ci		hop3_pte_addr = ctx->dram_default_hops[i];
29162306a36Sopenharmony_ci		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
29262306a36Sopenharmony_ci			write_final_pte(ctx, hop3_pte_addr, pte_val);
29362306a36Sopenharmony_ci			get_pte(ctx, ctx->dram_default_hops[i]);
29462306a36Sopenharmony_ci			hop3_pte_addr += HL_PTE_SIZE;
29562306a36Sopenharmony_ci		}
29662306a36Sopenharmony_ci	}
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	flush(ctx);
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	return 0;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_cihop3_err:
30362306a36Sopenharmony_ci	for (i = 0 ; i < hop3_allocated ; i++)
30462306a36Sopenharmony_ci		free_hop(ctx, ctx->dram_default_hops[i]);
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	free_hop(ctx, hop2_addr);
30762306a36Sopenharmony_cihop2_err:
30862306a36Sopenharmony_ci	free_hop(ctx, hop1_addr);
30962306a36Sopenharmony_cihop1_err:
31062306a36Sopenharmony_ci	kfree(ctx->dram_default_hops);
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	return rc;
31362306a36Sopenharmony_ci}
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_cistatic void dram_default_mapping_fini(struct hl_ctx *ctx)
31662306a36Sopenharmony_ci{
31762306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
31862306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
31962306a36Sopenharmony_ci	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
32062306a36Sopenharmony_ci		hop2_pte_addr, hop3_pte_addr;
32162306a36Sopenharmony_ci	int i, j;
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	if ((!prop->dram_supports_virtual_memory) ||
32462306a36Sopenharmony_ci			(!hdev->dram_default_page_mapping) ||
32562306a36Sopenharmony_ci			(ctx->asid == HL_KERNEL_ASID_ID))
32662306a36Sopenharmony_ci		return;
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	num_of_hop3 = prop->dram_size_for_default_page_mapping;
32962306a36Sopenharmony_ci	do_div(num_of_hop3, prop->dram_page_size);
33062306a36Sopenharmony_ci	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	hop0_addr = get_hop0_addr(ctx);
33362306a36Sopenharmony_ci	/* add hop1 and hop2 */
33462306a36Sopenharmony_ci	total_hops = num_of_hop3 + 2;
33562306a36Sopenharmony_ci	hop1_addr = ctx->dram_default_hops[total_hops - 1];
33662306a36Sopenharmony_ci	hop2_addr = ctx->dram_default_hops[total_hops - 2];
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
33962306a36Sopenharmony_ci		hop3_pte_addr = ctx->dram_default_hops[i];
34062306a36Sopenharmony_ci		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
34162306a36Sopenharmony_ci			clear_pte(ctx, hop3_pte_addr);
34262306a36Sopenharmony_ci			put_pte(ctx, ctx->dram_default_hops[i]);
34362306a36Sopenharmony_ci			hop3_pte_addr += HL_PTE_SIZE;
34462306a36Sopenharmony_ci		}
34562306a36Sopenharmony_ci	}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	hop2_pte_addr = hop2_addr;
34862306a36Sopenharmony_ci	for (i = 0 ; i < num_of_hop3 ; i++) {
34962306a36Sopenharmony_ci		clear_pte(ctx, hop2_pte_addr);
35062306a36Sopenharmony_ci		put_pte(ctx, hop2_addr);
35162306a36Sopenharmony_ci		hop2_pte_addr += HL_PTE_SIZE;
35262306a36Sopenharmony_ci	}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	clear_pte(ctx, hop1_addr);
35562306a36Sopenharmony_ci	put_pte(ctx, hop1_addr);
35662306a36Sopenharmony_ci	clear_pte(ctx, hop0_addr);
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	kfree(ctx->dram_default_hops);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	flush(ctx);
36162306a36Sopenharmony_ci}
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci/**
36462306a36Sopenharmony_ci * hl_mmu_v1_init() - initialize the MMU module.
36562306a36Sopenharmony_ci * @hdev: habanalabs device structure.
36662306a36Sopenharmony_ci *
36762306a36Sopenharmony_ci * This function does the following:
36862306a36Sopenharmony_ci * - Create a pool of pages for pgt_infos.
36962306a36Sopenharmony_ci * - Create a shadow table for pgt
37062306a36Sopenharmony_ci *
37162306a36Sopenharmony_ci * Return: 0 for success, non-zero for failure.
37262306a36Sopenharmony_ci */
37362306a36Sopenharmony_cistatic int hl_mmu_v1_init(struct hl_device *hdev)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
37662306a36Sopenharmony_ci	int rc;
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	hdev->mmu_priv.dr.mmu_pgt_pool =
37962306a36Sopenharmony_ci			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
38262306a36Sopenharmony_ci		dev_err(hdev->dev, "Failed to create page gen pool\n");
38362306a36Sopenharmony_ci		return -ENOMEM;
38462306a36Sopenharmony_ci	}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
38762306a36Sopenharmony_ci			prop->mmu_hop0_tables_total_size,
38862306a36Sopenharmony_ci			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
38962306a36Sopenharmony_ci			-1);
39062306a36Sopenharmony_ci	if (rc) {
39162306a36Sopenharmony_ci		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
39262306a36Sopenharmony_ci		goto err_pool_add;
39362306a36Sopenharmony_ci	}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
39662306a36Sopenharmony_ci										GFP_KERNEL);
39762306a36Sopenharmony_ci	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
39862306a36Sopenharmony_ci		rc = -ENOMEM;
39962306a36Sopenharmony_ci		goto err_pool_add;
40062306a36Sopenharmony_ci	}
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	/* MMU H/W init will be done in device hw_init() */
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	return 0;
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_cierr_pool_add:
40762306a36Sopenharmony_ci	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	return rc;
41062306a36Sopenharmony_ci}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci/**
41362306a36Sopenharmony_ci * hl_mmu_v1_fini() - release the MMU module.
41462306a36Sopenharmony_ci * @hdev: habanalabs device structure.
41562306a36Sopenharmony_ci *
41662306a36Sopenharmony_ci * This function does the following:
41762306a36Sopenharmony_ci * - Disable MMU in H/W.
41862306a36Sopenharmony_ci * - Free the pgt_infos pool.
41962306a36Sopenharmony_ci *
42062306a36Sopenharmony_ci * All contexts should be freed before calling this function.
42162306a36Sopenharmony_ci */
42262306a36Sopenharmony_cistatic void hl_mmu_v1_fini(struct hl_device *hdev)
42362306a36Sopenharmony_ci{
42462306a36Sopenharmony_ci	/* MMU H/W fini was already done in device hw_fini() */
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
42762306a36Sopenharmony_ci		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
42862306a36Sopenharmony_ci		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci		/* Make sure that if we arrive here again without init was
43162306a36Sopenharmony_ci		 * called we won't cause kernel panic. This can happen for
43262306a36Sopenharmony_ci		 * example if we fail during hard reset code at certain points
43362306a36Sopenharmony_ci		 */
43462306a36Sopenharmony_ci		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
43562306a36Sopenharmony_ci	}
43662306a36Sopenharmony_ci}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci/**
43962306a36Sopenharmony_ci * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
44062306a36Sopenharmony_ci * @ctx: pointer to the context structure to initialize.
44162306a36Sopenharmony_ci *
44262306a36Sopenharmony_ci * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
44362306a36Sopenharmony_ci * page tables hops related to this context.
44462306a36Sopenharmony_ci * Return: 0 on success, non-zero otherwise.
44562306a36Sopenharmony_ci */
44662306a36Sopenharmony_cistatic int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	hash_init(ctx->mmu_shadow_hash);
44962306a36Sopenharmony_ci	return dram_default_mapping_init(ctx);
45062306a36Sopenharmony_ci}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci/*
45362306a36Sopenharmony_ci * hl_mmu_ctx_fini - disable a ctx from using the mmu module
45462306a36Sopenharmony_ci *
45562306a36Sopenharmony_ci * @ctx: pointer to the context structure
45662306a36Sopenharmony_ci *
45762306a36Sopenharmony_ci * This function does the following:
45862306a36Sopenharmony_ci * - Free any pgts which were not freed yet
45962306a36Sopenharmony_ci * - Free the mutex
46062306a36Sopenharmony_ci * - Free DRAM default page mapping hops
46162306a36Sopenharmony_ci */
46262306a36Sopenharmony_cistatic void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
46362306a36Sopenharmony_ci{
46462306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
46562306a36Sopenharmony_ci	struct pgt_info *pgt_info;
46662306a36Sopenharmony_ci	struct hlist_node *tmp;
46762306a36Sopenharmony_ci	int i;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	dram_default_mapping_fini(ctx);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	if (!hash_empty(ctx->mmu_shadow_hash))
47262306a36Sopenharmony_ci		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
47362306a36Sopenharmony_ci			ctx->asid);
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
47662306a36Sopenharmony_ci		dev_err_ratelimited(hdev->dev,
47762306a36Sopenharmony_ci			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
47862306a36Sopenharmony_ci			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
47962306a36Sopenharmony_ci		_free_hop(ctx, pgt_info);
48062306a36Sopenharmony_ci	}
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistatic int hl_mmu_v1_unmap(struct hl_ctx *ctx,
48462306a36Sopenharmony_ci				u64 virt_addr, bool is_dram_addr)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
48762306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
48862306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
48962306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
49062306a36Sopenharmony_ci	bool is_huge, clear_hop3 = true;
49162306a36Sopenharmony_ci	int hop_idx;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
49462306a36Sopenharmony_ci	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
49762306a36Sopenharmony_ci		if (hop_idx == MMU_HOP0) {
49862306a36Sopenharmony_ci			hop_addr[hop_idx] = get_hop0_addr(ctx);
49962306a36Sopenharmony_ci		} else {
50062306a36Sopenharmony_ci			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
50162306a36Sopenharmony_ci			if (hop_addr[hop_idx] == ULLONG_MAX)
50262306a36Sopenharmony_ci				goto not_mapped;
50362306a36Sopenharmony_ci		}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci		hop_pte_addr[hop_idx] =
50662306a36Sopenharmony_ci				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
50962306a36Sopenharmony_ci	}
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	is_huge = curr_pte & mmu_prop->last_mask;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	if (is_dram_addr && !is_huge) {
51462306a36Sopenharmony_ci		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
51562306a36Sopenharmony_ci		return -EFAULT;
51662306a36Sopenharmony_ci	}
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	if (!is_huge) {
51962306a36Sopenharmony_ci		hop_idx = MMU_HOP4;
52062306a36Sopenharmony_ci		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
52162306a36Sopenharmony_ci		if (hop_addr[hop_idx] == ULLONG_MAX)
52262306a36Sopenharmony_ci			goto not_mapped;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci		hop_pte_addr[hop_idx] =
52562306a36Sopenharmony_ci				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
52662306a36Sopenharmony_ci		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
52762306a36Sopenharmony_ci		clear_hop3 = false;
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	if (hdev->dram_default_page_mapping && is_dram_addr) {
53162306a36Sopenharmony_ci		u64 default_pte = (prop->mmu_dram_default_page_addr &
53262306a36Sopenharmony_ci				HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
53362306a36Sopenharmony_ci					PAGE_PRESENT_MASK;
53462306a36Sopenharmony_ci		if (curr_pte == default_pte) {
53562306a36Sopenharmony_ci			dev_err(hdev->dev,
53662306a36Sopenharmony_ci				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
53762306a36Sopenharmony_ci					virt_addr);
53862306a36Sopenharmony_ci			goto not_mapped;
53962306a36Sopenharmony_ci		}
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci		if (!(curr_pte & PAGE_PRESENT_MASK)) {
54262306a36Sopenharmony_ci			dev_err(hdev->dev,
54362306a36Sopenharmony_ci				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
54462306a36Sopenharmony_ci					virt_addr);
54562306a36Sopenharmony_ci			goto not_mapped;
54662306a36Sopenharmony_ci		}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci		hop_idx = MMU_HOP3;
54962306a36Sopenharmony_ci		write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
55062306a36Sopenharmony_ci		put_pte(ctx, hop_addr[hop_idx]);
55162306a36Sopenharmony_ci	} else {
55262306a36Sopenharmony_ci		if (!(curr_pte & PAGE_PRESENT_MASK))
55362306a36Sopenharmony_ci			goto not_mapped;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci		if (hop_addr[MMU_HOP4])
55662306a36Sopenharmony_ci			clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
55762306a36Sopenharmony_ci		else
55862306a36Sopenharmony_ci			clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci		if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
56162306a36Sopenharmony_ci			clear_hop3 = true;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci		if (!clear_hop3)
56462306a36Sopenharmony_ci			goto mapped;
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
56762306a36Sopenharmony_ci			clear_pte(ctx, hop_pte_addr[hop_idx]);
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci			if (hop_idx == MMU_HOP0)
57062306a36Sopenharmony_ci				break;
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci			if (put_pte(ctx, hop_addr[hop_idx]))
57362306a36Sopenharmony_ci				goto mapped;
57462306a36Sopenharmony_ci		}
57562306a36Sopenharmony_ci	}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_cimapped:
57862306a36Sopenharmony_ci	return 0;
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_cinot_mapped:
58162306a36Sopenharmony_ci	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
58262306a36Sopenharmony_ci		virt_addr);
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	return -EINVAL;
58562306a36Sopenharmony_ci}
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_cistatic int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
58862306a36Sopenharmony_ci			u32 page_size, bool is_dram_addr)
58962306a36Sopenharmony_ci{
59062306a36Sopenharmony_ci	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
59162306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
59262306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
59362306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
59462306a36Sopenharmony_ci	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
59562306a36Sopenharmony_ci	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	/*
59862306a36Sopenharmony_ci	 * This mapping function can map a page or a huge page. For huge page
59962306a36Sopenharmony_ci	 * there are only 3 hops rather than 4. Currently the DRAM allocation
60062306a36Sopenharmony_ci	 * uses huge pages only but user memory could have been allocated with
60162306a36Sopenharmony_ci	 * one of the two page sizes. Since this is a common code for all the
60262306a36Sopenharmony_ci	 * three cases, we need this hugs page check.
60362306a36Sopenharmony_ci	 */
60462306a36Sopenharmony_ci	if (is_dram_addr) {
60562306a36Sopenharmony_ci		mmu_prop = &prop->dmmu;
60662306a36Sopenharmony_ci		is_huge = true;
60762306a36Sopenharmony_ci	} else if (page_size == prop->pmmu_huge.page_size) {
60862306a36Sopenharmony_ci		mmu_prop = &prop->pmmu_huge;
60962306a36Sopenharmony_ci		is_huge = true;
61062306a36Sopenharmony_ci	} else {
61162306a36Sopenharmony_ci		mmu_prop = &prop->pmmu;
61262306a36Sopenharmony_ci		is_huge = false;
61362306a36Sopenharmony_ci	}
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
61862306a36Sopenharmony_ci		if (hop_idx == MMU_HOP0) {
61962306a36Sopenharmony_ci			hop_addr[hop_idx] = get_hop0_addr(ctx);
62062306a36Sopenharmony_ci		} else {
62162306a36Sopenharmony_ci			hop_addr[hop_idx] =
62262306a36Sopenharmony_ci					get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
62362306a36Sopenharmony_ci			if (hop_addr[hop_idx] == ULLONG_MAX)
62462306a36Sopenharmony_ci				goto err;
62562306a36Sopenharmony_ci		}
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci		hop_pte_addr[hop_idx] =
62862306a36Sopenharmony_ci				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
62962306a36Sopenharmony_ci		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
63062306a36Sopenharmony_ci	}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	if (hdev->dram_default_page_mapping && is_dram_addr) {
63362306a36Sopenharmony_ci		u64 default_pte = (prop->mmu_dram_default_page_addr &
63462306a36Sopenharmony_ci					HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
63562306a36Sopenharmony_ci						PAGE_PRESENT_MASK;
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci		if (curr_pte != default_pte) {
63862306a36Sopenharmony_ci			dev_err(hdev->dev,
63962306a36Sopenharmony_ci				"DRAM: mapping already exists for virt_addr 0x%llx\n",
64062306a36Sopenharmony_ci					virt_addr);
64162306a36Sopenharmony_ci			rc = -EINVAL;
64262306a36Sopenharmony_ci			goto err;
64362306a36Sopenharmony_ci		}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
64662306a36Sopenharmony_ci			if (hop_new[hop_idx]) {
64762306a36Sopenharmony_ci				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
64862306a36Sopenharmony_ci				rc = -EFAULT;
64962306a36Sopenharmony_ci				goto err;
65062306a36Sopenharmony_ci			}
65162306a36Sopenharmony_ci		}
65262306a36Sopenharmony_ci	} else if (curr_pte & PAGE_PRESENT_MASK) {
65362306a36Sopenharmony_ci		dev_err(hdev->dev,
65462306a36Sopenharmony_ci			"mapping already exists for virt_addr 0x%llx\n",
65562306a36Sopenharmony_ci				virt_addr);
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
65862306a36Sopenharmony_ci			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
65962306a36Sopenharmony_ci					*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
66062306a36Sopenharmony_ci					hop_pte_addr[hop_idx]);
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci		rc = -EINVAL;
66362306a36Sopenharmony_ci		goto err;
66462306a36Sopenharmony_ci	}
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
66762306a36Sopenharmony_ci			| PAGE_PRESENT_MASK;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
67262306a36Sopenharmony_ci		prev_hop = hop_idx - 1;
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci		if (hop_new[hop_idx]) {
67562306a36Sopenharmony_ci			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
67662306a36Sopenharmony_ci			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
67762306a36Sopenharmony_ci			if (hop_idx != MMU_HOP1)
67862306a36Sopenharmony_ci				get_pte(ctx, hop_addr[prev_hop]);
67962306a36Sopenharmony_ci		}
68062306a36Sopenharmony_ci	}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	get_pte(ctx, hop_addr[num_hops - 1]);
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	return 0;
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_cierr:
68762306a36Sopenharmony_ci	for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
68862306a36Sopenharmony_ci		if (hop_new[hop_idx])
68962306a36Sopenharmony_ci			free_hop(ctx, hop_addr[hop_idx]);
69062306a36Sopenharmony_ci	}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	return rc;
69362306a36Sopenharmony_ci}
69462306a36Sopenharmony_ci
/*
 * hl_mmu_v1_swap_out - swap-out callback of MMU v1
 *
 * @ctx: pointer to the context structure
 *
 * The body is empty: this callback performs no action for MMU v1.
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{
}
70562306a36Sopenharmony_ci
/*
 * hl_mmu_v1_swap_in - swap-in callback of MMU v1
 *
 * @ctx: pointer to the context structure
 *
 * The body is empty: this callback performs no action for MMU v1.
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{
}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_cistatic int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
71862306a36Sopenharmony_ci				struct hl_mmu_hop_info *hops)
71962306a36Sopenharmony_ci{
72062306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
72162306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
72262306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
72362306a36Sopenharmony_ci	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
72462306a36Sopenharmony_ci	int i, used_hops;
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
72762306a36Sopenharmony_ci						prop->dmmu.start_addr,
72862306a36Sopenharmony_ci						prop->dmmu.end_addr);
72962306a36Sopenharmony_ci	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
73062306a36Sopenharmony_ci						prop->pmmu.start_addr,
73162306a36Sopenharmony_ci						prop->pmmu.end_addr);
73262306a36Sopenharmony_ci	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
73362306a36Sopenharmony_ci						prop->pmmu_huge.page_size,
73462306a36Sopenharmony_ci						prop->pmmu_huge.start_addr,
73562306a36Sopenharmony_ci						prop->pmmu_huge.end_addr);
73662306a36Sopenharmony_ci	if (is_dram_addr) {
73762306a36Sopenharmony_ci		mmu_prop = &prop->dmmu;
73862306a36Sopenharmony_ci		is_huge = true;
73962306a36Sopenharmony_ci	} else if (is_pmmu_addr) {
74062306a36Sopenharmony_ci		mmu_prop = &prop->pmmu;
74162306a36Sopenharmony_ci		is_huge = false;
74262306a36Sopenharmony_ci	} else if (is_pmmu_h_addr) {
74362306a36Sopenharmony_ci		mmu_prop = &prop->pmmu_huge;
74462306a36Sopenharmony_ci		is_huge = true;
74562306a36Sopenharmony_ci	} else {
74662306a36Sopenharmony_ci		return -EINVAL;
74762306a36Sopenharmony_ci	}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci	used_hops = mmu_prop->num_hops;
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	/* huge pages use lesser hops */
75262306a36Sopenharmony_ci	if (is_huge)
75362306a36Sopenharmony_ci		used_hops--;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
75662306a36Sopenharmony_ci	hops->hop_info[0].hop_pte_addr =
75762306a36Sopenharmony_ci			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
75862306a36Sopenharmony_ci					hops->hop_info[0].hop_addr, virt_addr);
75962306a36Sopenharmony_ci	hops->hop_info[0].hop_pte_val =
76062306a36Sopenharmony_ci			hdev->asic_funcs->read_pte(hdev,
76162306a36Sopenharmony_ci						hops->hop_info[0].hop_pte_addr);
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	for (i = 1 ; i < used_hops ; i++) {
76462306a36Sopenharmony_ci		hops->hop_info[i].hop_addr =
76562306a36Sopenharmony_ci			hl_mmu_get_next_hop_addr(ctx,
76662306a36Sopenharmony_ci					hops->hop_info[i - 1].hop_pte_val);
76762306a36Sopenharmony_ci		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
76862306a36Sopenharmony_ci			return -EFAULT;
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci		hops->hop_info[i].hop_pte_addr =
77162306a36Sopenharmony_ci				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
77262306a36Sopenharmony_ci						hops->hop_info[i].hop_addr,
77362306a36Sopenharmony_ci						virt_addr);
77462306a36Sopenharmony_ci		hops->hop_info[i].hop_pte_val =
77562306a36Sopenharmony_ci				hdev->asic_funcs->read_pte(hdev,
77662306a36Sopenharmony_ci						hops->hop_info[i].hop_pte_addr);
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
77962306a36Sopenharmony_ci			return -EFAULT;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
78262306a36Sopenharmony_ci			break;
78362306a36Sopenharmony_ci	}
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	/* if passed over all hops then no last hop was found */
78662306a36Sopenharmony_ci	if (i == mmu_prop->num_hops)
78762306a36Sopenharmony_ci		return -EFAULT;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
79062306a36Sopenharmony_ci		return -EFAULT;
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci	hops->used_hops = i + 1;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	return 0;
79562306a36Sopenharmony_ci}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci/*
79862306a36Sopenharmony_ci * hl_mmu_v1_prepare - prepare mmu  for working with mmu v1
79962306a36Sopenharmony_ci *
80062306a36Sopenharmony_ci * @hdev: pointer to the device structure
80162306a36Sopenharmony_ci */
80262306a36Sopenharmony_civoid hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
80362306a36Sopenharmony_ci{
80462306a36Sopenharmony_ci	mmu->init = hl_mmu_v1_init;
80562306a36Sopenharmony_ci	mmu->fini = hl_mmu_v1_fini;
80662306a36Sopenharmony_ci	mmu->ctx_init = hl_mmu_v1_ctx_init;
80762306a36Sopenharmony_ci	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
80862306a36Sopenharmony_ci	mmu->map = hl_mmu_v1_map;
80962306a36Sopenharmony_ci	mmu->unmap = hl_mmu_v1_unmap;
81062306a36Sopenharmony_ci	mmu->flush = flush;
81162306a36Sopenharmony_ci	mmu->swap_out = hl_mmu_v1_swap_out;
81262306a36Sopenharmony_ci	mmu->swap_in = hl_mmu_v1_swap_in;
81362306a36Sopenharmony_ci	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
81462306a36Sopenharmony_ci}
815