// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

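/*
 * Every hop (page table) exists in two copies: a physical one in device
 * memory and a shadow one in host memory that the driver reads and writes
 * directly. Hops other than hop0 are allocated by alloc_hop() and tracked by
 * a pgt_info entry in the per-context mmu_shadow_hash, keyed by the shadow
 * address. hop0 tables are preallocated per ASID (see get_hop0_addr() and
 * get_phys_hop0_addr()).
 */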
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

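/*
 * alloc_hop() allocates both copies of a new hop: the physical page table
 * from the device gen pool and a zeroed host shadow of the same size, then
 * registers them in the shadow hash. It returns the shadow address, or
 * ULLONG_MAX on failure.
 */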
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

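/*
 * write_pte()/write_final_pte() below update the physical PTEs with posted
 * writes; flush() orders them with a memory barrier and then forces them out
 * to the device by reading back a PTE (hop0 of this context).
 */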
static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

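/*
 * Each hop keeps a count of the valid PTEs it holds (num_of_ptes).
 * get_pte()/put_pte() maintain that count so an intermediate hop can be freed
 * automatically once its last PTE is dropped. hop0 is preallocated per ASID
 * and is never refcounted.
 */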
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

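/*
 * Each hop table is indexed by its own slice of the virtual address, selected
 * by the per-hop mask/shift from the ASIC MMU properties. For example, with
 * 512-entry hop tables (HOP_PTE_ENTRIES_512) every hop consumes 9 VA bits and
 * the PTE address is the hop base plus that index scaled by the PTE size.
 * The 512-entry figure is illustrative only; the actual geometry comes from
 * hop_masks[]/hop_shifts[].
 */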
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
	u64 mask, shift;

	mask = mmu_prop->hop_masks[hop_idx];
	shift = mmu_prop->hop_shifts[hop_idx];
	return hop_addr_arr[hop_idx] +
			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}

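/*
 * Return the shadow address of the next hop referenced by curr_pte. If the
 * PTE does not point to a valid hop yet, allocate a new one and report that
 * through *is_new_hop (which is left untouched otherwise).
 */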
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

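/*
 * When DRAM default page mapping is enabled, the whole DRAM virtual range of
 * a user context is pre-mapped (with huge, last-hop3 PTEs) to a single
 * default physical page (mmu_dram_default_page_addr), so that accesses to
 * not-yet-mapped DRAM addresses go to a known page rather than an unmapped
 * address. This requires one hop1, one hop2 and num_of_hop3 hop3 tables, all
 * of whose PTEs point to the default page.
 */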
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgts.
 * - Create a shadow table for the per-ASID hop0 pgts.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.dr.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
										GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

		/* Make sure that if we arrive here again without init having
		 * been called we won't cause a kernel panic. This can happen,
		 * for example, if we fail at certain points during the hard
		 * reset flow.
		 */
		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
	}
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a hash to hold all the page-table hops related to this context,
 * and create the default DRAM mapping if needed.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}
}

static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, clear_hop3 = true;
	int hop_idx;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto not_mapped;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	is_huge = curr_pte & mmu_prop->last_mask;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop_idx = MMU_HOP4;
		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[hop_idx] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
		clear_hop3 = false;
	}

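	/*
	 * With DRAM default page mapping enabled, "unmapping" a DRAM page
	 * means restoring its hop3 PTE to point back at the default page
	 * rather than clearing it, so the default mapping stays intact.
	 */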
	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		hop_idx = MMU_HOP3;
		write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
		put_pte(ctx, hop_addr[hop_idx]);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop_addr[MMU_HOP4])
			clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
		else
			clear_pte(ctx, hop_pte_addr[MMU_HOP3]);

		if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
			clear_pte(ctx, hop_pte_addr[hop_idx]);

			if (hop_idx == MMU_HOP0)
				break;

			if (put_pte(ctx, hop_addr[hop_idx]))
				goto mapped;
		}
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * the walk ends at hop3, so one hop less is used. Currently the DRAM
	 * allocation uses huge pages only, but user memory could have been
	 * allocated with either of the two page sizes. Since this is common
	 * code for all three cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;

	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] =
					get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto err;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
			if (hop_new[hop_idx]) {
				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
				rc = -EFAULT;
				goto err;
			}
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
					*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
					hop_pte_addr[hop_idx]);

		rc = -EINVAL;
		goto err;
	}

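	/*
	 * Write the leaf PTE first. Then, for each newly allocated hop, point
	 * its parent's PTE at it and take a PTE reference on the parent
	 * (hop0 is never refcounted). Finally, take a reference on the hop
	 * that holds the leaf PTE.
	 */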
	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);

	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
		prev_hop = hop_idx - 1;

		if (hop_new[hop_idx]) {
			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
			if (hop_idx != MMU_HOP1)
				get_pte(ctx, hop_addr[prev_hop]);
		}
	}

	get_pte(ctx, hop_addr[num_hops - 1]);

	return 0;

err:
	/* start at num_hops - 1 to avoid reading past the end of hop_new[] */
	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--) {
		if (hop_new[hop_idx])
			free_hop(ctx, hop_addr[hop_idx]);
	}

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

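/*
 * Unlike map/unmap, which walk the host shadow tables, get_tlb_info walks the
 * page tables that reside in device memory, reading each PTE through the ASIC
 * (read_pte), and reports the per-hop addresses and PTE values it found.
 */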
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
	int i, used_hops;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (is_pmmu_addr) {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	} else if (is_pmmu_h_addr) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		return -EINVAL;
	}

	used_hops = mmu_prop->num_hops;

	/* huge pages use fewer hops */
	if (is_huge)
		used_hops--;

	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
					hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);

	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						virt_addr);
		hops->hop_info[i].hop_pte_val =
				hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if we went over all the hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v1_set_funcs - set the MMU functions to work with MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = hl_mmu_v1_map;
	mmu->unmap = hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}