// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

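/* look up the pgt_info of a shadow hop address in the per-context hash */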
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = NULL;

        hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
                                (unsigned long) hop_addr)
                if (hop_addr == pgt_info->shadow_addr)
                        break;

        return pgt_info;
}

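/*
 * release a hop: return its physical page table to the pool, remove it from
 * the shadow hash and free both the shadow copy and the pgt_info itself
 */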
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
        struct hl_device *hdev = ctx->hdev;

        gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
                        hdev->asic_prop.mmu_hop_table_size);
        hash_del(&pgt_info->node);
        kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
        kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

        _free_hop(ctx, pgt_info);
}

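/*
 * allocate a new hop: a physical hop table from the device page tables pool
 * plus a zeroed shadow copy in host memory, tracked in the context's hash.
 * Returns the shadow address, or ULLONG_MAX on failure.
 */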
static u64 alloc_hop(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pgt_info *pgt_info;
        u64 phys_addr, shadow_addr;

        pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
        if (!pgt_info)
                return ULLONG_MAX;

        phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
                                        prop->mmu_hop_table_size);
        if (!phys_addr) {
                dev_err(hdev->dev, "failed to allocate page\n");
                goto pool_add_err;
        }

        shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
                                                GFP_KERNEL);
        if (!shadow_addr)
                goto shadow_err;

        pgt_info->phys_addr = phys_addr;
        pgt_info->shadow_addr = shadow_addr;
        pgt_info->ctx = ctx;
        pgt_info->num_of_ptes = 0;
        hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

        return shadow_addr;

shadow_err:
        gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
                        prop->mmu_hop_table_size);
pool_add_err:
        kfree(pgt_info);

        return ULLONG_MAX;
}

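/*
 * hop0 tables are pre-allocated per ASID: the physical copy resides in the
 * device page tables area and the shadow copy in mmu_shadow_hop0
 */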
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
        return ctx->hdev->asic_prop.mmu_pgt_addr +
                        (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
        return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
                        (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static void flush(struct hl_ctx *ctx)
{
        /* flush all writes from all cores to reach PCI */
        mb();
        ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
        /*
         * The value to write is actually the address of the next shadow hop +
         * flags at the 12 LSBs.
         * Hence in order to get the value to write to the physical PTE, we
         * clear the 12 LSBs and translate the shadow hop to its associated
         * physical hop, and add back the original 12 LSBs.
         */
        u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
                                (val & FLAGS_MASK);

        ctx->hdev->asic_funcs->write_pte(ctx->hdev,
                                        get_phys_addr(ctx, shadow_pte_addr),
                                        phys_val);

        *(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
                                        u64 val)
{
        ctx->hdev->asic_funcs->write_pte(ctx->hdev,
                                        get_phys_addr(ctx, shadow_pte_addr),
                                        val);
        *(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
        /* no need to transform the value to physical address */
        write_final_pte(ctx, pte_addr, 0);
}

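/* account for another valid PTE in the hop */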
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
        get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
        int num_of_ptes_left;

        pgt_info->num_of_ptes--;

        /*
         * Need to save the number of ptes left because free_hop might free
         * the pgt_info
         */
        num_of_ptes_left = pgt_info->num_of_ptes;
        if (!num_of_ptes_left)
                _free_hop(ctx, pgt_info);

        return num_of_ptes_left;
}

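/*
 * compute the shadow address of the PTE that maps virt_addr within the given
 * hop, using the per-hop mask and shift from the MMU properties
 */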
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
                                        u64 virt_addr, u64 mask, u64 shift)
{
        return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
                        ((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
                                        mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
                                        mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
                                        mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
                                        mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
                                        mmu_prop->hop4_shift);
}

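/*
 * return the next hop address the PTE points to, or ULLONG_MAX if the PTE is
 * not present
 */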
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
        if (curr_pte & PAGE_PRESENT_MASK)
                return curr_pte & HOP_PHYS_ADDR_MASK;
        else
                return ULLONG_MAX;
}

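/*
 * as get_next_hop_addr(), but allocate a new hop when the PTE is not present.
 * *is_new_hop is set only in that case and reports whether the allocation
 * succeeded.
 */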
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
                                                bool *is_new_hop)
{
        u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop_addr == ULLONG_MAX) {
                hop_addr = alloc_hop(ctx);
                *is_new_hop = (hop_addr != ULLONG_MAX);
        }

        return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
        u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
        u64 shadow_hop_addr = shadow_addr & ~page_mask;
        u64 pte_offset = shadow_addr & page_mask;
        u64 phys_hop_addr;

        if (shadow_hop_addr != get_hop0_addr(ctx))
                phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
        else
                phys_hop_addr = get_phys_hop0_addr(ctx);

        return phys_hop_addr + pte_offset;
}

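/*
 * map the DRAM range covered by the default page mapping to the default DRAM
 * page: hops 1 and 2 and all hop3 tables are allocated here, and every hop3
 * PTE is set to mmu_dram_default_page_addr. Undone by
 * dram_default_mapping_fini().
 */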
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
                hop2_pte_addr, hop3_pte_addr, pte_val;
        int rc, i, j, hop3_allocated = 0;

        if ((!hdev->dram_supports_virtual_memory) ||
                        (!hdev->dram_default_page_mapping) ||
                        (ctx->asid == HL_KERNEL_ASID_ID))
                return 0;

        num_of_hop3 = prop->dram_size_for_default_page_mapping;
        do_div(num_of_hop3, prop->dram_page_size);
        do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

        /* add hop1 and hop2 */
        total_hops = num_of_hop3 + 2;

        ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
        if (!ctx->dram_default_hops)
                return -ENOMEM;

        hop0_addr = get_hop0_addr(ctx);

        hop1_addr = alloc_hop(ctx);
        if (hop1_addr == ULLONG_MAX) {
                dev_err(hdev->dev, "failed to alloc hop 1\n");
                rc = -ENOMEM;
                goto hop1_err;
        }

        ctx->dram_default_hops[total_hops - 1] = hop1_addr;

        hop2_addr = alloc_hop(ctx);
        if (hop2_addr == ULLONG_MAX) {
                dev_err(hdev->dev, "failed to alloc hop 2\n");
                rc = -ENOMEM;
                goto hop2_err;
        }

        ctx->dram_default_hops[total_hops - 2] = hop2_addr;

        for (i = 0 ; i < num_of_hop3 ; i++) {
                ctx->dram_default_hops[i] = alloc_hop(ctx);
                if (ctx->dram_default_hops[i] == ULLONG_MAX) {
                        dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
                        rc = -ENOMEM;
                        goto hop3_err;
                }
                hop3_allocated++;
        }

        /* need only pte 0 in hops 0 and 1 */
        pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
        write_pte(ctx, hop0_addr, pte_val);

        pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
        write_pte(ctx, hop1_addr, pte_val);
        get_pte(ctx, hop1_addr);

        hop2_pte_addr = hop2_addr;
        for (i = 0 ; i < num_of_hop3 ; i++) {
                pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
                                PAGE_PRESENT_MASK;
                write_pte(ctx, hop2_pte_addr, pte_val);
                get_pte(ctx, hop2_addr);
                hop2_pte_addr += HL_PTE_SIZE;
        }

        pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
                        LAST_MASK | PAGE_PRESENT_MASK;

        for (i = 0 ; i < num_of_hop3 ; i++) {
                hop3_pte_addr = ctx->dram_default_hops[i];
                for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
                        write_final_pte(ctx, hop3_pte_addr, pte_val);
                        get_pte(ctx, ctx->dram_default_hops[i]);
                        hop3_pte_addr += HL_PTE_SIZE;
                }
        }

        flush(ctx);

        return 0;

hop3_err:
        for (i = 0 ; i < hop3_allocated ; i++)
                free_hop(ctx, ctx->dram_default_hops[i]);

        free_hop(ctx, hop2_addr);
hop2_err:
        free_hop(ctx, hop1_addr);
hop1_err:
        kfree(ctx->dram_default_hops);

        return rc;
}

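/* tear down the mapping created by dram_default_mapping_init() and free its hops */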
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
                hop2_pte_addr, hop3_pte_addr;
        int i, j;

        if ((!hdev->dram_supports_virtual_memory) ||
                        (!hdev->dram_default_page_mapping) ||
                        (ctx->asid == HL_KERNEL_ASID_ID))
                return;

        num_of_hop3 = prop->dram_size_for_default_page_mapping;
        do_div(num_of_hop3, prop->dram_page_size);
        do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

        hop0_addr = get_hop0_addr(ctx);
        /* add hop1 and hop2 */
        total_hops = num_of_hop3 + 2;
        hop1_addr = ctx->dram_default_hops[total_hops - 1];
        hop2_addr = ctx->dram_default_hops[total_hops - 2];

        for (i = 0 ; i < num_of_hop3 ; i++) {
                hop3_pte_addr = ctx->dram_default_hops[i];
                for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
                        clear_pte(ctx, hop3_pte_addr);
                        put_pte(ctx, ctx->dram_default_hops[i]);
                        hop3_pte_addr += HL_PTE_SIZE;
                }
        }

        hop2_pte_addr = hop2_addr;
        for (i = 0 ; i < num_of_hop3 ; i++) {
                clear_pte(ctx, hop2_pte_addr);
                put_pte(ctx, hop2_addr);
                hop2_pte_addr += HL_PTE_SIZE;
        }

        clear_pte(ctx, hop1_addr);
        put_pte(ctx, hop1_addr);
        clear_pte(ctx, hop0_addr);

        kfree(ctx->dram_default_hops);

        flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for the device page tables.
 * - Allocate the shadow hop0 tables.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        hdev->mmu_priv.mmu_pgt_pool =
                        gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

        if (!hdev->mmu_priv.mmu_pgt_pool) {
                dev_err(hdev->dev, "Failed to create page gen pool\n");
                return -ENOMEM;
        }

        rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
                        prop->mmu_hop0_tables_total_size,
                        prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
                        -1);
        if (rc) {
                dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
                goto err_pool_add;
        }

        hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
                                                prop->mmu_hop_table_size,
                                                GFP_KERNEL | __GFP_ZERO);
        if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
                rc = -ENOMEM;
                goto err_pool_add;
        }

        /* MMU H/W init will be done in device hw_init() */

        return 0;

err_pool_add:
        gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);

        return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Free the shadow hop0 tables.
 * - Destroy the device page tables pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
        /* MMU H/W fini was already done in device hw_fini() */

        kvfree(hdev->mmu_priv.mmu_shadow_hop0);
        gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to
 * hold all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
        mutex_init(&ctx->mmu_lock);
        hash_init(ctx->mmu_shadow_hash);

        return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free the DRAM default page mapping hops
 * - Free any pgts which were not freed yet
 * - Destroy the mutex
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct pgt_info *pgt_info;
        struct hlist_node *tmp;
        int i;

        dram_default_mapping_fini(ctx);

        if (!hash_empty(ctx->mmu_shadow_hash))
                dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
                        ctx->asid);

        hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
                dev_err_ratelimited(hdev->dev,
                        "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
                        pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
                _free_hop(ctx, pgt_info);
        }

        mutex_destroy(&ctx->mmu_lock);
}

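/*
 * walk the shadow page tables for virt_addr, clear the final PTE and release
 * every hop that becomes empty. For the DRAM default mapping the hop3 PTE is
 * restored to the default page instead of being cleared.
 */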
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
                                u64 virt_addr, bool is_dram_addr)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
                hop3_addr = 0, hop3_pte_addr = 0,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte;
        bool is_huge, clear_hop3 = true;

        /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

        hop0_addr = get_hop0_addr(ctx);
        hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

        hop1_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop1_addr == ULLONG_MAX)
                goto not_mapped;

        hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

        hop2_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop2_addr == ULLONG_MAX)
                goto not_mapped;

        hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

        hop3_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop3_addr == ULLONG_MAX)
                goto not_mapped;

        hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

        is_huge = curr_pte & LAST_MASK;

        if (is_dram_addr && !is_huge) {
                dev_err(hdev->dev,
                                "DRAM unmapping should use huge pages only\n");
                return -EFAULT;
        }

        if (!is_huge) {
                hop4_addr = get_next_hop_addr(ctx, curr_pte);

                if (hop4_addr == ULLONG_MAX)
                        goto not_mapped;

                hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
                                                        virt_addr);

                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

                clear_hop3 = false;
        }

        if (hdev->dram_default_page_mapping && is_dram_addr) {
                u64 default_pte = (prop->mmu_dram_default_page_addr &
                                HOP_PHYS_ADDR_MASK) | LAST_MASK |
                                        PAGE_PRESENT_MASK;
                if (curr_pte == default_pte) {
                        dev_err(hdev->dev,
                                "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
                                        virt_addr);
                        goto not_mapped;
                }

                if (!(curr_pte & PAGE_PRESENT_MASK)) {
                        dev_err(hdev->dev,
                                "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
                                        virt_addr);
                        goto not_mapped;
                }

                write_final_pte(ctx, hop3_pte_addr, default_pte);
                put_pte(ctx, hop3_addr);
        } else {
                if (!(curr_pte & PAGE_PRESENT_MASK))
                        goto not_mapped;

                if (hop4_addr)
                        clear_pte(ctx, hop4_pte_addr);
                else
                        clear_pte(ctx, hop3_pte_addr);

                if (hop4_addr && !put_pte(ctx, hop4_addr))
                        clear_hop3 = true;

                if (!clear_hop3)
                        goto mapped;

                clear_pte(ctx, hop3_pte_addr);

                if (put_pte(ctx, hop3_addr))
                        goto mapped;

                clear_pte(ctx, hop2_pte_addr);

                if (put_pte(ctx, hop2_addr))
                        goto mapped;

                clear_pte(ctx, hop1_pte_addr);

                if (put_pte(ctx, hop1_addr))
                        goto mapped;

                clear_pte(ctx, hop0_pte_addr);
        }

mapped:
        return 0;

not_mapped:
        dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
                virt_addr);

        return -EINVAL;
}

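/*
 * walk the shadow page tables for virt_addr, allocating missing hops on the
 * way, and program the final PTE with phys_addr. Huge pages terminate at
 * hop3, regular pages at hop4.
 */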
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                        u32 page_size, bool is_dram_addr)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
                hop3_addr = 0, hop3_pte_addr = 0,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte = 0;
        bool hop1_new = false, hop2_new = false, hop3_new = false,
                hop4_new = false, is_huge;
        int rc = -ENOMEM;

        /*
         * This mapping function can map a page or a huge page. For a huge page
         * there are only 3 hops rather than 4. Currently the DRAM allocation
         * uses huge pages only, but user memory could have been allocated with
         * either of the two page sizes. Since this is common code for all
         * three cases, we need this huge page check.
         */
        if (is_dram_addr) {
                mmu_prop = &prop->dmmu;
                is_huge = true;
        } else if (page_size == prop->pmmu_huge.page_size) {
                mmu_prop = &prop->pmmu_huge;
                is_huge = true;
        } else {
                mmu_prop = &prop->pmmu;
                is_huge = false;
        }

        hop0_addr = get_hop0_addr(ctx);
        hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

        hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
        if (hop1_addr == ULLONG_MAX)
                goto err;

        hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

        hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
        if (hop2_addr == ULLONG_MAX)
                goto err;

        hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

        hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
        if (hop3_addr == ULLONG_MAX)
                goto err;

        hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

        if (!is_huge) {
                hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
                if (hop4_addr == ULLONG_MAX)
                        goto err;

                hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
                                                        virt_addr);
                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
        }

        if (hdev->dram_default_page_mapping && is_dram_addr) {
                u64 default_pte = (prop->mmu_dram_default_page_addr &
                                        HOP_PHYS_ADDR_MASK) | LAST_MASK |
                                                PAGE_PRESENT_MASK;

                if (curr_pte != default_pte) {
                        dev_err(hdev->dev,
                                "DRAM: mapping already exists for virt_addr 0x%llx\n",
                                        virt_addr);
                        rc = -EINVAL;
                        goto err;
                }

                if (hop1_new || hop2_new || hop3_new || hop4_new) {
                        dev_err(hdev->dev,
                                "DRAM mapping should not allocate more hops\n");
                        rc = -EFAULT;
                        goto err;
                }
        } else if (curr_pte & PAGE_PRESENT_MASK) {
                dev_err(hdev->dev,
                        "mapping already exists for virt_addr 0x%llx\n",
                                virt_addr);

                dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
                dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
                dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
                dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

                if (!is_huge)
                        dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
                                *(u64 *) (uintptr_t) hop4_pte_addr,
                                hop4_pte_addr);

                rc = -EINVAL;
                goto err;
        }

        curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
                        | PAGE_PRESENT_MASK;

        if (is_huge)
                write_final_pte(ctx, hop3_pte_addr, curr_pte);
        else
                write_final_pte(ctx, hop4_pte_addr, curr_pte);

        if (hop1_new) {
                curr_pte =
                        (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop0_pte_addr, curr_pte);
        }
        if (hop2_new) {
                curr_pte =
                        (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop1_pte_addr, curr_pte);
                get_pte(ctx, hop1_addr);
        }
        if (hop3_new) {
                curr_pte =
                        (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop2_pte_addr, curr_pte);
                get_pte(ctx, hop2_addr);
        }

        if (!is_huge) {
                if (hop4_new) {
                        curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
                                        PAGE_PRESENT_MASK;
                        write_pte(ctx, hop3_pte_addr, curr_pte);
                        get_pte(ctx, hop3_addr);
                }

                get_pte(ctx, hop4_addr);
        } else {
                get_pte(ctx, hop3_addr);
        }

        return 0;

err:
        if (hop4_new)
                free_hop(ctx, hop4_addr);
        if (hop3_new)
                free_hop(ctx, hop3_addr);
        if (hop2_new)
                free_hop(ctx, hop2_addr);
        if (hop1_new)
                free_hop(ctx, hop1_addr);

        return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_set_funcs - set the device MMU function pointers to the MMU v1
 *                       implementation
 *
 * @hdev: pointer to the device structure
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
        struct hl_mmu_funcs *mmu = &hdev->mmu_func;

        mmu->init = hl_mmu_v1_init;
        mmu->fini = hl_mmu_v1_fini;
        mmu->ctx_init = hl_mmu_v1_ctx_init;
        mmu->ctx_fini = hl_mmu_v1_ctx_fini;
        mmu->map = _hl_mmu_v1_map;
        mmu->unmap = _hl_mmu_v1_unmap;
        mmu->flush = flush;
        mmu->swap_out = hl_mmu_v1_swap_out;
        mmu->swap_in = hl_mmu_v1_swap_in;
}