// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/slab.h>
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include "../habanalabs.h"
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include <trace/events/habanalabs.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci/**
1562306a36Sopenharmony_ci * hl_mmu_get_funcs() - get MMU functions structure
1662306a36Sopenharmony_ci * @hdev: habanalabs device structure.
1762306a36Sopenharmony_ci * @pgt_residency: page table residency.
1862306a36Sopenharmony_ci * @is_dram_addr: true if we need HMMU functions
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci * @return appropriate MMU functions structure
2162306a36Sopenharmony_ci */
2262306a36Sopenharmony_cistatic struct hl_mmu_funcs *hl_mmu_get_funcs(struct hl_device *hdev, int pgt_residency,
2362306a36Sopenharmony_ci									bool is_dram_addr)
2462306a36Sopenharmony_ci{
2562306a36Sopenharmony_ci	return &hdev->mmu_func[pgt_residency];
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_cibool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci	return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
3362306a36Sopenharmony_ci					prop->dmmu.start_addr,
3462306a36Sopenharmony_ci					prop->dmmu.end_addr);
3562306a36Sopenharmony_ci}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/**
3862306a36Sopenharmony_ci * hl_mmu_init() - initialize the MMU module.
3962306a36Sopenharmony_ci * @hdev: habanalabs device structure.
4062306a36Sopenharmony_ci *
4162306a36Sopenharmony_ci * Return: 0 for success, non-zero for failure.
4262306a36Sopenharmony_ci */
4362306a36Sopenharmony_ciint hl_mmu_init(struct hl_device *hdev)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	int rc = -EOPNOTSUPP;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	if (hdev->mmu_disable)
4862306a36Sopenharmony_ci		return 0;
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	mutex_init(&hdev->mmu_lock);
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
5362306a36Sopenharmony_ci		rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
5462306a36Sopenharmony_ci		if (rc)
5562306a36Sopenharmony_ci			return rc;
5662306a36Sopenharmony_ci	}
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_HR_PGT].init != NULL) {
5962306a36Sopenharmony_ci		rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);
6062306a36Sopenharmony_ci		if (rc)
6162306a36Sopenharmony_ci			goto fini_dr_mmu;
6262306a36Sopenharmony_ci	}
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	return 0;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_cifini_dr_mmu:
6762306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
6862306a36Sopenharmony_ci		hdev->mmu_func[MMU_DR_PGT].fini(hdev);
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	return rc;
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci/**
7462306a36Sopenharmony_ci * hl_mmu_fini() - release the MMU module.
7562306a36Sopenharmony_ci * @hdev: habanalabs device structure.
7662306a36Sopenharmony_ci *
7762306a36Sopenharmony_ci * This function does the following:
7862306a36Sopenharmony_ci * - Disable MMU in H/W.
7962306a36Sopenharmony_ci * - Free the pgt_infos pool.
8062306a36Sopenharmony_ci *
8162306a36Sopenharmony_ci * All contexts should be freed before calling this function.
8262306a36Sopenharmony_ci */
8362306a36Sopenharmony_civoid hl_mmu_fini(struct hl_device *hdev)
8462306a36Sopenharmony_ci{
8562306a36Sopenharmony_ci	if (hdev->mmu_disable)
8662306a36Sopenharmony_ci		return;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
8962306a36Sopenharmony_ci		hdev->mmu_func[MMU_DR_PGT].fini(hdev);
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
9262306a36Sopenharmony_ci		hdev->mmu_func[MMU_HR_PGT].fini(hdev);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	mutex_destroy(&hdev->mmu_lock);
9562306a36Sopenharmony_ci}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci/**
9862306a36Sopenharmony_ci * hl_mmu_ctx_init() - initialize a context for using the MMU module.
9962306a36Sopenharmony_ci * @ctx: pointer to the context structure to initialize.
10062306a36Sopenharmony_ci *
10162306a36Sopenharmony_ci * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
10262306a36Sopenharmony_ci * page tables hops related to this context.
10362306a36Sopenharmony_ci * Return: 0 on success, non-zero otherwise.
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_ciint hl_mmu_ctx_init(struct hl_ctx *ctx)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
10862306a36Sopenharmony_ci	int rc = -EOPNOTSUPP;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	if (hdev->mmu_disable)
11162306a36Sopenharmony_ci		return 0;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
11462306a36Sopenharmony_ci		rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
11562306a36Sopenharmony_ci		if (rc)
11662306a36Sopenharmony_ci			return rc;
11762306a36Sopenharmony_ci	}
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL) {
12062306a36Sopenharmony_ci		rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);
12162306a36Sopenharmony_ci		if (rc)
12262306a36Sopenharmony_ci			goto fini_dr_ctx;
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	return 0;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_cifini_dr_ctx:
12862306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
12962306a36Sopenharmony_ci		hdev->mmu_func[MMU_DR_PGT].fini(hdev);
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	return rc;
13262306a36Sopenharmony_ci}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci/*
13562306a36Sopenharmony_ci * hl_mmu_ctx_fini - disable a ctx from using the mmu module
13662306a36Sopenharmony_ci *
13762306a36Sopenharmony_ci * @ctx: pointer to the context structure
13862306a36Sopenharmony_ci *
13962306a36Sopenharmony_ci * This function does the following:
14062306a36Sopenharmony_ci * - Free any pgts which were not freed yet
14162306a36Sopenharmony_ci * - Free the mutex
14262306a36Sopenharmony_ci * - Free DRAM default page mapping hops
14362306a36Sopenharmony_ci */
14462306a36Sopenharmony_civoid hl_mmu_ctx_fini(struct hl_ctx *ctx)
14562306a36Sopenharmony_ci{
14662306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	if (hdev->mmu_disable)
14962306a36Sopenharmony_ci		return;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
15262306a36Sopenharmony_ci		hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
15562306a36Sopenharmony_ci		hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
15662306a36Sopenharmony_ci}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci/*
15962306a36Sopenharmony_ci * hl_mmu_get_real_page_size - get real page size to use in map/unmap operation
16062306a36Sopenharmony_ci *
16162306a36Sopenharmony_ci * @hdev: pointer to device data.
16262306a36Sopenharmony_ci * @mmu_prop: MMU properties.
16362306a36Sopenharmony_ci * @page_size: page size
16462306a36Sopenharmony_ci * @real_page_size: set here the actual page size to use for the operation
16562306a36Sopenharmony_ci * @is_dram_addr: true if DRAM address, otherwise false.
16662306a36Sopenharmony_ci *
16762306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code
16862306a36Sopenharmony_ci *
16962306a36Sopenharmony_ci * note that this is general implementation that can fit most MMU arch. but as this is used as an
17062306a36Sopenharmony_ci * MMU function:
17162306a36Sopenharmony_ci * 1. it shall not be called directly- only from mmu_func structure instance
17262306a36Sopenharmony_ci * 2. each MMU may modify the implementation internally
17362306a36Sopenharmony_ci */
17462306a36Sopenharmony_ciint hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
17562306a36Sopenharmony_ci				u32 page_size, u32 *real_page_size, bool is_dram_addr)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * The H/W handles mapping of specific page sizes. Hence if the page
17962306a36Sopenharmony_ci	 * size is bigger, we break it to sub-pages and map them separately.
18062306a36Sopenharmony_ci	 */
18162306a36Sopenharmony_ci	if ((page_size % mmu_prop->page_size) == 0) {
18262306a36Sopenharmony_ci		*real_page_size = mmu_prop->page_size;
18362306a36Sopenharmony_ci		return 0;
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
18762306a36Sopenharmony_ci						page_size, mmu_prop->page_size >> 10);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	return -EFAULT;
19062306a36Sopenharmony_ci}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_cistatic struct hl_mmu_properties *hl_mmu_get_prop(struct hl_device *hdev, u32 page_size,
19362306a36Sopenharmony_ci							bool is_dram_addr)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	if (is_dram_addr)
19862306a36Sopenharmony_ci		return &prop->dmmu;
19962306a36Sopenharmony_ci	else if ((page_size % prop->pmmu_huge.page_size) == 0)
20062306a36Sopenharmony_ci		return &prop->pmmu_huge;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	return &prop->pmmu;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/*
20662306a36Sopenharmony_ci * hl_mmu_unmap_page - unmaps a virtual addr
20762306a36Sopenharmony_ci *
20862306a36Sopenharmony_ci * @ctx: pointer to the context structure
20962306a36Sopenharmony_ci * @virt_addr: virt addr to map from
21062306a36Sopenharmony_ci * @page_size: size of the page to unmap
21162306a36Sopenharmony_ci * @flush_pte: whether to do a PCI flush
21262306a36Sopenharmony_ci *
21362306a36Sopenharmony_ci * This function does the following:
21462306a36Sopenharmony_ci * - Check that the virt addr is mapped
21562306a36Sopenharmony_ci * - Unmap the virt addr and frees pgts if possible
21662306a36Sopenharmony_ci * - Returns 0 on success, -EINVAL if the given addr is not mapped
21762306a36Sopenharmony_ci *
21862306a36Sopenharmony_ci * Because this function changes the page tables in the device and because it
21962306a36Sopenharmony_ci * changes the MMU hash, it must be protected by a lock.
22062306a36Sopenharmony_ci * However, because it maps only a single page, the lock should be implemented
22162306a36Sopenharmony_ci * in a higher level in order to protect the entire mapping of the memory area
22262306a36Sopenharmony_ci *
22362306a36Sopenharmony_ci * For optimization reasons PCI flush may be requested once after unmapping of
22462306a36Sopenharmony_ci * large area.
22562306a36Sopenharmony_ci */
22662306a36Sopenharmony_ciint hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
22962306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
23062306a36Sopenharmony_ci	struct hl_mmu_funcs *mmu_funcs;
23162306a36Sopenharmony_ci	int i, pgt_residency, rc = 0;
23262306a36Sopenharmony_ci	u32 real_page_size, npages;
23362306a36Sopenharmony_ci	u64 real_virt_addr;
23462306a36Sopenharmony_ci	bool is_dram_addr;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	if (hdev->mmu_disable)
23762306a36Sopenharmony_ci		return 0;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	is_dram_addr = hl_is_dram_va(hdev, virt_addr);
24062306a36Sopenharmony_ci	mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr);
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
24362306a36Sopenharmony_ci	mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size,
24662306a36Sopenharmony_ci							is_dram_addr);
24762306a36Sopenharmony_ci	if (rc)
24862306a36Sopenharmony_ci		return rc;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	npages = page_size / real_page_size;
25162306a36Sopenharmony_ci	real_virt_addr = virt_addr;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	for (i = 0 ; i < npages ; i++) {
25462306a36Sopenharmony_ci		rc = mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr);
25562306a36Sopenharmony_ci		if (rc)
25662306a36Sopenharmony_ci			break;
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci		real_virt_addr += real_page_size;
25962306a36Sopenharmony_ci	}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	if (flush_pte)
26262306a36Sopenharmony_ci		mmu_funcs->flush(ctx);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	if (trace_habanalabs_mmu_unmap_enabled() && !rc)
26562306a36Sopenharmony_ci		trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	return rc;
26862306a36Sopenharmony_ci}
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci/*
27162306a36Sopenharmony_ci * hl_mmu_map_page - maps a virtual addr to physical addr
27262306a36Sopenharmony_ci *
27362306a36Sopenharmony_ci * @ctx: pointer to the context structure
27462306a36Sopenharmony_ci * @virt_addr: virt addr to map from
27562306a36Sopenharmony_ci * @phys_addr: phys addr to map to
27662306a36Sopenharmony_ci * @page_size: physical page size
27762306a36Sopenharmony_ci * @flush_pte: whether to do a PCI flush
27862306a36Sopenharmony_ci *
27962306a36Sopenharmony_ci * This function does the following:
28062306a36Sopenharmony_ci * - Check that the virt addr is not mapped
28162306a36Sopenharmony_ci * - Allocate pgts as necessary in order to map the virt addr to the phys
28262306a36Sopenharmony_ci * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
28362306a36Sopenharmony_ci *
28462306a36Sopenharmony_ci * Because this function changes the page tables in the device and because it
28562306a36Sopenharmony_ci * changes the MMU hash, it must be protected by a lock.
28662306a36Sopenharmony_ci * However, because it maps only a single page, the lock should be implemented
28762306a36Sopenharmony_ci * in a higher level in order to protect the entire mapping of the memory area
28862306a36Sopenharmony_ci *
28962306a36Sopenharmony_ci * For optimization reasons PCI flush may be requested once after mapping of
29062306a36Sopenharmony_ci * large area.
29162306a36Sopenharmony_ci */
29262306a36Sopenharmony_ciint hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
29362306a36Sopenharmony_ci			bool flush_pte)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	int i, rc, pgt_residency, mapped_cnt = 0;
29662306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
29762306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
29862306a36Sopenharmony_ci	u64 real_virt_addr, real_phys_addr;
29962306a36Sopenharmony_ci	struct hl_mmu_funcs *mmu_funcs;
30062306a36Sopenharmony_ci	u32 real_page_size, npages;
30162306a36Sopenharmony_ci	bool is_dram_addr;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	if (hdev->mmu_disable)
30562306a36Sopenharmony_ci		return 0;
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	is_dram_addr = hl_is_dram_va(hdev, virt_addr);
30862306a36Sopenharmony_ci	mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr);
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
31162306a36Sopenharmony_ci	mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size,
31462306a36Sopenharmony_ci							is_dram_addr);
31562306a36Sopenharmony_ci	if (rc)
31662306a36Sopenharmony_ci		return rc;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	/*
31962306a36Sopenharmony_ci	 * Verify that the phys and virt addresses are aligned with the
32062306a36Sopenharmony_ci	 * MMU page size (in dram this means checking the address and MMU
32162306a36Sopenharmony_ci	 * after scrambling)
32262306a36Sopenharmony_ci	 */
32362306a36Sopenharmony_ci	if ((is_dram_addr &&
32462306a36Sopenharmony_ci			((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
32562306a36Sopenharmony_ci				(mmu_prop->page_size - 1)) ||
32662306a36Sopenharmony_ci			(hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
32762306a36Sopenharmony_ci				(mmu_prop->page_size - 1)))) ||
32862306a36Sopenharmony_ci		(!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
32962306a36Sopenharmony_ci				(virt_addr & (real_page_size - 1)))))
33062306a36Sopenharmony_ci		dev_crit(hdev->dev,
33162306a36Sopenharmony_ci			"Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
33262306a36Sopenharmony_ci			phys_addr, virt_addr, real_page_size);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	npages = page_size / real_page_size;
33562306a36Sopenharmony_ci	real_virt_addr = virt_addr;
33662306a36Sopenharmony_ci	real_phys_addr = phys_addr;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	for (i = 0 ; i < npages ; i++) {
33962306a36Sopenharmony_ci		rc = mmu_funcs->map(ctx, real_virt_addr, real_phys_addr, real_page_size,
34062306a36Sopenharmony_ci										is_dram_addr);
34162306a36Sopenharmony_ci		if (rc)
34262306a36Sopenharmony_ci			goto err;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci		real_virt_addr += real_page_size;
34562306a36Sopenharmony_ci		real_phys_addr += real_page_size;
34662306a36Sopenharmony_ci		mapped_cnt++;
34762306a36Sopenharmony_ci	}
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	if (flush_pte)
35062306a36Sopenharmony_ci		mmu_funcs->flush(ctx);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	return 0;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_cierr:
35762306a36Sopenharmony_ci	real_virt_addr = virt_addr;
35862306a36Sopenharmony_ci	for (i = 0 ; i < mapped_cnt ; i++) {
35962306a36Sopenharmony_ci		if (mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr))
36062306a36Sopenharmony_ci			dev_warn_ratelimited(hdev->dev,
36162306a36Sopenharmony_ci				"failed to unmap va: 0x%llx\n", real_virt_addr);
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci		real_virt_addr += real_page_size;
36462306a36Sopenharmony_ci	}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	mmu_funcs->flush(ctx);
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	return rc;
36962306a36Sopenharmony_ci}
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci/*
37262306a36Sopenharmony_ci * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
37362306a36Sopenharmony_ci *                         for mapping contiguous physical memory
37462306a36Sopenharmony_ci *
37562306a36Sopenharmony_ci * @ctx: pointer to the context structure
37662306a36Sopenharmony_ci * @virt_addr: virt addr to map from
37762306a36Sopenharmony_ci * @phys_addr: phys addr to map to
37862306a36Sopenharmony_ci * @size: size to map
37962306a36Sopenharmony_ci *
38062306a36Sopenharmony_ci */
38162306a36Sopenharmony_ciint hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
38262306a36Sopenharmony_ci					u64 phys_addr, u32 size)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
38562306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
38662306a36Sopenharmony_ci	u64 curr_va, curr_pa;
38762306a36Sopenharmony_ci	u32 page_size;
38862306a36Sopenharmony_ci	bool flush_pte;
38962306a36Sopenharmony_ci	int rc = 0, off;
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	if (hl_mem_area_inside_range(virt_addr, size,
39262306a36Sopenharmony_ci			prop->dmmu.start_addr, prop->dmmu.end_addr))
39362306a36Sopenharmony_ci		page_size = prop->dmmu.page_size;
39462306a36Sopenharmony_ci	else if (hl_mem_area_inside_range(virt_addr, size,
39562306a36Sopenharmony_ci			prop->pmmu.start_addr, prop->pmmu.end_addr))
39662306a36Sopenharmony_ci		page_size = prop->pmmu.page_size;
39762306a36Sopenharmony_ci	else if (hl_mem_area_inside_range(virt_addr, size,
39862306a36Sopenharmony_ci			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
39962306a36Sopenharmony_ci		page_size = prop->pmmu_huge.page_size;
40062306a36Sopenharmony_ci	else
40162306a36Sopenharmony_ci		return -EINVAL;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	for (off = 0 ; off < size ; off += page_size) {
40462306a36Sopenharmony_ci		curr_va = virt_addr + off;
40562306a36Sopenharmony_ci		curr_pa = phys_addr + off;
40662306a36Sopenharmony_ci		flush_pte = (off + page_size) >= size;
40762306a36Sopenharmony_ci		rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
40862306a36Sopenharmony_ci								flush_pte);
40962306a36Sopenharmony_ci		if (rc) {
41062306a36Sopenharmony_ci			dev_err(hdev->dev,
41162306a36Sopenharmony_ci				"Map failed for va 0x%llx to pa 0x%llx\n",
41262306a36Sopenharmony_ci				curr_va, curr_pa);
41362306a36Sopenharmony_ci			/* last mapping failed so don't try to unmap it - reduce off by page_size */
41462306a36Sopenharmony_ci			off -= page_size;
41562306a36Sopenharmony_ci			goto unmap;
41662306a36Sopenharmony_ci		}
41762306a36Sopenharmony_ci	}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	return rc;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ciunmap:
42262306a36Sopenharmony_ci	for (; off >= 0 ; off -= page_size) {
42362306a36Sopenharmony_ci		curr_va = virt_addr + off;
42462306a36Sopenharmony_ci		flush_pte = (off - (s32) page_size) < 0;
42562306a36Sopenharmony_ci		if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
42662306a36Sopenharmony_ci			dev_warn_ratelimited(hdev->dev,
42762306a36Sopenharmony_ci				"failed to unmap va 0x%llx\n", curr_va);
42862306a36Sopenharmony_ci	}
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	return rc;
43162306a36Sopenharmony_ci}
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci/*
43462306a36Sopenharmony_ci * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
43562306a36Sopenharmony_ci *                           for unmapping contiguous physical memory
43662306a36Sopenharmony_ci *
43762306a36Sopenharmony_ci * @ctx: pointer to the context structure
43862306a36Sopenharmony_ci * @virt_addr: virt addr to unmap
43962306a36Sopenharmony_ci * @size: size to unmap
44062306a36Sopenharmony_ci *
44162306a36Sopenharmony_ci */
44262306a36Sopenharmony_ciint hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
44362306a36Sopenharmony_ci{
44462306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
44562306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
44662306a36Sopenharmony_ci	u64 curr_va;
44762306a36Sopenharmony_ci	u32 page_size;
44862306a36Sopenharmony_ci	bool flush_pte;
44962306a36Sopenharmony_ci	int rc = 0, off;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	if (hl_mem_area_inside_range(virt_addr, size,
45262306a36Sopenharmony_ci			prop->dmmu.start_addr, prop->dmmu.end_addr))
45362306a36Sopenharmony_ci		page_size = prop->dmmu.page_size;
45462306a36Sopenharmony_ci	else if (hl_mem_area_inside_range(virt_addr, size,
45562306a36Sopenharmony_ci			prop->pmmu.start_addr, prop->pmmu.end_addr))
45662306a36Sopenharmony_ci		page_size = prop->pmmu.page_size;
45762306a36Sopenharmony_ci	else if (hl_mem_area_inside_range(virt_addr, size,
45862306a36Sopenharmony_ci			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
45962306a36Sopenharmony_ci		page_size = prop->pmmu_huge.page_size;
46062306a36Sopenharmony_ci	else
46162306a36Sopenharmony_ci		return -EINVAL;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	for (off = 0 ; off < size ; off += page_size) {
46462306a36Sopenharmony_ci		curr_va = virt_addr + off;
46562306a36Sopenharmony_ci		flush_pte = (off + page_size) >= size;
46662306a36Sopenharmony_ci		rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte);
46762306a36Sopenharmony_ci		if (rc)
46862306a36Sopenharmony_ci			dev_warn_ratelimited(hdev->dev,
46962306a36Sopenharmony_ci				"Unmap failed for va 0x%llx\n", curr_va);
47062306a36Sopenharmony_ci	}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	return rc;
47362306a36Sopenharmony_ci}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_cistatic void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
47662306a36Sopenharmony_ci						struct hl_mmu_hop_info *hops,
47762306a36Sopenharmony_ci						u64 *phys_addr)
47862306a36Sopenharmony_ci{
47962306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
48062306a36Sopenharmony_ci	u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
48162306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	/* last hop holds the phys address and flags */
48462306a36Sopenharmony_ci	if (hops->unscrambled_paddr)
48562306a36Sopenharmony_ci		tmp_phys_addr = hops->unscrambled_paddr;
48662306a36Sopenharmony_ci	else
48762306a36Sopenharmony_ci		tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
49062306a36Sopenharmony_ci		mmu_prop = &prop->pmmu_huge;
49162306a36Sopenharmony_ci	else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
49262306a36Sopenharmony_ci		mmu_prop = &prop->pmmu;
49362306a36Sopenharmony_ci	else /* HL_VA_RANGE_TYPE_DRAM */
49462306a36Sopenharmony_ci		mmu_prop = &prop->dmmu;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
49762306a36Sopenharmony_ci			!is_power_of_2(prop->dram_page_size)) {
49862306a36Sopenharmony_ci		u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr,
49962306a36Sopenharmony_ci			page_id, page_start;
50062306a36Sopenharmony_ci		u32 page_off;
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci		/*
50362306a36Sopenharmony_ci		 * Bit arithmetic cannot be used for non power of two page
50462306a36Sopenharmony_ci		 * sizes. In addition, since bit arithmetic is not used,
50562306a36Sopenharmony_ci		 * we cannot ignore dram base. All that shall be considered.
50662306a36Sopenharmony_ci		 */
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci		dram_page_size = prop->dram_page_size;
50962306a36Sopenharmony_ci		dram_base = prop->dram_base_address;
51062306a36Sopenharmony_ci		abs_phys_addr = tmp_phys_addr - dram_base;
51162306a36Sopenharmony_ci		abs_virt_addr = virt_addr - dram_base;
51262306a36Sopenharmony_ci		page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size);
51362306a36Sopenharmony_ci		page_start = page_id * dram_page_size;
51462306a36Sopenharmony_ci		div_u64_rem(abs_virt_addr, dram_page_size, &page_off);
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci		*phys_addr = page_start + page_off + dram_base;
51762306a36Sopenharmony_ci	} else {
51862306a36Sopenharmony_ci		/*
51962306a36Sopenharmony_ci		 * find the correct hop shift field in hl_mmu_properties
52062306a36Sopenharmony_ci		 * structure in order to determine the right masks
52162306a36Sopenharmony_ci		 * for the page offset.
52262306a36Sopenharmony_ci		 */
52362306a36Sopenharmony_ci		hop_shift = mmu_prop->hop_shifts[hops->used_hops - 1];
52462306a36Sopenharmony_ci		offset_mask = (1ull << hop_shift) - 1;
52562306a36Sopenharmony_ci		addr_mask = ~(offset_mask);
52662306a36Sopenharmony_ci		*phys_addr = (tmp_phys_addr & addr_mask) |
52762306a36Sopenharmony_ci				(virt_addr & offset_mask);
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci}
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ciint hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
53262306a36Sopenharmony_ci{
53362306a36Sopenharmony_ci	struct hl_mmu_hop_info hops;
53462306a36Sopenharmony_ci	int rc;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	memset(&hops, 0, sizeof(hops));
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
53962306a36Sopenharmony_ci	if (rc)
54062306a36Sopenharmony_ci		return rc;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops,  phys_addr);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	return 0;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ciint hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
54862306a36Sopenharmony_ci			struct hl_mmu_hop_info *hops)
54962306a36Sopenharmony_ci{
55062306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
55162306a36Sopenharmony_ci	struct asic_fixed_properties *prop;
55262306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
55362306a36Sopenharmony_ci	struct hl_mmu_funcs *mmu_funcs;
55462306a36Sopenharmony_ci	int pgt_residency, rc;
55562306a36Sopenharmony_ci	bool is_dram_addr;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	if (hdev->mmu_disable)
55862306a36Sopenharmony_ci		return -EOPNOTSUPP;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	prop = &hdev->asic_prop;
56162306a36Sopenharmony_ci	hops->scrambled_vaddr = virt_addr;      /* assume no scrambling */
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
56462306a36Sopenharmony_ci								prop->dmmu.start_addr,
56562306a36Sopenharmony_ci								prop->dmmu.end_addr);
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	/* host-residency is the same in PMMU and PMMU huge, no need to distinguish here */
56862306a36Sopenharmony_ci	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
56962306a36Sopenharmony_ci	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
57062306a36Sopenharmony_ci	mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci	mutex_lock(&hdev->mmu_lock);
57362306a36Sopenharmony_ci	rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
57462306a36Sopenharmony_ci	mutex_unlock(&hdev->mmu_lock);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	if (rc)
57762306a36Sopenharmony_ci		return rc;
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	/* add page offset to physical address */
58062306a36Sopenharmony_ci	if (hops->unscrambled_paddr)
58162306a36Sopenharmony_ci		hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, &hops->unscrambled_paddr);
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	return 0;
58462306a36Sopenharmony_ci}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ciint hl_mmu_if_set_funcs(struct hl_device *hdev)
58762306a36Sopenharmony_ci{
58862306a36Sopenharmony_ci	if (hdev->mmu_disable)
58962306a36Sopenharmony_ci		return 0;
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	switch (hdev->asic_type) {
59262306a36Sopenharmony_ci	case ASIC_GOYA:
59362306a36Sopenharmony_ci	case ASIC_GAUDI:
59462306a36Sopenharmony_ci	case ASIC_GAUDI_SEC:
59562306a36Sopenharmony_ci		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
59662306a36Sopenharmony_ci		break;
59762306a36Sopenharmony_ci	case ASIC_GAUDI2:
59862306a36Sopenharmony_ci	case ASIC_GAUDI2B:
59962306a36Sopenharmony_ci	case ASIC_GAUDI2C:
60062306a36Sopenharmony_ci		/* MMUs in Gaudi2 are always host resident */
60162306a36Sopenharmony_ci		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
60262306a36Sopenharmony_ci		break;
60362306a36Sopenharmony_ci	default:
60462306a36Sopenharmony_ci		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
60562306a36Sopenharmony_ci			hdev->asic_type);
60662306a36Sopenharmony_ci		return -EOPNOTSUPP;
60762306a36Sopenharmony_ci	}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	return 0;
61062306a36Sopenharmony_ci}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci/**
61362306a36Sopenharmony_ci * hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
61462306a36Sopenharmony_ci * @hdev: pointer to device data.
61562306a36Sopenharmony_ci * @addr: The address to scramble.
61662306a36Sopenharmony_ci *
61762306a36Sopenharmony_ci * Return: The scrambled address.
61862306a36Sopenharmony_ci */
61962306a36Sopenharmony_ciu64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
62062306a36Sopenharmony_ci{
62162306a36Sopenharmony_ci	return addr;
62262306a36Sopenharmony_ci}
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci/**
62562306a36Sopenharmony_ci * hl_mmu_descramble_addr() - The generic mmu address descrambling
62662306a36Sopenharmony_ci * routine.
62762306a36Sopenharmony_ci * @hdev: pointer to device data.
62862306a36Sopenharmony_ci * @addr: The address to descramble.
62962306a36Sopenharmony_ci *
63062306a36Sopenharmony_ci * Return: The un-scrambled address.
63162306a36Sopenharmony_ci */
63262306a36Sopenharmony_ciu64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	return addr;
63562306a36Sopenharmony_ci}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ciint hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
63862306a36Sopenharmony_ci{
63962306a36Sopenharmony_ci	int rc;
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
64262306a36Sopenharmony_ci	if (rc)
64362306a36Sopenharmony_ci		dev_err_ratelimited(hdev->dev,
64462306a36Sopenharmony_ci				"%s cache invalidation failed, rc=%d\n",
64562306a36Sopenharmony_ci				flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc);
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	return rc;
64862306a36Sopenharmony_ci}
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ciint hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
65162306a36Sopenharmony_ci					u32 flags, u32 asid, u64 va, u64 size)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	int rc;
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags,
65662306a36Sopenharmony_ci								asid, va, size);
65762306a36Sopenharmony_ci	if (rc)
65862306a36Sopenharmony_ci		dev_err_ratelimited(hdev->dev,
65962306a36Sopenharmony_ci				"%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
66062306a36Sopenharmony_ci				flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc);
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	return rc;
66362306a36Sopenharmony_ci}
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_cistatic void hl_mmu_prefetch_work_function(struct work_struct *work)
66662306a36Sopenharmony_ci{
66762306a36Sopenharmony_ci	struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, prefetch_work);
66862306a36Sopenharmony_ci	struct hl_ctx *ctx = pfw->ctx;
66962306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	if (!hl_device_operational(hdev, NULL))
67262306a36Sopenharmony_ci		goto put_ctx;
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	mutex_lock(&hdev->mmu_lock);
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	mutex_unlock(&hdev->mmu_lock);
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ciput_ctx:
68162306a36Sopenharmony_ci	/*
68262306a36Sopenharmony_ci	 * context was taken in the common mmu prefetch function- see comment there about
68362306a36Sopenharmony_ci	 * context handling.
68462306a36Sopenharmony_ci	 */
68562306a36Sopenharmony_ci	hl_ctx_put(ctx);
68662306a36Sopenharmony_ci	kfree(pfw);
68762306a36Sopenharmony_ci}
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ciint hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size)
69062306a36Sopenharmony_ci{
69162306a36Sopenharmony_ci	struct hl_prefetch_work *handle_prefetch_work;
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	handle_prefetch_work = kmalloc(sizeof(*handle_prefetch_work), GFP_KERNEL);
69462306a36Sopenharmony_ci	if (!handle_prefetch_work)
69562306a36Sopenharmony_ci		return -ENOMEM;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	INIT_WORK(&handle_prefetch_work->prefetch_work, hl_mmu_prefetch_work_function);
69862306a36Sopenharmony_ci	handle_prefetch_work->ctx = ctx;
69962306a36Sopenharmony_ci	handle_prefetch_work->va = va;
70062306a36Sopenharmony_ci	handle_prefetch_work->size = size;
70162306a36Sopenharmony_ci	handle_prefetch_work->flags = flags;
70262306a36Sopenharmony_ci	handle_prefetch_work->asid = asid;
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	/*
70562306a36Sopenharmony_ci	 * as actual prefetch is done in a WQ we must get the context (and put it
70662306a36Sopenharmony_ci	 * at the end of the work function)
70762306a36Sopenharmony_ci	 */
70862306a36Sopenharmony_ci	hl_ctx_get(ctx);
70962306a36Sopenharmony_ci	queue_work(ctx->hdev->prefetch_wq, &handle_prefetch_work->prefetch_work);
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	return 0;
71262306a36Sopenharmony_ci}
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ciu64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
71562306a36Sopenharmony_ci{
71662306a36Sopenharmony_ci	return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
71762306a36Sopenharmony_ci}
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci/**
72062306a36Sopenharmony_ci * hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
72162306a36Sopenharmony_ci * @ctx: pointer to the context structure to initialize.
72262306a36Sopenharmony_ci * @mmu_prop: MMU properties.
72362306a36Sopenharmony_ci * @hop_idx: HOP index.
72462306a36Sopenharmony_ci * @hop_addr: HOP address.
72562306a36Sopenharmony_ci * @virt_addr: virtual address for the translation.
72662306a36Sopenharmony_ci *
72762306a36Sopenharmony_ci * @return the matching PTE value on success, otherwise U64_MAX.
72862306a36Sopenharmony_ci */
72962306a36Sopenharmony_ciu64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
73062306a36Sopenharmony_ci					u8 hop_idx, u64 hop_addr, u64 virt_addr)
73162306a36Sopenharmony_ci{
73262306a36Sopenharmony_ci	u64 mask, shift;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	if (hop_idx >= mmu_prop->num_hops) {
73562306a36Sopenharmony_ci		dev_err_ratelimited(ctx->hdev->dev, "Invalid hop index %d\n", hop_idx);
73662306a36Sopenharmony_ci		return U64_MAX;
73762306a36Sopenharmony_ci	}
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	shift = mmu_prop->hop_shifts[hop_idx];
74062306a36Sopenharmony_ci	mask = mmu_prop->hop_masks[hop_idx];
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
74362306a36Sopenharmony_ci}
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_cistatic void mmu_dma_mem_free_from_chunk(struct gen_pool *pool,
74662306a36Sopenharmony_ci					struct gen_pool_chunk *chunk,
74762306a36Sopenharmony_ci					void *data)
74862306a36Sopenharmony_ci{
74962306a36Sopenharmony_ci	struct hl_device *hdev = data;
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	hl_asic_dma_free_coherent(hdev, (chunk->end_addr - chunk->start_addr) + 1,
75262306a36Sopenharmony_ci					(void *)chunk->start_addr, chunk->phys_addr);
75362306a36Sopenharmony_ci}
75462306a36Sopenharmony_ci
/**
 * hl_mmu_hr_flush() - flush for host-resident page tables.
 * @ctx: pointer to the context structure (not used here).
 *
 * The whole flush consists of a full memory barrier.
 */
void hl_mmu_hr_flush(struct hl_ctx *ctx)
{
	/* the flush operation is implemented as a memory barrier */
	mb();
}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci/**
76262306a36Sopenharmony_ci * hl_mmu_hr_pool_destroy() - destroy genpool
76362306a36Sopenharmony_ci * @hdev: habanalabs device structure.
76462306a36Sopenharmony_ci * @hr_priv: MMU HR private data.
76562306a36Sopenharmony_ci * @hop_table_size: HOP table size.
76662306a36Sopenharmony_ci *
76762306a36Sopenharmony_ci * This function does the following:
76862306a36Sopenharmony_ci * - free entries allocated for shadow HOP0
76962306a36Sopenharmony_ci * - free pool chunks
77062306a36Sopenharmony_ci * - free pool
77162306a36Sopenharmony_ci */
77262306a36Sopenharmony_cistatic void hl_mmu_hr_pool_destroy(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv,
77362306a36Sopenharmony_ci					u32 hop_table_size)
77462306a36Sopenharmony_ci{
77562306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
77662306a36Sopenharmony_ci	struct gen_pool **pool = &hr_priv->mmu_pgt_pool;
77762306a36Sopenharmony_ci	struct pgt_info *hop0_pgt;
77862306a36Sopenharmony_ci	int asid;
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	if (ZERO_OR_NULL_PTR(*pool))
78162306a36Sopenharmony_ci		return;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	/* Free the Fixed allocation of HOPs0 */
78462306a36Sopenharmony_ci	if (hr_priv->mmu_asid_hop0) {
78562306a36Sopenharmony_ci		for (asid = 0 ; asid < prop->max_asid ; asid++) {
78662306a36Sopenharmony_ci			hop0_pgt = &hr_priv->mmu_asid_hop0[asid];
78762306a36Sopenharmony_ci			if (ZERO_OR_NULL_PTR(hop0_pgt->virt_addr))
78862306a36Sopenharmony_ci				continue;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci			gen_pool_free(*pool, (uintptr_t) hop0_pgt->virt_addr, hop_table_size);
79162306a36Sopenharmony_ci		}
79262306a36Sopenharmony_ci	}
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	gen_pool_for_each_chunk(*pool, mmu_dma_mem_free_from_chunk, hdev);
79562306a36Sopenharmony_ci	gen_pool_destroy(*pool);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	/* Make sure that if we arrive here again without init was called we
79862306a36Sopenharmony_ci	 * won't cause kernel panic. This can happen for example if we fail
79962306a36Sopenharmony_ci	 * during hard reset code at certain points
80062306a36Sopenharmony_ci	 */
80162306a36Sopenharmony_ci	*pool = NULL;
80262306a36Sopenharmony_ci}
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci/**
80562306a36Sopenharmony_ci * hl_mmu_hr_init() - initialize the MMU module.
80662306a36Sopenharmony_ci * @hdev: habanalabs device structure.
80762306a36Sopenharmony_ci * @hr_priv: MMU HR private data.
80862306a36Sopenharmony_ci * @hop_table_size: HOP table size.
80962306a36Sopenharmony_ci * @pgt_size: memory size allocated for the page table
81062306a36Sopenharmony_ci *
81162306a36Sopenharmony_ci * @return 0 on success otherwise non-zero error code
81262306a36Sopenharmony_ci *
81362306a36Sopenharmony_ci * This function does the following:
81462306a36Sopenharmony_ci * - Create a pool of pages for pgt_infos.
81562306a36Sopenharmony_ci * - Create a shadow table for pgt
81662306a36Sopenharmony_ci */
81762306a36Sopenharmony_ciint hl_mmu_hr_init(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size,
81862306a36Sopenharmony_ci			u64 pgt_size)
81962306a36Sopenharmony_ci{
82062306a36Sopenharmony_ci	struct asic_fixed_properties *prop = &hdev->asic_prop;
82162306a36Sopenharmony_ci	size_t pool_chunk_size = SZ_4M;
82262306a36Sopenharmony_ci	struct pgt_info *hop0_pgt;
82362306a36Sopenharmony_ci	dma_addr_t dma_addr;
82462306a36Sopenharmony_ci	u64 virt_addr;
82562306a36Sopenharmony_ci	int i, rc;
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	/*
82862306a36Sopenharmony_ci	 * we set alloc size as PAGE_SIZE (sine dma_alloc_coherent allocation order/size is
82962306a36Sopenharmony_ci	 * PAGE_SHIFT/PAGE_SIZE) in order to be able to control the allocations alignment.
83062306a36Sopenharmony_ci	 * This way we can call "DMA alloc align" according to dma_alloc granularity and supply
83162306a36Sopenharmony_ci	 * allocations with higher-order alignment restrictions
83262306a36Sopenharmony_ci	 */
83362306a36Sopenharmony_ci	hr_priv->mmu_pgt_pool = gen_pool_create(PAGE_SHIFT, -1);
83462306a36Sopenharmony_ci	if (ZERO_OR_NULL_PTR(hr_priv->mmu_pgt_pool)) {
83562306a36Sopenharmony_ci		dev_err(hdev->dev, "Failed to create hr page pool\n");
83662306a36Sopenharmony_ci		return -ENOMEM;
83762306a36Sopenharmony_ci	}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	hr_priv->mmu_asid_hop0 = kvcalloc(prop->max_asid, sizeof(struct pgt_info), GFP_KERNEL);
84062306a36Sopenharmony_ci	if (ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) {
84162306a36Sopenharmony_ci		dev_err(hdev->dev, "Failed to allocate hr-mmu hop0 table\n");
84262306a36Sopenharmony_ci		rc = -ENOMEM;
84362306a36Sopenharmony_ci		goto destroy_mmu_pgt_pool;
84462306a36Sopenharmony_ci	}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	for (i = 0 ; i < pgt_size ; i += pool_chunk_size) {
84762306a36Sopenharmony_ci		virt_addr = (uintptr_t) hl_asic_dma_alloc_coherent(hdev, pool_chunk_size,
84862306a36Sopenharmony_ci									&dma_addr,
84962306a36Sopenharmony_ci									GFP_KERNEL | __GFP_ZERO);
85062306a36Sopenharmony_ci		if (ZERO_OR_NULL_PTR(virt_addr)) {
85162306a36Sopenharmony_ci			dev_err(hdev->dev,
85262306a36Sopenharmony_ci				"Failed to allocate memory for host-resident page pool\n");
85362306a36Sopenharmony_ci			rc = -ENOMEM;
85462306a36Sopenharmony_ci			goto destroy_mmu_pgt_pool;
85562306a36Sopenharmony_ci		}
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci		rc = gen_pool_add_virt(hr_priv->mmu_pgt_pool, virt_addr, (phys_addr_t) dma_addr,
85862306a36Sopenharmony_ci						pool_chunk_size, -1);
85962306a36Sopenharmony_ci		if (rc) {
86062306a36Sopenharmony_ci			dev_err(hdev->dev, "Failed to fill host-resident page pool\n");
86162306a36Sopenharmony_ci			goto destroy_mmu_pgt_pool;
86262306a36Sopenharmony_ci		}
86362306a36Sopenharmony_ci	}
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	for (i = 0 ; i < prop->max_asid ; i++) {
86662306a36Sopenharmony_ci		hop0_pgt = &hr_priv->mmu_asid_hop0[i];
86762306a36Sopenharmony_ci		hop0_pgt->virt_addr = (uintptr_t)
86862306a36Sopenharmony_ci					gen_pool_dma_zalloc_align(hr_priv->mmu_pgt_pool,
86962306a36Sopenharmony_ci								hop_table_size,
87062306a36Sopenharmony_ci								(dma_addr_t *) &hop0_pgt->phys_addr,
87162306a36Sopenharmony_ci								hop_table_size);
87262306a36Sopenharmony_ci		if (!hop0_pgt->virt_addr) {
87362306a36Sopenharmony_ci			dev_err(hdev->dev, "Failed to allocate HOP from pgt pool\n");
87462306a36Sopenharmony_ci			rc = -ENOMEM;
87562306a36Sopenharmony_ci			goto destroy_mmu_pgt_pool;
87662306a36Sopenharmony_ci		}
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	/* MMU H/W init will be done in device hw_init() */
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	return 0;
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_cidestroy_mmu_pgt_pool:
88462306a36Sopenharmony_ci	hl_mmu_hr_pool_destroy(hdev, hr_priv, hop_table_size);
88562306a36Sopenharmony_ci	if (!ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0))
88662306a36Sopenharmony_ci		kvfree(hr_priv->mmu_asid_hop0);
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	return rc;
88962306a36Sopenharmony_ci}
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci/**
89262306a36Sopenharmony_ci * hl_mmu_hr_fini() - release the MMU module.
89362306a36Sopenharmony_ci * @hdev: habanalabs device structure.
89462306a36Sopenharmony_ci * @hr_priv: MMU host resident private info.
89562306a36Sopenharmony_ci * @hop_table_size: HOP table size
89662306a36Sopenharmony_ci *
89762306a36Sopenharmony_ci * This function does the following:
89862306a36Sopenharmony_ci * - Disable MMU in H/W.
89962306a36Sopenharmony_ci * - Free the pgt_infos pool.
90062306a36Sopenharmony_ci *
90162306a36Sopenharmony_ci * All contexts should be freed before calling this function.
90262306a36Sopenharmony_ci */
90362306a36Sopenharmony_civoid hl_mmu_hr_fini(struct hl_device *hdev, struct hl_mmu_hr_priv *hr_priv, u32 hop_table_size)
90462306a36Sopenharmony_ci{
90562306a36Sopenharmony_ci	/* MMU H/W fini was already done in device hw_fini() */
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	hl_mmu_hr_pool_destroy(hdev, hr_priv, hop_table_size);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	if (!ZERO_OR_NULL_PTR(hr_priv->mmu_asid_hop0)) {
91062306a36Sopenharmony_ci		kvfree(hr_priv->mmu_asid_hop0);
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci		/* Make sure that if we arrive here again without init was
91362306a36Sopenharmony_ci		 * called we won't cause kernel panic. This can happen for
91462306a36Sopenharmony_ci		 * example if we fail during hard reset code at certain points
91562306a36Sopenharmony_ci		 */
91662306a36Sopenharmony_ci		hr_priv->mmu_asid_hop0 = NULL;
91762306a36Sopenharmony_ci	}
91862306a36Sopenharmony_ci}
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci/**
92162306a36Sopenharmony_ci * hl_mmu_hr_free_hop_remove_pgt() - free HOP and remove PGT from hash
92262306a36Sopenharmony_ci * @pgt_info: page table info structure.
92362306a36Sopenharmony_ci * @hr_priv: MMU HR private data.
92462306a36Sopenharmony_ci * @hop_table_size: HOP table size.
92562306a36Sopenharmony_ci */
92662306a36Sopenharmony_civoid hl_mmu_hr_free_hop_remove_pgt(struct pgt_info *pgt_info, struct hl_mmu_hr_priv *hr_priv,
92762306a36Sopenharmony_ci					u32 hop_table_size)
92862306a36Sopenharmony_ci{
92962306a36Sopenharmony_ci	gen_pool_free(hr_priv->mmu_pgt_pool, pgt_info->virt_addr, hop_table_size);
93062306a36Sopenharmony_ci	hash_del(&pgt_info->node);
93162306a36Sopenharmony_ci	kfree(pgt_info);
93262306a36Sopenharmony_ci}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci/**
93562306a36Sopenharmony_ci * hl_mmu_hr_pte_phys_to_virt() - translate PTE phys addr to virt addr
93662306a36Sopenharmony_ci * @ctx: pointer to the context structure
93762306a36Sopenharmony_ci * @pgt: pgt_info for the HOP hosting the PTE
93862306a36Sopenharmony_ci * @phys_pte_addr: phys address of the PTE
93962306a36Sopenharmony_ci * @hop_table_size: HOP table size
94062306a36Sopenharmony_ci *
94162306a36Sopenharmony_ci * @return PTE virtual address
94262306a36Sopenharmony_ci *
94362306a36Sopenharmony_ci * The function use the pgt_info to get HOP base virt addr and obtain the PTE's virt addr
94462306a36Sopenharmony_ci * by adding the PTE offset.
94562306a36Sopenharmony_ci */
94662306a36Sopenharmony_ciu64 hl_mmu_hr_pte_phys_to_virt(struct hl_ctx *ctx, struct pgt_info *pgt,
94762306a36Sopenharmony_ci							u64 phys_pte_addr, u32 hop_table_size)
94862306a36Sopenharmony_ci{
94962306a36Sopenharmony_ci	u64 page_mask = (hop_table_size - 1);
95062306a36Sopenharmony_ci	u64 pte_offset = phys_pte_addr & page_mask;
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	return pgt->virt_addr + pte_offset;
95362306a36Sopenharmony_ci}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci/**
95662306a36Sopenharmony_ci * hl_mmu_hr_write_pte() - write HR PTE
95762306a36Sopenharmony_ci * @ctx: pointer to the context structure
95862306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure
95962306a36Sopenharmony_ci * @phys_pte_addr: phys PTE address
96062306a36Sopenharmony_ci * @val: raw PTE data
96162306a36Sopenharmony_ci * @hop_table_size: HOP table size
96262306a36Sopenharmony_ci */
96362306a36Sopenharmony_civoid hl_mmu_hr_write_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr,
96462306a36Sopenharmony_ci								u64 val, u32 hop_table_size)
96562306a36Sopenharmony_ci{
96662306a36Sopenharmony_ci	/*
96762306a36Sopenharmony_ci	 * The value to write is the phys address of the next hop +
96862306a36Sopenharmony_ci	 * flags at the 12 LSBs.
96962306a36Sopenharmony_ci	 */
97062306a36Sopenharmony_ci	u64 virt_addr = hl_mmu_hr_pte_phys_to_virt(ctx, pgt_info, phys_pte_addr, hop_table_size);
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci	*((u64 *) (uintptr_t) virt_addr) = val;
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/**
97662306a36Sopenharmony_ci * hl_mmu_hr_clear_pte() - clear HR PTE
97762306a36Sopenharmony_ci * @ctx: pointer to the context structure
97862306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure
97962306a36Sopenharmony_ci * @phys_pte_addr: phys PTE address
98062306a36Sopenharmony_ci * @hop_table_size: HOP table size
98162306a36Sopenharmony_ci */
98262306a36Sopenharmony_civoid hl_mmu_hr_clear_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info, u64 phys_pte_addr,
98362306a36Sopenharmony_ci						u32 hop_table_size)
98462306a36Sopenharmony_ci{
98562306a36Sopenharmony_ci	/* no need to transform the value to physical address */
98662306a36Sopenharmony_ci	hl_mmu_hr_write_pte(ctx, pgt_info, phys_pte_addr, 0, hop_table_size);
98762306a36Sopenharmony_ci}
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci/**
99062306a36Sopenharmony_ci * hl_mmu_hr_put_pte() - put HR PTE and remove it if necessary (no more PTEs)
99162306a36Sopenharmony_ci * @ctx: pointer to the context structure
99262306a36Sopenharmony_ci * @pgt_info: HOP's page table info structure
99362306a36Sopenharmony_ci * @hr_priv: HR MMU private info
99462306a36Sopenharmony_ci * @hop_table_size: HOP table size
99562306a36Sopenharmony_ci *
99662306a36Sopenharmony_ci * @return number of PTEs still in the HOP
99762306a36Sopenharmony_ci */
99862306a36Sopenharmony_ciint hl_mmu_hr_put_pte(struct hl_ctx *ctx, struct pgt_info *pgt_info,
99962306a36Sopenharmony_ci						struct hl_mmu_hr_priv *hr_priv,
100062306a36Sopenharmony_ci						u32 hop_table_size)
100162306a36Sopenharmony_ci{
100262306a36Sopenharmony_ci	int num_of_ptes_left;
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	pgt_info->num_of_ptes--;
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	/*
100762306a36Sopenharmony_ci	 * Need to save the number of ptes left because free_hop might free
100862306a36Sopenharmony_ci	 * the pgt_info
100962306a36Sopenharmony_ci	 */
101062306a36Sopenharmony_ci	num_of_ptes_left = pgt_info->num_of_ptes;
101162306a36Sopenharmony_ci	if (!num_of_ptes_left)
101262306a36Sopenharmony_ci		hl_mmu_hr_free_hop_remove_pgt(pgt_info, hr_priv, hop_table_size);
101362306a36Sopenharmony_ci
101462306a36Sopenharmony_ci	return num_of_ptes_left;
101562306a36Sopenharmony_ci}
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci/**
101862306a36Sopenharmony_ci * hl_mmu_hr_get_pte() - increase PGT PTE count
101962306a36Sopenharmony_ci * @ctx: pointer to the context structure
102062306a36Sopenharmony_ci * @hr_func: host resident functions
102162306a36Sopenharmony_ci * @phys_hop_addr: HOP phys address
102262306a36Sopenharmony_ci */
102362306a36Sopenharmony_civoid hl_mmu_hr_get_pte(struct hl_ctx *ctx, struct hl_hr_mmu_funcs *hr_func, u64 phys_hop_addr)
102462306a36Sopenharmony_ci{
102562306a36Sopenharmony_ci	hr_func->get_pgt_info(ctx, phys_hop_addr)->num_of_ptes++;
102662306a36Sopenharmony_ci}
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci/**
102962306a36Sopenharmony_ci * hl_mmu_hr_get_next_hop_pgt_info() - get pgt_info structure for the next HOP
103062306a36Sopenharmony_ci * @ctx: pointer to the context structure.
103162306a36Sopenharmony_ci * @hr_func: host resident functions.
103262306a36Sopenharmony_ci * @curr_pte: current PTE value.
103362306a36Sopenharmony_ci *
103462306a36Sopenharmony_ci * @return pgt_info structure on success, otherwise NULL.
103562306a36Sopenharmony_ci */
103662306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_get_next_hop_pgt_info(struct hl_ctx *ctx,
103762306a36Sopenharmony_ci							struct hl_hr_mmu_funcs *hr_func,
103862306a36Sopenharmony_ci							u64 curr_pte)
103962306a36Sopenharmony_ci{
104062306a36Sopenharmony_ci	u64 next_hop_phys_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_ci	if (next_hop_phys_addr == ULLONG_MAX)
104362306a36Sopenharmony_ci		return NULL;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	return hr_func->get_pgt_info(ctx, next_hop_phys_addr);
104662306a36Sopenharmony_ci}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci/**
104962306a36Sopenharmony_ci * hl_mmu_hr_alloc_hop() - allocate HOP
105062306a36Sopenharmony_ci * @ctx: pointer to the context structure.
105162306a36Sopenharmony_ci * @hr_priv: host resident private info structure.
105262306a36Sopenharmony_ci * @hr_func: host resident functions.
105362306a36Sopenharmony_ci * @mmu_prop: MMU properties.
105462306a36Sopenharmony_ci *
105562306a36Sopenharmony_ci * @return pgt_info structure associated with the allocated HOP on success, otherwise NULL.
105662306a36Sopenharmony_ci */
105762306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_alloc_hop(struct hl_ctx *ctx, struct hl_mmu_hr_priv *hr_priv,
105862306a36Sopenharmony_ci							struct hl_hr_mmu_funcs *hr_func,
105962306a36Sopenharmony_ci							struct hl_mmu_properties *mmu_prop)
106062306a36Sopenharmony_ci{
106162306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
106262306a36Sopenharmony_ci	struct pgt_info *pgt_info;
106362306a36Sopenharmony_ci	dma_addr_t phys_addr;
106462306a36Sopenharmony_ci	void *virt_addr;
106562306a36Sopenharmony_ci	int i, retry = 1;
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
106862306a36Sopenharmony_ci	if (!pgt_info)
106962306a36Sopenharmony_ci		return NULL;
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci	for (i = 0; i <= retry; i++) {
107262306a36Sopenharmony_ci		virt_addr = gen_pool_dma_zalloc_align(hr_priv->mmu_pgt_pool,
107362306a36Sopenharmony_ci							mmu_prop->hop_table_size,
107462306a36Sopenharmony_ci							&phys_addr,
107562306a36Sopenharmony_ci							mmu_prop->hop_table_size);
107662306a36Sopenharmony_ci		if (virt_addr)
107762306a36Sopenharmony_ci			break;
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci		/* No memory in pool - get some and try again */
108062306a36Sopenharmony_ci		virt_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &phys_addr,
108162306a36Sopenharmony_ci							GFP_KERNEL | __GFP_ZERO);
108262306a36Sopenharmony_ci		if (ZERO_OR_NULL_PTR(virt_addr))
108362306a36Sopenharmony_ci			break;
108462306a36Sopenharmony_ci
108562306a36Sopenharmony_ci		if (gen_pool_add_virt(hr_priv->mmu_pgt_pool, (unsigned long)virt_addr,
108662306a36Sopenharmony_ci								phys_addr, SZ_2M, -1)) {
108762306a36Sopenharmony_ci			hl_asic_dma_free_coherent(hdev, SZ_2M, virt_addr, phys_addr);
108862306a36Sopenharmony_ci			virt_addr = NULL;
108962306a36Sopenharmony_ci			break;
109062306a36Sopenharmony_ci		}
109162306a36Sopenharmony_ci	}
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	if (ZERO_OR_NULL_PTR(virt_addr)) {
109462306a36Sopenharmony_ci		dev_err(hdev->dev, "failed to allocate page\n");
109562306a36Sopenharmony_ci		goto pool_alloc_err;
109662306a36Sopenharmony_ci	}
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	pgt_info->phys_addr = phys_addr;
109962306a36Sopenharmony_ci	pgt_info->shadow_addr = (unsigned long) NULL;
110062306a36Sopenharmony_ci	pgt_info->virt_addr = (unsigned long)virt_addr;
110162306a36Sopenharmony_ci	pgt_info->ctx = ctx;
110262306a36Sopenharmony_ci	pgt_info->num_of_ptes = 0;
110362306a36Sopenharmony_ci	hr_func->add_pgt_info(ctx, pgt_info, phys_addr);
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	return pgt_info;
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_cipool_alloc_err:
110862306a36Sopenharmony_ci	kfree(pgt_info);
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	return NULL;
111162306a36Sopenharmony_ci}
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci/**
111462306a36Sopenharmony_ci * hl_mmu_hr_get_alloc_next_hop() - get the next HOP, allocate it if it does not exist
111562306a36Sopenharmony_ci * @ctx: pointer to the context structure.
111662306a36Sopenharmony_ci * @hr_priv: host resident private info structure.
111762306a36Sopenharmony_ci * @hr_func: host resident functions.
111862306a36Sopenharmony_ci * @mmu_prop: MMU properties.
111962306a36Sopenharmony_ci * @curr_pte: current PTE value.
112062306a36Sopenharmony_ci * @is_new_hop: set to true if HOP is new (caller responsibility to set it to false).
112162306a36Sopenharmony_ci *
112262306a36Sopenharmony_ci * @return pgt_info structure associated with the allocated HOP on success, otherwise NULL.
112362306a36Sopenharmony_ci */
112462306a36Sopenharmony_cistruct pgt_info *hl_mmu_hr_get_alloc_next_hop(struct hl_ctx *ctx,
112562306a36Sopenharmony_ci							struct hl_mmu_hr_priv *hr_priv,
112662306a36Sopenharmony_ci							struct hl_hr_mmu_funcs *hr_func,
112762306a36Sopenharmony_ci							struct hl_mmu_properties *mmu_prop,
112862306a36Sopenharmony_ci							u64 curr_pte, bool *is_new_hop)
112962306a36Sopenharmony_ci{
113062306a36Sopenharmony_ci	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	if (hop_addr != ULLONG_MAX)
113362306a36Sopenharmony_ci		return hr_func->get_pgt_info(ctx, hop_addr);
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci	*is_new_hop = true;
113662306a36Sopenharmony_ci	return hl_mmu_hr_alloc_hop(ctx, hr_priv, hr_func, mmu_prop);
113762306a36Sopenharmony_ci}
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci/**
114062306a36Sopenharmony_ci * hl_mmu_hr_get_tlb_info() - get the TLB info (info for a specific mapping)
114162306a36Sopenharmony_ci * @ctx: pointer to the context structure.
114262306a36Sopenharmony_ci * @virt_addr: the virt address for which to get info.
114362306a36Sopenharmony_ci * @hops: HOPs info structure.
114462306a36Sopenharmony_ci * @hr_func: host resident functions.
114562306a36Sopenharmony_ci *
114662306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code..
114762306a36Sopenharmony_ci */
114862306a36Sopenharmony_ciint hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops,
114962306a36Sopenharmony_ci								struct hl_hr_mmu_funcs *hr_func)
115062306a36Sopenharmony_ci{
115162306a36Sopenharmony_ci	/* using 6 HOPs as this is the maximum number of HOPs */
115262306a36Sopenharmony_ci	struct pgt_info *hops_pgt_info[MMU_ARCH_6_HOPS] = { NULL };
115362306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
115462306a36Sopenharmony_ci	struct hl_mmu_properties *mmu_prop;
115562306a36Sopenharmony_ci	int rc, i, used_hops;
115662306a36Sopenharmony_ci	bool is_huge;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	rc = hr_func->get_tlb_mapping_params(hdev, &mmu_prop, hops, virt_addr, &is_huge);
115962306a36Sopenharmony_ci	if (rc)
116062306a36Sopenharmony_ci		return rc;
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci	used_hops = mmu_prop->num_hops;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	/* huge pages use one less hop */
116562306a36Sopenharmony_ci	if (is_huge)
116662306a36Sopenharmony_ci		used_hops--;
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	for (i = 0 ; i < used_hops ; i++) {
117162306a36Sopenharmony_ci		if (i == 0)
117262306a36Sopenharmony_ci			hops_pgt_info[i] = hr_func->get_hop0_pgt_info(ctx);
117362306a36Sopenharmony_ci		else
117462306a36Sopenharmony_ci			hops_pgt_info[i] = hl_mmu_hr_get_next_hop_pgt_info(ctx, hr_func,
117562306a36Sopenharmony_ci								hops->hop_info[i - 1].hop_pte_val);
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci		if (!hops_pgt_info[i])
117862306a36Sopenharmony_ci			return -EFAULT;
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci		hops->hop_info[i].hop_addr = hops_pgt_info[i]->phys_addr;
118162306a36Sopenharmony_ci		hops->hop_info[i].hop_pte_addr =
118262306a36Sopenharmony_ci				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
118362306a36Sopenharmony_ci								hops->hop_info[i].hop_addr,
118462306a36Sopenharmony_ci								hops->scrambled_vaddr);
118562306a36Sopenharmony_ci		hops->hop_info[i].hop_pte_val = *(u64 *) (uintptr_t)
118662306a36Sopenharmony_ci						hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
118762306a36Sopenharmony_ci								hops->hop_info[i].hop_pte_addr,
118862306a36Sopenharmony_ci								mmu_prop->hop_table_size);
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
119162306a36Sopenharmony_ci			return -EFAULT;
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_ci		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
119462306a36Sopenharmony_ci			break;
119562306a36Sopenharmony_ci	}
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci	/* if passed over all hops then no last hop was found */
119862306a36Sopenharmony_ci	if (i == mmu_prop->num_hops)
119962306a36Sopenharmony_ci		return -EFAULT;
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_ci	if (hops->scrambled_vaddr != virt_addr)
120262306a36Sopenharmony_ci		hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
120362306a36Sopenharmony_ci				(hdev, hops->hop_info[i].hop_pte_val);
120462306a36Sopenharmony_ci	else
120562306a36Sopenharmony_ci		hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci	hops->used_hops = i + 1;
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	return 0;
121062306a36Sopenharmony_ci}
121162306a36Sopenharmony_ci
1212