18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/* Copyright (c) 2019 HiSilicon Limited. */
38c2ecf20Sopenharmony_ci#include <linux/dma-mapping.h>
48c2ecf20Sopenharmony_ci#include <linux/module.h>
58c2ecf20Sopenharmony_ci#include <linux/slab.h>
68c2ecf20Sopenharmony_ci#include "qm.h"
78c2ecf20Sopenharmony_ci
/* NOTE(review): presumably the minimum SGE count per hw SGL; not referenced in the visible code - confirm */
#define HISI_ACC_SGL_SGE_NR_MIN		1
/* NOTE(review): looks like an upper bound on the number of SGLs; not referenced in the visible code - confirm */
#define HISI_ACC_SGL_NR_MAX		256
/* NOTE(review): presumably the alignment (bytes) expected for a hw SGL; not referenced in the visible code - confirm */
#define HISI_ACC_SGL_ALIGN_SIZE		64
/* Number of mem_block slots in a pool, i.e. the most DMA blocks one pool may use */
#define HISI_ACC_MEM_BLOCK_NR		5
128c2ecf20Sopenharmony_ci
/*
 * One scatter-gather entry (SGE) as stored in the sge_entries[] array of a
 * hw SGL. In this file only @buf and @len are written, by sg_map_to_hw_sg().
 */
struct acc_hw_sge {
	dma_addr_t buf;		/* DMA address of the mapped segment */
	void *page_ctrl;	/* not touched by the code in this file */
	__le32 len;		/* DMA length of the segment, little-endian */
	__le32 pad;		/* padding words, never written here */
	__le32 pad0;
	__le32 pad1;
};
218c2ecf20Sopenharmony_ci
/*
 * Hardware SGL: a fixed-size head followed by the SGE array. The head
 * (everything before sge_entries[]) uses the default size of 64 bytes
 * when dma_addr_t and pointers are 8 bytes wide.
 */
struct hisi_acc_hw_sgl {
	/* NOTE(review): presumably DMA address of a chained SGL; unused in this file - confirm */
	dma_addr_t next_dma;
	/* set to the pool's sge_nr when mapped, cleared on unmap */
	__le16 entry_sum_in_chain;
	/* number of SGEs filled in; incremented once per mapped segment */
	__le16 entry_sum_in_sgl;
	/* capacity of sge_entries[]; set to the pool's sge_nr when mapped */
	__le16 entry_length_in_sgl;
	__le16 pad0;
	__le64 pad1[5];
	/* NOTE(review): presumably CPU pointer to a chained SGL; unused in this file - confirm */
	struct hisi_acc_hw_sgl *next;
	/* SGE array; the pool reserves sge_nr entries per SGL */
	struct acc_hw_sge sge_entries[];
} __aligned(1);
338c2ecf20Sopenharmony_ci
/*
 * A pool of equally sized hw SGLs, stored back to back in up to
 * HISI_ACC_MEM_BLOCK_NR coherent DMA blocks.
 */
struct hisi_acc_sgl_pool {
	struct mem_block {
		struct hisi_acc_hw_sgl *sgl;	/* CPU address of the block */
		dma_addr_t sgl_dma;		/* DMA address of the block */
		size_t size;			/* bytes allocated, needed to free it */
	} mem_block[HISI_ACC_MEM_BLOCK_NR];
	u32 sgl_num_per_block;	/* how many SGLs fit in one full-size block */
	u32 block_num;		/* number of mem_block slots actually in use */
	u32 count;		/* total number of SGLs in the pool */
	u32 sge_nr;		/* SGE entries reserved in each SGL */
	size_t sgl_size;	/* bytes per SGL: head plus sge_nr entries */
};
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci/**
488c2ecf20Sopenharmony_ci * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
498c2ecf20Sopenharmony_ci * @dev: The device which hw sgl pool belongs to.
508c2ecf20Sopenharmony_ci * @count: Count of hisi_acc_hw_sgl in pool.
518c2ecf20Sopenharmony_ci * @sge_nr: The count of sge in hw_sgl
528c2ecf20Sopenharmony_ci *
538c2ecf20Sopenharmony_ci * This function creates a hw sgl pool, after this user can get hw sgl memory
548c2ecf20Sopenharmony_ci * from it.
558c2ecf20Sopenharmony_ci */
568c2ecf20Sopenharmony_cistruct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
578c2ecf20Sopenharmony_ci						   u32 count, u32 sge_nr)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl = 0;
608c2ecf20Sopenharmony_ci	struct hisi_acc_sgl_pool *pool;
618c2ecf20Sopenharmony_ci	struct mem_block *block;
628c2ecf20Sopenharmony_ci	u32 i, j;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
658c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
688c2ecf20Sopenharmony_ci		   sizeof(struct hisi_acc_hw_sgl);
698c2ecf20Sopenharmony_ci	block_size = 1 << (PAGE_SHIFT + MAX_ORDER <= 32 ?
708c2ecf20Sopenharmony_ci			   PAGE_SHIFT + MAX_ORDER - 1 : 31);
718c2ecf20Sopenharmony_ci	sgl_num_per_block = block_size / sgl_size;
728c2ecf20Sopenharmony_ci	block_num = count / sgl_num_per_block;
738c2ecf20Sopenharmony_ci	remain_sgl = count % sgl_num_per_block;
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
768c2ecf20Sopenharmony_ci	    (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
778c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
808c2ecf20Sopenharmony_ci	if (!pool)
818c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
828c2ecf20Sopenharmony_ci	block = pool->mem_block;
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	for (i = 0; i < block_num; i++) {
858c2ecf20Sopenharmony_ci		block[i].sgl = dma_alloc_coherent(dev, block_size,
868c2ecf20Sopenharmony_ci						  &block[i].sgl_dma,
878c2ecf20Sopenharmony_ci						  GFP_KERNEL);
888c2ecf20Sopenharmony_ci		if (!block[i].sgl)
898c2ecf20Sopenharmony_ci			goto err_free_mem;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci		block[i].size = block_size;
928c2ecf20Sopenharmony_ci	}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	if (remain_sgl > 0) {
958c2ecf20Sopenharmony_ci		block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
968c2ecf20Sopenharmony_ci						  &block[i].sgl_dma,
978c2ecf20Sopenharmony_ci						  GFP_KERNEL);
988c2ecf20Sopenharmony_ci		if (!block[i].sgl)
998c2ecf20Sopenharmony_ci			goto err_free_mem;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci		block[i].size = remain_sgl * sgl_size;
1028c2ecf20Sopenharmony_ci	}
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	pool->sgl_num_per_block = sgl_num_per_block;
1058c2ecf20Sopenharmony_ci	pool->block_num = remain_sgl ? block_num + 1 : block_num;
1068c2ecf20Sopenharmony_ci	pool->count = count;
1078c2ecf20Sopenharmony_ci	pool->sgl_size = sgl_size;
1088c2ecf20Sopenharmony_ci	pool->sge_nr = sge_nr;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	return pool;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_cierr_free_mem:
1138c2ecf20Sopenharmony_ci	for (j = 0; j < i; j++) {
1148c2ecf20Sopenharmony_ci		dma_free_coherent(dev, block_size, block[j].sgl,
1158c2ecf20Sopenharmony_ci				  block[j].sgl_dma);
1168c2ecf20Sopenharmony_ci	}
1178c2ecf20Sopenharmony_ci	kfree_sensitive(pool);
1188c2ecf20Sopenharmony_ci	return ERR_PTR(-ENOMEM);
1198c2ecf20Sopenharmony_ci}
1208c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci/**
1238c2ecf20Sopenharmony_ci * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
1248c2ecf20Sopenharmony_ci * @dev: The device which hw sgl pool belongs to.
1258c2ecf20Sopenharmony_ci * @pool: Pointer of pool.
1268c2ecf20Sopenharmony_ci *
1278c2ecf20Sopenharmony_ci * This function frees memory of a hw sgl pool.
1288c2ecf20Sopenharmony_ci */
1298c2ecf20Sopenharmony_civoid hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
1308c2ecf20Sopenharmony_ci{
1318c2ecf20Sopenharmony_ci	struct mem_block *block;
1328c2ecf20Sopenharmony_ci	int i;
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci	if (!dev || !pool)
1358c2ecf20Sopenharmony_ci		return;
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	block = pool->mem_block;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	for (i = 0; i < pool->block_num; i++)
1408c2ecf20Sopenharmony_ci		dma_free_coherent(dev, block[i].size, block[i].sgl,
1418c2ecf20Sopenharmony_ci				  block[i].sgl_dma);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	kfree(pool);
1448c2ecf20Sopenharmony_ci}
1458c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_cistatic struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
1488c2ecf20Sopenharmony_ci					   u32 index, dma_addr_t *hw_sgl_dma)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	struct mem_block *block;
1518c2ecf20Sopenharmony_ci	u32 block_index, offset;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	if (!pool || !hw_sgl_dma || index >= pool->count)
1548c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	block = pool->mem_block;
1578c2ecf20Sopenharmony_ci	block_index = index / pool->sgl_num_per_block;
1588c2ecf20Sopenharmony_ci	offset = index % pool->sgl_num_per_block;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	*hw_sgl_dma = block[block_index].sgl_dma + pool->sgl_size * offset;
1618c2ecf20Sopenharmony_ci	return (void *)block[block_index].sgl + pool->sgl_size * offset;
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cistatic void sg_map_to_hw_sg(struct scatterlist *sgl,
1658c2ecf20Sopenharmony_ci			    struct acc_hw_sge *hw_sge)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	hw_sge->buf = sg_dma_address(sgl);
1688c2ecf20Sopenharmony_ci	hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cistatic void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	var++;
1768c2ecf20Sopenharmony_ci	hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_cistatic void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci/**
1858c2ecf20Sopenharmony_ci * hisi_acc_sg_buf_map_to_hw_sgl - Map a scatterlist to a hw sgl.
1868c2ecf20Sopenharmony_ci * @dev: The device which hw sgl belongs to.
1878c2ecf20Sopenharmony_ci * @sgl: Scatterlist which will be mapped to hw sgl.
1888c2ecf20Sopenharmony_ci * @pool: Pool which hw sgl memory will be allocated in.
1898c2ecf20Sopenharmony_ci * @index: Index of hisi_acc_hw_sgl in pool.
1908c2ecf20Sopenharmony_ci * @hw_sgl_dma: The dma address of allocated hw sgl.
1918c2ecf20Sopenharmony_ci *
1928c2ecf20Sopenharmony_ci * This function builds hw sgl according input sgl, user can use hw_sgl_dma
1938c2ecf20Sopenharmony_ci * as src/dst in its BD. Only support single hw sgl currently.
1948c2ecf20Sopenharmony_ci */
1958c2ecf20Sopenharmony_cistruct hisi_acc_hw_sgl *
1968c2ecf20Sopenharmony_cihisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
1978c2ecf20Sopenharmony_ci			      struct scatterlist *sgl,
1988c2ecf20Sopenharmony_ci			      struct hisi_acc_sgl_pool *pool,
1998c2ecf20Sopenharmony_ci			      u32 index, dma_addr_t *hw_sgl_dma)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	struct hisi_acc_hw_sgl *curr_hw_sgl;
2028c2ecf20Sopenharmony_ci	dma_addr_t curr_sgl_dma = 0;
2038c2ecf20Sopenharmony_ci	struct acc_hw_sge *curr_hw_sge;
2048c2ecf20Sopenharmony_ci	struct scatterlist *sg;
2058c2ecf20Sopenharmony_ci	int i, sg_n, sg_n_mapped;
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	if (!dev || !sgl || !pool || !hw_sgl_dma)
2088c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	sg_n = sg_nents(sgl);
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
2138c2ecf20Sopenharmony_ci	if (!sg_n_mapped)
2148c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	if (sg_n_mapped > pool->sge_nr) {
2178c2ecf20Sopenharmony_ci		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
2188c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
2198c2ecf20Sopenharmony_ci	}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
2228c2ecf20Sopenharmony_ci	if (IS_ERR(curr_hw_sgl)) {
2238c2ecf20Sopenharmony_ci		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
2248c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	}
2278c2ecf20Sopenharmony_ci	curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
2288c2ecf20Sopenharmony_ci	curr_hw_sge = curr_hw_sgl->sge_entries;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	for_each_sg(sgl, sg, sg_n_mapped, i) {
2318c2ecf20Sopenharmony_ci		sg_map_to_hw_sg(sg, curr_hw_sge);
2328c2ecf20Sopenharmony_ci		inc_hw_sgl_sge(curr_hw_sgl);
2338c2ecf20Sopenharmony_ci		curr_hw_sge++;
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
2378c2ecf20Sopenharmony_ci	*hw_sgl_dma = curr_sgl_dma;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	return curr_hw_sgl;
2408c2ecf20Sopenharmony_ci}
2418c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci/**
2448c2ecf20Sopenharmony_ci * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
2458c2ecf20Sopenharmony_ci * @dev: The device which hw sgl belongs to.
2468c2ecf20Sopenharmony_ci * @sgl: Related scatterlist.
2478c2ecf20Sopenharmony_ci * @hw_sgl: Virtual address of hw sgl.
2488c2ecf20Sopenharmony_ci * @hw_sgl_dma: DMA address of hw sgl.
2498c2ecf20Sopenharmony_ci * @pool: Pool which hw sgl is allocated in.
2508c2ecf20Sopenharmony_ci *
2518c2ecf20Sopenharmony_ci * This function unmaps allocated hw sgl.
2528c2ecf20Sopenharmony_ci */
2538c2ecf20Sopenharmony_civoid hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
2548c2ecf20Sopenharmony_ci			   struct hisi_acc_hw_sgl *hw_sgl)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	if (!dev || !sgl || !hw_sgl)
2578c2ecf20Sopenharmony_ci		return;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	hw_sgl->entry_sum_in_chain = 0;
2628c2ecf20Sopenharmony_ci	hw_sgl->entry_sum_in_sgl = 0;
2638c2ecf20Sopenharmony_ci	hw_sgl->entry_length_in_sgl = 0;
2648c2ecf20Sopenharmony_ci}
2658c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
266