162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* Copyright (c) 2019 HiSilicon Limited. */
362306a36Sopenharmony_ci#include <linux/align.h>
462306a36Sopenharmony_ci#include <linux/dma-mapping.h>
562306a36Sopenharmony_ci#include <linux/hisi_acc_qm.h>
662306a36Sopenharmony_ci#include <linux/module.h>
762306a36Sopenharmony_ci#include <linux/slab.h>
862306a36Sopenharmony_ci
/*
 * Limits on SGE/SGL counts.
 * NOTE(review): neither of these two is referenced by the code visible in
 * this file; presumably they are kept for users of the pool - confirm.
 */
#define HISI_ACC_SGL_SGE_NR_MIN		1
#define HISI_ACC_SGL_NR_MAX		256
/* Each hw sgl (header plus its SGEs) is rounded up to this many bytes. */
#define HISI_ACC_SGL_ALIGN_SIZE		64
/* Number of mem_block slots embedded in struct hisi_acc_sgl_pool. */
#define HISI_ACC_MEM_BLOCK_NR		5
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_cistruct acc_hw_sge {
1562306a36Sopenharmony_ci	dma_addr_t buf;
1662306a36Sopenharmony_ci	void *page_ctrl;
1762306a36Sopenharmony_ci	__le32 len;
1862306a36Sopenharmony_ci	__le32 pad;
1962306a36Sopenharmony_ci	__le32 pad0;
2062306a36Sopenharmony_ci	__le32 pad1;
2162306a36Sopenharmony_ci};
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci/* use default sgl head size 64B */
2462306a36Sopenharmony_cistruct hisi_acc_hw_sgl {
2562306a36Sopenharmony_ci	dma_addr_t next_dma;
2662306a36Sopenharmony_ci	__le16 entry_sum_in_chain;
2762306a36Sopenharmony_ci	__le16 entry_sum_in_sgl;
2862306a36Sopenharmony_ci	__le16 entry_length_in_sgl;
2962306a36Sopenharmony_ci	__le16 pad0;
3062306a36Sopenharmony_ci	__le64 pad1[5];
3162306a36Sopenharmony_ci	struct hisi_acc_hw_sgl *next;
3262306a36Sopenharmony_ci	struct acc_hw_sge sge_entries[];
3362306a36Sopenharmony_ci} __aligned(1);
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistruct hisi_acc_sgl_pool {
3662306a36Sopenharmony_ci	struct mem_block {
3762306a36Sopenharmony_ci		struct hisi_acc_hw_sgl *sgl;
3862306a36Sopenharmony_ci		dma_addr_t sgl_dma;
3962306a36Sopenharmony_ci		size_t size;
4062306a36Sopenharmony_ci	} mem_block[HISI_ACC_MEM_BLOCK_NR];
4162306a36Sopenharmony_ci	u32 sgl_num_per_block;
4262306a36Sopenharmony_ci	u32 block_num;
4362306a36Sopenharmony_ci	u32 count;
4462306a36Sopenharmony_ci	u32 sge_nr;
4562306a36Sopenharmony_ci	size_t sgl_size;
4662306a36Sopenharmony_ci};
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci/**
4962306a36Sopenharmony_ci * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
5062306a36Sopenharmony_ci * @dev: The device which hw sgl pool belongs to.
5162306a36Sopenharmony_ci * @count: Count of hisi_acc_hw_sgl in pool.
5262306a36Sopenharmony_ci * @sge_nr: The count of sge in hw_sgl
5362306a36Sopenharmony_ci *
5462306a36Sopenharmony_ci * This function creates a hw sgl pool, after this user can get hw sgl memory
5562306a36Sopenharmony_ci * from it.
5662306a36Sopenharmony_ci */
5762306a36Sopenharmony_cistruct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
5862306a36Sopenharmony_ci						   u32 count, u32 sge_nr)
5962306a36Sopenharmony_ci{
6062306a36Sopenharmony_ci	u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl;
6162306a36Sopenharmony_ci	struct hisi_acc_sgl_pool *pool;
6262306a36Sopenharmony_ci	struct mem_block *block;
6362306a36Sopenharmony_ci	u32 i, j;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
6662306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
6962306a36Sopenharmony_ci			 sizeof(struct hisi_acc_hw_sgl),
7062306a36Sopenharmony_ci			 HISI_ACC_SGL_ALIGN_SIZE);
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	/*
7362306a36Sopenharmony_ci	 * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_ORDER,
7462306a36Sopenharmony_ci	 * block size may exceed 2^31 on ia64, so the max of block size is 2^31
7562306a36Sopenharmony_ci	 */
7662306a36Sopenharmony_ci	block_size = 1 << (PAGE_SHIFT + MAX_ORDER < 32 ?
7762306a36Sopenharmony_ci			   PAGE_SHIFT + MAX_ORDER : 31);
7862306a36Sopenharmony_ci	sgl_num_per_block = block_size / sgl_size;
7962306a36Sopenharmony_ci	block_num = count / sgl_num_per_block;
8062306a36Sopenharmony_ci	remain_sgl = count % sgl_num_per_block;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
8362306a36Sopenharmony_ci	    (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
8462306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
8762306a36Sopenharmony_ci	if (!pool)
8862306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
8962306a36Sopenharmony_ci	block = pool->mem_block;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	for (i = 0; i < block_num; i++) {
9262306a36Sopenharmony_ci		block[i].sgl = dma_alloc_coherent(dev, block_size,
9362306a36Sopenharmony_ci						  &block[i].sgl_dma,
9462306a36Sopenharmony_ci						  GFP_KERNEL);
9562306a36Sopenharmony_ci		if (!block[i].sgl) {
9662306a36Sopenharmony_ci			dev_err(dev, "Fail to allocate hw SG buffer!\n");
9762306a36Sopenharmony_ci			goto err_free_mem;
9862306a36Sopenharmony_ci		}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci		block[i].size = block_size;
10162306a36Sopenharmony_ci	}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	if (remain_sgl > 0) {
10462306a36Sopenharmony_ci		block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
10562306a36Sopenharmony_ci						  &block[i].sgl_dma,
10662306a36Sopenharmony_ci						  GFP_KERNEL);
10762306a36Sopenharmony_ci		if (!block[i].sgl) {
10862306a36Sopenharmony_ci			dev_err(dev, "Fail to allocate remained hw SG buffer!\n");
10962306a36Sopenharmony_ci			goto err_free_mem;
11062306a36Sopenharmony_ci		}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci		block[i].size = remain_sgl * sgl_size;
11362306a36Sopenharmony_ci	}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	pool->sgl_num_per_block = sgl_num_per_block;
11662306a36Sopenharmony_ci	pool->block_num = remain_sgl ? block_num + 1 : block_num;
11762306a36Sopenharmony_ci	pool->count = count;
11862306a36Sopenharmony_ci	pool->sgl_size = sgl_size;
11962306a36Sopenharmony_ci	pool->sge_nr = sge_nr;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	return pool;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_cierr_free_mem:
12462306a36Sopenharmony_ci	for (j = 0; j < i; j++) {
12562306a36Sopenharmony_ci		dma_free_coherent(dev, block_size, block[j].sgl,
12662306a36Sopenharmony_ci				  block[j].sgl_dma);
12762306a36Sopenharmony_ci	}
12862306a36Sopenharmony_ci	kfree_sensitive(pool);
12962306a36Sopenharmony_ci	return ERR_PTR(-ENOMEM);
13062306a36Sopenharmony_ci}
13162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci/**
13462306a36Sopenharmony_ci * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
13562306a36Sopenharmony_ci * @dev: The device which hw sgl pool belongs to.
13662306a36Sopenharmony_ci * @pool: Pointer of pool.
13762306a36Sopenharmony_ci *
13862306a36Sopenharmony_ci * This function frees memory of a hw sgl pool.
13962306a36Sopenharmony_ci */
14062306a36Sopenharmony_civoid hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
14162306a36Sopenharmony_ci{
14262306a36Sopenharmony_ci	struct mem_block *block;
14362306a36Sopenharmony_ci	int i;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	if (!dev || !pool)
14662306a36Sopenharmony_ci		return;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	block = pool->mem_block;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	for (i = 0; i < pool->block_num; i++)
15162306a36Sopenharmony_ci		dma_free_coherent(dev, block[i].size, block[i].sgl,
15262306a36Sopenharmony_ci				  block[i].sgl_dma);
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	kfree(pool);
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_cistatic struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
15962306a36Sopenharmony_ci					   u32 index, dma_addr_t *hw_sgl_dma)
16062306a36Sopenharmony_ci{
16162306a36Sopenharmony_ci	struct mem_block *block;
16262306a36Sopenharmony_ci	u32 block_index, offset;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	if (!pool || !hw_sgl_dma || index >= pool->count)
16562306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	block = pool->mem_block;
16862306a36Sopenharmony_ci	block_index = index / pool->sgl_num_per_block;
16962306a36Sopenharmony_ci	offset = index % pool->sgl_num_per_block;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	*hw_sgl_dma = block[block_index].sgl_dma + pool->sgl_size * offset;
17262306a36Sopenharmony_ci	return (void *)block[block_index].sgl + pool->sgl_size * offset;
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic void sg_map_to_hw_sg(struct scatterlist *sgl,
17662306a36Sopenharmony_ci			    struct acc_hw_sge *hw_sge)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	hw_sge->buf = sg_dma_address(sgl);
17962306a36Sopenharmony_ci	hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
18062306a36Sopenharmony_ci	hw_sge->page_ctrl = sg_virt(sgl);
18162306a36Sopenharmony_ci}
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_cistatic void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
18462306a36Sopenharmony_ci{
18562306a36Sopenharmony_ci	u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	var++;
18862306a36Sopenharmony_ci	hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_cistatic void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
19462306a36Sopenharmony_ci}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_cistatic void clear_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
19762306a36Sopenharmony_ci{
19862306a36Sopenharmony_ci	struct acc_hw_sge *hw_sge = hw_sgl->sge_entries;
19962306a36Sopenharmony_ci	int i;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(hw_sgl->entry_sum_in_sgl); i++) {
20262306a36Sopenharmony_ci		hw_sge[i].page_ctrl = NULL;
20362306a36Sopenharmony_ci		hw_sge[i].buf = 0;
20462306a36Sopenharmony_ci		hw_sge[i].len = 0;
20562306a36Sopenharmony_ci	}
20662306a36Sopenharmony_ci}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci/**
20962306a36Sopenharmony_ci * hisi_acc_sg_buf_map_to_hw_sgl - Map a scatterlist to a hw sgl.
21062306a36Sopenharmony_ci * @dev: The device which hw sgl belongs to.
21162306a36Sopenharmony_ci * @sgl: Scatterlist which will be mapped to hw sgl.
21262306a36Sopenharmony_ci * @pool: Pool which hw sgl memory will be allocated in.
21362306a36Sopenharmony_ci * @index: Index of hisi_acc_hw_sgl in pool.
21462306a36Sopenharmony_ci * @hw_sgl_dma: The dma address of allocated hw sgl.
21562306a36Sopenharmony_ci *
21662306a36Sopenharmony_ci * This function builds hw sgl according input sgl, user can use hw_sgl_dma
21762306a36Sopenharmony_ci * as src/dst in its BD. Only support single hw sgl currently.
21862306a36Sopenharmony_ci */
21962306a36Sopenharmony_cistruct hisi_acc_hw_sgl *
22062306a36Sopenharmony_cihisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
22162306a36Sopenharmony_ci			      struct scatterlist *sgl,
22262306a36Sopenharmony_ci			      struct hisi_acc_sgl_pool *pool,
22362306a36Sopenharmony_ci			      u32 index, dma_addr_t *hw_sgl_dma)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	struct hisi_acc_hw_sgl *curr_hw_sgl;
22662306a36Sopenharmony_ci	dma_addr_t curr_sgl_dma = 0;
22762306a36Sopenharmony_ci	struct acc_hw_sge *curr_hw_sge;
22862306a36Sopenharmony_ci	struct scatterlist *sg;
22962306a36Sopenharmony_ci	int i, sg_n, sg_n_mapped;
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	if (!dev || !sgl || !pool || !hw_sgl_dma)
23262306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	sg_n = sg_nents(sgl);
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
23762306a36Sopenharmony_ci	if (!sg_n_mapped) {
23862306a36Sopenharmony_ci		dev_err(dev, "DMA mapping for SG error!\n");
23962306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	if (sg_n_mapped > pool->sge_nr) {
24362306a36Sopenharmony_ci		dev_err(dev, "the number of entries in input scatterlist is bigger than SGL pool setting.\n");
24462306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
24562306a36Sopenharmony_ci	}
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
24862306a36Sopenharmony_ci	if (IS_ERR(curr_hw_sgl)) {
24962306a36Sopenharmony_ci		dev_err(dev, "Get SGL error!\n");
25062306a36Sopenharmony_ci		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
25162306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci	curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
25462306a36Sopenharmony_ci	curr_hw_sge = curr_hw_sgl->sge_entries;
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	for_each_sg(sgl, sg, sg_n_mapped, i) {
25762306a36Sopenharmony_ci		sg_map_to_hw_sg(sg, curr_hw_sge);
25862306a36Sopenharmony_ci		inc_hw_sgl_sge(curr_hw_sgl);
25962306a36Sopenharmony_ci		curr_hw_sge++;
26062306a36Sopenharmony_ci	}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
26362306a36Sopenharmony_ci	*hw_sgl_dma = curr_sgl_dma;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	return curr_hw_sgl;
26662306a36Sopenharmony_ci}
26762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci/**
27062306a36Sopenharmony_ci * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
27162306a36Sopenharmony_ci * @dev: The device which hw sgl belongs to.
27262306a36Sopenharmony_ci * @sgl: Related scatterlist.
27362306a36Sopenharmony_ci * @hw_sgl: Virtual address of hw sgl.
27462306a36Sopenharmony_ci *
27562306a36Sopenharmony_ci * This function unmaps allocated hw sgl.
27662306a36Sopenharmony_ci */
27762306a36Sopenharmony_civoid hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
27862306a36Sopenharmony_ci			   struct hisi_acc_hw_sgl *hw_sgl)
27962306a36Sopenharmony_ci{
28062306a36Sopenharmony_ci	if (!dev || !sgl || !hw_sgl)
28162306a36Sopenharmony_ci		return;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
28462306a36Sopenharmony_ci	clear_hw_sgl_sge(hw_sgl);
28562306a36Sopenharmony_ci	hw_sgl->entry_sum_in_chain = 0;
28662306a36Sopenharmony_ci	hw_sgl->entry_sum_in_sgl = 0;
28762306a36Sopenharmony_ci	hw_sgl->entry_length_in_sgl = 0;
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
290