162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Hantro VP9 codec driver
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2021 Collabora Ltd.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/types.h>
962306a36Sopenharmony_ci#include <media/v4l2-mem2mem.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include "hantro.h"
1262306a36Sopenharmony_ci#include "hantro_hw.h"
1362306a36Sopenharmony_ci#include "hantro_vp9.h"
1462306a36Sopenharmony_ci
/* 2 raised to the power x, evaluated as an int. */
#define POW2(x) (1 << (x))

/* Limits used below to size the auxiliary decoder buffers. */
enum {
	MAX_LOG2_TILE_COLUMNS = 6,
	MAX_NUM_TILE_COLS = POW2(MAX_LOG2_TILE_COLUMNS),
	MAX_TILE_COLS = 20,
	MAX_TILE_ROWS = 22,
};
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cistatic size_t hantro_vp9_tile_filter_size(unsigned int height)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	u32 h, height32, size;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	h = roundup(height, 8);
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci	height32 = roundup(h, 64);
2962306a36Sopenharmony_ci	size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	return size;
3262306a36Sopenharmony_ci}
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_cistatic size_t hantro_vp9_bsd_control_size(unsigned int height)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	u32 h, height32;
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	h = roundup(height, 8);
3962306a36Sopenharmony_ci	height32 = roundup(h, 64);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1);
4262306a36Sopenharmony_ci}
4362306a36Sopenharmony_ci
/*
 * Size in bytes of one segment map area for a width x height picture.
 *
 * The picture is covered with 64x64 blocks (partial blocks on the right and
 * bottom edges count as whole ones) and 32 bytes are reserved per block.
 */
static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height)
{
	/*
	 * Any 8-pixel alignment of the dimensions is subsumed by rounding up
	 * to whole 64-pixel blocks, so only the latter is done. Counting in
	 * size_t keeps the products out of signed int arithmetic.
	 */
	size_t blocks_x = ((size_t)width + 63) / 64;
	size_t blocks_y = ((size_t)height + 63) / 64;

	return blocks_x * blocks_y * 32;
}
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_cistatic inline size_t hantro_vp9_prob_tab_size(void)
5762306a36Sopenharmony_ci{
5862306a36Sopenharmony_ci	return roundup(sizeof(struct hantro_g2_all_probs), 16);
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_cistatic inline size_t hantro_vp9_count_tab_size(void)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	return roundup(sizeof(struct symbol_counts), 16);
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_cistatic inline size_t hantro_vp9_tile_info_size(void)
6762306a36Sopenharmony_ci{
6862306a36Sopenharmony_ci	return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16);
6962306a36Sopenharmony_ci}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_cistatic void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	if (i == 0)
7462306a36Sopenharmony_ci		return &cnts->count_coeffs[j][k][l][m];
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	if (i == 1)
7762306a36Sopenharmony_ci		return &cnts->count_coeffs8x8[j][k][l][m];
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	if (i == 2)
8062306a36Sopenharmony_ci		return &cnts->count_coeffs16x16[j][k][l][m];
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	if (i == 3)
8362306a36Sopenharmony_ci		return &cnts->count_coeffs32x32[j][k][l][m];
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	return NULL;
8662306a36Sopenharmony_ci}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_cistatic void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	if (i == 0)
9162306a36Sopenharmony_ci		return &cnts->count_coeffs[j][k][l][m][3];
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	if (i == 1)
9462306a36Sopenharmony_ci		return &cnts->count_coeffs8x8[j][k][l][m][3];
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	if (i == 2)
9762306a36Sopenharmony_ci		return &cnts->count_coeffs16x16[j][k][l][m][3];
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	if (i == 3)
10062306a36Sopenharmony_ci		return &cnts->count_coeffs32x32[j][k][l][m][3];
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	return NULL;
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci#define INNER_LOOP \
10662306a36Sopenharmony_ci	do {										\
10762306a36Sopenharmony_ci		for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) {	\
10862306a36Sopenharmony_ci			vp9_ctx->cnts.coeff[i][j][k][l][m] =				\
10962306a36Sopenharmony_ci				get_coeffs_arr(cnts, i, j, k, l, m);			\
11062306a36Sopenharmony_ci			vp9_ctx->cnts.eob[i][j][k][l][m][0] =				\
11162306a36Sopenharmony_ci				&cnts->count_eobs[i][j][k][l][m];			\
11262306a36Sopenharmony_ci			vp9_ctx->cnts.eob[i][j][k][l][m][1] =				\
11362306a36Sopenharmony_ci				get_eobs1(cnts, i, j, k, l, m);				\
11462306a36Sopenharmony_ci		}									\
11562306a36Sopenharmony_ci	} while (0)
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
12062306a36Sopenharmony_ci	struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
12162306a36Sopenharmony_ci	int i, j, k, l, m;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	vp9_ctx->cnts.partition = &cnts->partition_counts;
12462306a36Sopenharmony_ci	vp9_ctx->cnts.skip = &cnts->mbskip_count;
12562306a36Sopenharmony_ci	vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count;
12662306a36Sopenharmony_ci	vp9_ctx->cnts.tx32p = &cnts->tx32x32_count;
12762306a36Sopenharmony_ci	/*
12862306a36Sopenharmony_ci	 * g2 hardware uses tx16x16_count[2][3], while the api
12962306a36Sopenharmony_ci	 * expects tx16p[2][4], so this must be explicitly copied
13062306a36Sopenharmony_ci	 * into vp9_ctx->cnts.tx16p when passing the data to the
13162306a36Sopenharmony_ci	 * vp9 library function
13262306a36Sopenharmony_ci	 */
13362306a36Sopenharmony_ci	vp9_ctx->cnts.tx8p = &cnts->tx8x8_count;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts;
13662306a36Sopenharmony_ci	vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts;
13762306a36Sopenharmony_ci	vp9_ctx->cnts.comp = &cnts->comp_inter_count;
13862306a36Sopenharmony_ci	vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count;
13962306a36Sopenharmony_ci	vp9_ctx->cnts.single_ref = &cnts->single_ref_count;
14062306a36Sopenharmony_ci	vp9_ctx->cnts.filter = &cnts->switchable_interp_counts;
14162306a36Sopenharmony_ci	vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints;
14262306a36Sopenharmony_ci	vp9_ctx->cnts.sign = &cnts->mv_counts.sign;
14362306a36Sopenharmony_ci	vp9_ctx->cnts.classes = &cnts->mv_counts.classes;
14462306a36Sopenharmony_ci	vp9_ctx->cnts.class0 = &cnts->mv_counts.class0;
14562306a36Sopenharmony_ci	vp9_ctx->cnts.bits = &cnts->mv_counts.bits;
14662306a36Sopenharmony_ci	vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp;
14762306a36Sopenharmony_ci	vp9_ctx->cnts.fp = &cnts->mv_counts.fp;
14862306a36Sopenharmony_ci	vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp;
14962306a36Sopenharmony_ci	vp9_ctx->cnts.hp = &cnts->mv_counts.hp;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i)
15262306a36Sopenharmony_ci		for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j)
15362306a36Sopenharmony_ci			for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k)
15462306a36Sopenharmony_ci				for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l)
15562306a36Sopenharmony_ci					INNER_LOOP;
15662306a36Sopenharmony_ci}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ciint hantro_vp9_dec_init(struct hantro_ctx *ctx)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
16162306a36Sopenharmony_ci	const struct hantro_variant *variant = vpu->variant;
16262306a36Sopenharmony_ci	struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
16362306a36Sopenharmony_ci	struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
16462306a36Sopenharmony_ci	struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
16562306a36Sopenharmony_ci	struct hantro_aux_buf *misc = &vp9_dec->misc;
16662306a36Sopenharmony_ci	u32 i, max_width, max_height, size;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	if (variant->num_dec_fmts < 1)
16962306a36Sopenharmony_ci		return -EINVAL;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	for (i = 0; i < variant->num_dec_fmts; ++i)
17262306a36Sopenharmony_ci		if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME)
17362306a36Sopenharmony_ci			break;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	if (i == variant->num_dec_fmts)
17662306a36Sopenharmony_ci		return -EINVAL;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	max_width = vpu->variant->dec_fmts[i].frmsize.max_width;
17962306a36Sopenharmony_ci	max_height = vpu->variant->dec_fmts[i].frmsize.max_height;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	size = hantro_vp9_tile_filter_size(max_height);
18262306a36Sopenharmony_ci	vp9_dec->bsd_ctrl_offset = size;
18362306a36Sopenharmony_ci	size += hantro_vp9_bsd_control_size(max_height);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL);
18662306a36Sopenharmony_ci	if (!tile_edge->cpu)
18762306a36Sopenharmony_ci		return -ENOMEM;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	tile_edge->size = size;
19062306a36Sopenharmony_ci	memset(tile_edge->cpu, 0, size);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	size = hantro_vp9_segment_map_size(max_width, max_height);
19362306a36Sopenharmony_ci	vp9_dec->segment_map_size = size;
19462306a36Sopenharmony_ci	size *= 2; /* we need two areas of this size, used alternately */
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL);
19762306a36Sopenharmony_ci	if (!segment_map->cpu)
19862306a36Sopenharmony_ci		goto err_segment_map;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	segment_map->size = size;
20162306a36Sopenharmony_ci	memset(segment_map->cpu, 0, size);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	size = hantro_vp9_prob_tab_size();
20462306a36Sopenharmony_ci	vp9_dec->ctx_counters_offset = size;
20562306a36Sopenharmony_ci	size += hantro_vp9_count_tab_size();
20662306a36Sopenharmony_ci	vp9_dec->tile_info_offset = size;
20762306a36Sopenharmony_ci	size += hantro_vp9_tile_info_size();
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL);
21062306a36Sopenharmony_ci	if (!misc->cpu)
21162306a36Sopenharmony_ci		goto err_misc;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	misc->size = size;
21462306a36Sopenharmony_ci	memset(misc->cpu, 0, size);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	init_v4l2_vp9_count_tbl(ctx);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	return 0;
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cierr_misc:
22162306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cierr_segment_map:
22462306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	return -ENOMEM;
22762306a36Sopenharmony_ci}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_civoid hantro_vp9_dec_exit(struct hantro_ctx *ctx)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
23262306a36Sopenharmony_ci	struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
23362306a36Sopenharmony_ci	struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
23462306a36Sopenharmony_ci	struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
23562306a36Sopenharmony_ci	struct hantro_aux_buf *misc = &vp9_dec->misc;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma);
23862306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
23962306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
24062306a36Sopenharmony_ci}
241