162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Hantro VPU codec driver
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "hantro.h"
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci/*
1162306a36Sopenharmony_ci * probs table with packed
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_cistruct vp8_prob_tbl_packed {
1462306a36Sopenharmony_ci	u8 prob_mb_skip_false;
1562306a36Sopenharmony_ci	u8 prob_intra;
1662306a36Sopenharmony_ci	u8 prob_ref_last;
1762306a36Sopenharmony_ci	u8 prob_ref_golden;
1862306a36Sopenharmony_ci	u8 prob_segment[3];
1962306a36Sopenharmony_ci	u8 padding0;
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci	u8 prob_luma_16x16_pred_mode[4];
2262306a36Sopenharmony_ci	u8 prob_chroma_pred_mode[3];
2362306a36Sopenharmony_ci	u8 padding1;
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci	/* mv prob */
2662306a36Sopenharmony_ci	u8 prob_mv_context[2][V4L2_VP8_MV_PROB_CNT];
2762306a36Sopenharmony_ci	u8 padding2[2];
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci	/* coeff probs */
3062306a36Sopenharmony_ci	u8 prob_coeffs[4][8][3][V4L2_VP8_COEFF_PROB_CNT];
3162306a36Sopenharmony_ci	u8 padding3[96];
3262306a36Sopenharmony_ci};
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci/*
3562306a36Sopenharmony_ci * filter taps taken to 7-bit precision,
3662306a36Sopenharmony_ci * reference RFC6386#Page-16, filters[8][6]
3762306a36Sopenharmony_ci */
3862306a36Sopenharmony_ciconst u32 hantro_vp8_dec_mc_filter[8][6] = {
3962306a36Sopenharmony_ci	{ 0, 0, 128, 0, 0, 0 },
4062306a36Sopenharmony_ci	{ 0, -6, 123, 12, -1, 0 },
4162306a36Sopenharmony_ci	{ 2, -11, 108, 36, -8, 1 },
4262306a36Sopenharmony_ci	{ 0, -9, 93, 50, -6, 0 },
4362306a36Sopenharmony_ci	{ 3, -16, 77, 77, -16, 3 },
4462306a36Sopenharmony_ci	{ 0, -6, 50, 93, -9, 0 },
4562306a36Sopenharmony_ci	{ 1, -8, 36, 108, -11, 2 },
4662306a36Sopenharmony_ci	{ 0, -1, 12, 123, -6, 0 }
4762306a36Sopenharmony_ci};
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_civoid hantro_vp8_prob_update(struct hantro_ctx *ctx,
5062306a36Sopenharmony_ci			    const struct v4l2_ctrl_vp8_frame *hdr)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	const struct v4l2_vp8_entropy *entropy = &hdr->entropy;
5362306a36Sopenharmony_ci	u32 i, j, k;
5462306a36Sopenharmony_ci	u8 *dst;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	/* first probs */
5762306a36Sopenharmony_ci	dst = ctx->vp8_dec.prob_tbl.cpu;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	dst[0] = hdr->prob_skip_false;
6062306a36Sopenharmony_ci	dst[1] = hdr->prob_intra;
6162306a36Sopenharmony_ci	dst[2] = hdr->prob_last;
6262306a36Sopenharmony_ci	dst[3] = hdr->prob_gf;
6362306a36Sopenharmony_ci	dst[4] = hdr->segment.segment_probs[0];
6462306a36Sopenharmony_ci	dst[5] = hdr->segment.segment_probs[1];
6562306a36Sopenharmony_ci	dst[6] = hdr->segment.segment_probs[2];
6662306a36Sopenharmony_ci	dst[7] = 0;
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	dst += 8;
6962306a36Sopenharmony_ci	dst[0] = entropy->y_mode_probs[0];
7062306a36Sopenharmony_ci	dst[1] = entropy->y_mode_probs[1];
7162306a36Sopenharmony_ci	dst[2] = entropy->y_mode_probs[2];
7262306a36Sopenharmony_ci	dst[3] = entropy->y_mode_probs[3];
7362306a36Sopenharmony_ci	dst[4] = entropy->uv_mode_probs[0];
7462306a36Sopenharmony_ci	dst[5] = entropy->uv_mode_probs[1];
7562306a36Sopenharmony_ci	dst[6] = entropy->uv_mode_probs[2];
7662306a36Sopenharmony_ci	dst[7] = 0; /*unused */
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	/* mv probs */
7962306a36Sopenharmony_ci	dst += 8;
8062306a36Sopenharmony_ci	dst[0] = entropy->mv_probs[0][0]; /* is short */
8162306a36Sopenharmony_ci	dst[1] = entropy->mv_probs[1][0];
8262306a36Sopenharmony_ci	dst[2] = entropy->mv_probs[0][1]; /* sign */
8362306a36Sopenharmony_ci	dst[3] = entropy->mv_probs[1][1];
8462306a36Sopenharmony_ci	dst[4] = entropy->mv_probs[0][8 + 9];
8562306a36Sopenharmony_ci	dst[5] = entropy->mv_probs[0][9 + 9];
8662306a36Sopenharmony_ci	dst[6] = entropy->mv_probs[1][8 + 9];
8762306a36Sopenharmony_ci	dst[7] = entropy->mv_probs[1][9 + 9];
8862306a36Sopenharmony_ci	dst += 8;
8962306a36Sopenharmony_ci	for (i = 0; i < 2; ++i) {
9062306a36Sopenharmony_ci		for (j = 0; j < 8; j += 4) {
9162306a36Sopenharmony_ci			dst[0] = entropy->mv_probs[i][j + 9 + 0];
9262306a36Sopenharmony_ci			dst[1] = entropy->mv_probs[i][j + 9 + 1];
9362306a36Sopenharmony_ci			dst[2] = entropy->mv_probs[i][j + 9 + 2];
9462306a36Sopenharmony_ci			dst[3] = entropy->mv_probs[i][j + 9 + 3];
9562306a36Sopenharmony_ci			dst += 4;
9662306a36Sopenharmony_ci		}
9762306a36Sopenharmony_ci	}
9862306a36Sopenharmony_ci	for (i = 0; i < 2; ++i) {
9962306a36Sopenharmony_ci		dst[0] = entropy->mv_probs[i][0 + 2];
10062306a36Sopenharmony_ci		dst[1] = entropy->mv_probs[i][1 + 2];
10162306a36Sopenharmony_ci		dst[2] = entropy->mv_probs[i][2 + 2];
10262306a36Sopenharmony_ci		dst[3] = entropy->mv_probs[i][3 + 2];
10362306a36Sopenharmony_ci		dst[4] = entropy->mv_probs[i][4 + 2];
10462306a36Sopenharmony_ci		dst[5] = entropy->mv_probs[i][5 + 2];
10562306a36Sopenharmony_ci		dst[6] = entropy->mv_probs[i][6 + 2];
10662306a36Sopenharmony_ci		dst[7] = 0;	/*unused */
10762306a36Sopenharmony_ci		dst += 8;
10862306a36Sopenharmony_ci	}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	/* coeff probs (header part) */
11162306a36Sopenharmony_ci	dst = ctx->vp8_dec.prob_tbl.cpu;
11262306a36Sopenharmony_ci	dst += (8 * 7);
11362306a36Sopenharmony_ci	for (i = 0; i < 4; ++i) {
11462306a36Sopenharmony_ci		for (j = 0; j < 8; ++j) {
11562306a36Sopenharmony_ci			for (k = 0; k < 3; ++k) {
11662306a36Sopenharmony_ci				dst[0] = entropy->coeff_probs[i][j][k][0];
11762306a36Sopenharmony_ci				dst[1] = entropy->coeff_probs[i][j][k][1];
11862306a36Sopenharmony_ci				dst[2] = entropy->coeff_probs[i][j][k][2];
11962306a36Sopenharmony_ci				dst[3] = entropy->coeff_probs[i][j][k][3];
12062306a36Sopenharmony_ci				dst += 4;
12162306a36Sopenharmony_ci			}
12262306a36Sopenharmony_ci		}
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/* coeff probs (footer part) */
12662306a36Sopenharmony_ci	dst = ctx->vp8_dec.prob_tbl.cpu;
12762306a36Sopenharmony_ci	dst += (8 * 55);
12862306a36Sopenharmony_ci	for (i = 0; i < 4; ++i) {
12962306a36Sopenharmony_ci		for (j = 0; j < 8; ++j) {
13062306a36Sopenharmony_ci			for (k = 0; k < 3; ++k) {
13162306a36Sopenharmony_ci				dst[0] = entropy->coeff_probs[i][j][k][4];
13262306a36Sopenharmony_ci				dst[1] = entropy->coeff_probs[i][j][k][5];
13362306a36Sopenharmony_ci				dst[2] = entropy->coeff_probs[i][j][k][6];
13462306a36Sopenharmony_ci				dst[3] = entropy->coeff_probs[i][j][k][7];
13562306a36Sopenharmony_ci				dst[4] = entropy->coeff_probs[i][j][k][8];
13662306a36Sopenharmony_ci				dst[5] = entropy->coeff_probs[i][j][k][9];
13762306a36Sopenharmony_ci				dst[6] = entropy->coeff_probs[i][j][k][10];
13862306a36Sopenharmony_ci				dst[7] = 0;	/*unused */
13962306a36Sopenharmony_ci				dst += 8;
14062306a36Sopenharmony_ci			}
14162306a36Sopenharmony_ci		}
14262306a36Sopenharmony_ci	}
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ciint hantro_vp8_dec_init(struct hantro_ctx *ctx)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
14862306a36Sopenharmony_ci	struct hantro_aux_buf *aux_buf;
14962306a36Sopenharmony_ci	unsigned int mb_width, mb_height;
15062306a36Sopenharmony_ci	size_t segment_map_size;
15162306a36Sopenharmony_ci	int ret;
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	/* segment map table size calculation */
15462306a36Sopenharmony_ci	mb_width = DIV_ROUND_UP(ctx->dst_fmt.width, 16);
15562306a36Sopenharmony_ci	mb_height = DIV_ROUND_UP(ctx->dst_fmt.height, 16);
15662306a36Sopenharmony_ci	segment_map_size = round_up(DIV_ROUND_UP(mb_width * mb_height, 4), 64);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	/*
15962306a36Sopenharmony_ci	 * In context init the dma buffer for segment map must be allocated.
16062306a36Sopenharmony_ci	 * And the data in segment map buffer must be set to all zero.
16162306a36Sopenharmony_ci	 */
16262306a36Sopenharmony_ci	aux_buf = &ctx->vp8_dec.segment_map;
16362306a36Sopenharmony_ci	aux_buf->size = segment_map_size;
16462306a36Sopenharmony_ci	aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
16562306a36Sopenharmony_ci					  &aux_buf->dma, GFP_KERNEL);
16662306a36Sopenharmony_ci	if (!aux_buf->cpu)
16762306a36Sopenharmony_ci		return -ENOMEM;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	/*
17062306a36Sopenharmony_ci	 * Allocate probability table buffer,
17162306a36Sopenharmony_ci	 * total 1208 bytes, 4K page is far enough.
17262306a36Sopenharmony_ci	 */
17362306a36Sopenharmony_ci	aux_buf = &ctx->vp8_dec.prob_tbl;
17462306a36Sopenharmony_ci	aux_buf->size = sizeof(struct vp8_prob_tbl_packed);
17562306a36Sopenharmony_ci	aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
17662306a36Sopenharmony_ci					  &aux_buf->dma, GFP_KERNEL);
17762306a36Sopenharmony_ci	if (!aux_buf->cpu) {
17862306a36Sopenharmony_ci		ret = -ENOMEM;
17962306a36Sopenharmony_ci		goto err_free_seg_map;
18062306a36Sopenharmony_ci	}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	return 0;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cierr_free_seg_map:
18562306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, ctx->vp8_dec.segment_map.size,
18662306a36Sopenharmony_ci			  ctx->vp8_dec.segment_map.cpu,
18762306a36Sopenharmony_ci			  ctx->vp8_dec.segment_map.dma);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	return ret;
19062306a36Sopenharmony_ci}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_civoid hantro_vp8_dec_exit(struct hantro_ctx *ctx)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	struct hantro_vp8_dec_hw_ctx *vp8_dec = &ctx->vp8_dec;
19562306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, vp8_dec->segment_map.size,
19862306a36Sopenharmony_ci			  vp8_dec->segment_map.cpu, vp8_dec->segment_map.dma);
19962306a36Sopenharmony_ci	dma_free_coherent(vpu->dev, vp8_dec->prob_tbl.size,
20062306a36Sopenharmony_ci			  vp8_dec->prob_tbl.cpu, vp8_dec->prob_tbl.dma);
20162306a36Sopenharmony_ci}
202