162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Hantro VPU HEVC codec driver
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2020 Safran Passenger Innovations LLC
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/types.h>
962306a36Sopenharmony_ci#include <media/v4l2-mem2mem.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include "hantro.h"
1262306a36Sopenharmony_ci#include "hantro_hw.h"
1362306a36Sopenharmony_ci
/*
 * Per-row sizing factors for the tile border buffers allocated in
 * tile_buffer_reallocate(). Each one is multiplied there by the picture
 * height (rounded up to 64) and by the number of tile columns minus one.
 * The filter and SAO factors are additionally scaled by bit_depth / 8.
 */
#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
/*
 * BSD control data of current picture at tile border
 * 128 bits per 4x4 tile = 128/(8*4) bytes per row
 */
#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
/* tile border coefficients of filter */
#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */

/* Size in bytes of the DMA buffer allocated for the scaling lists */
#define SCALING_LIST_SIZE (16 * 64)

/* Tile grid bounds used to size the tile sizes buffer */
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_civoid hantro_hevc_ref_init(struct hantro_ctx *ctx)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci	hevc_dec->ref_bufs_used = 0;
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cidma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
3662306a36Sopenharmony_ci				   s32 poc)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
3962306a36Sopenharmony_ci	int i;
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/* Find the reference buffer in already known ones */
4262306a36Sopenharmony_ci	for (i = 0;  i < NUM_REF_PICTURES; i++) {
4362306a36Sopenharmony_ci		if (hevc_dec->ref_bufs_poc[i] == poc) {
4462306a36Sopenharmony_ci			hevc_dec->ref_bufs_used |= 1 << i;
4562306a36Sopenharmony_ci			return hevc_dec->ref_bufs[i].dma;
4662306a36Sopenharmony_ci		}
4762306a36Sopenharmony_ci	}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	return 0;
5062306a36Sopenharmony_ci}
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ciint hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
5362306a36Sopenharmony_ci{
5462306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
5562306a36Sopenharmony_ci	int i;
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	/* Add a new reference buffer */
5862306a36Sopenharmony_ci	for (i = 0; i < NUM_REF_PICTURES; i++) {
5962306a36Sopenharmony_ci		if (!(hevc_dec->ref_bufs_used & 1 << i)) {
6062306a36Sopenharmony_ci			hevc_dec->ref_bufs_used |= 1 << i;
6162306a36Sopenharmony_ci			hevc_dec->ref_bufs_poc[i] = poc;
6262306a36Sopenharmony_ci			hevc_dec->ref_bufs[i].dma = addr;
6362306a36Sopenharmony_ci			return 0;
6462306a36Sopenharmony_ci		}
6562306a36Sopenharmony_ci	}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	return -EINVAL;
6862306a36Sopenharmony_ci}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic int tile_buffer_reallocate(struct hantro_ctx *ctx)
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
7362306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
7462306a36Sopenharmony_ci	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
7562306a36Sopenharmony_ci	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
7662306a36Sopenharmony_ci	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
7762306a36Sopenharmony_ci	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
7862306a36Sopenharmony_ci	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
7962306a36Sopenharmony_ci	unsigned int size;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	if (num_tile_cols <= 1 ||
8262306a36Sopenharmony_ci	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
8362306a36Sopenharmony_ci		return 0;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	/* Need to reallocate due to tiles passed via PPS */
8662306a36Sopenharmony_ci	if (hevc_dec->tile_filter.cpu) {
8762306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
8862306a36Sopenharmony_ci				  hevc_dec->tile_filter.cpu,
8962306a36Sopenharmony_ci				  hevc_dec->tile_filter.dma);
9062306a36Sopenharmony_ci		hevc_dec->tile_filter.cpu = NULL;
9162306a36Sopenharmony_ci	}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	if (hevc_dec->tile_sao.cpu) {
9462306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
9562306a36Sopenharmony_ci				  hevc_dec->tile_sao.cpu,
9662306a36Sopenharmony_ci				  hevc_dec->tile_sao.dma);
9762306a36Sopenharmony_ci		hevc_dec->tile_sao.cpu = NULL;
9862306a36Sopenharmony_ci	}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	if (hevc_dec->tile_bsd.cpu) {
10162306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
10262306a36Sopenharmony_ci				  hevc_dec->tile_bsd.cpu,
10362306a36Sopenharmony_ci				  hevc_dec->tile_bsd.dma);
10462306a36Sopenharmony_ci		hevc_dec->tile_bsd.cpu = NULL;
10562306a36Sopenharmony_ci	}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
10862306a36Sopenharmony_ci	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
10962306a36Sopenharmony_ci						       &hevc_dec->tile_filter.dma,
11062306a36Sopenharmony_ci						       GFP_KERNEL);
11162306a36Sopenharmony_ci	if (!hevc_dec->tile_filter.cpu)
11262306a36Sopenharmony_ci		return -ENOMEM;
11362306a36Sopenharmony_ci	hevc_dec->tile_filter.size = size;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
11662306a36Sopenharmony_ci	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
11762306a36Sopenharmony_ci						    &hevc_dec->tile_sao.dma,
11862306a36Sopenharmony_ci						    GFP_KERNEL);
11962306a36Sopenharmony_ci	if (!hevc_dec->tile_sao.cpu)
12062306a36Sopenharmony_ci		goto err_free_tile_buffers;
12162306a36Sopenharmony_ci	hevc_dec->tile_sao.size = size;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
12462306a36Sopenharmony_ci	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
12562306a36Sopenharmony_ci						    &hevc_dec->tile_bsd.dma,
12662306a36Sopenharmony_ci						    GFP_KERNEL);
12762306a36Sopenharmony_ci	if (!hevc_dec->tile_bsd.cpu)
12862306a36Sopenharmony_ci		goto err_free_sao_buffers;
12962306a36Sopenharmony_ci	hevc_dec->tile_bsd.size = size;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	hevc_dec->num_tile_cols_allocated = num_tile_cols;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	return 0;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_cierr_free_sao_buffers:
13662306a36Sopenharmony_ci	if (hevc_dec->tile_sao.cpu)
13762306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
13862306a36Sopenharmony_ci				  hevc_dec->tile_sao.cpu,
13962306a36Sopenharmony_ci				  hevc_dec->tile_sao.dma);
14062306a36Sopenharmony_ci	hevc_dec->tile_sao.cpu = NULL;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cierr_free_tile_buffers:
14362306a36Sopenharmony_ci	if (hevc_dec->tile_filter.cpu)
14462306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
14562306a36Sopenharmony_ci				  hevc_dec->tile_filter.cpu,
14662306a36Sopenharmony_ci				  hevc_dec->tile_filter.dma);
14762306a36Sopenharmony_ci	hevc_dec->tile_filter.cpu = NULL;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	return -ENOMEM;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_cistatic int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	/*
15562306a36Sopenharmony_ci	 * for tile pixel format check if the width and height match
15662306a36Sopenharmony_ci	 * hardware constraints
15762306a36Sopenharmony_ci	 */
15862306a36Sopenharmony_ci	if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
15962306a36Sopenharmony_ci		if (ctx->dst_fmt.width !=
16062306a36Sopenharmony_ci		    ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
16162306a36Sopenharmony_ci			return -EINVAL;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci		if (ctx->dst_fmt.height !=
16462306a36Sopenharmony_ci		    ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
16562306a36Sopenharmony_ci			return -EINVAL;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	return 0;
16962306a36Sopenharmony_ci}
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ciint hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
17462306a36Sopenharmony_ci	struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
17562306a36Sopenharmony_ci	int ret;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	hantro_start_prepare_run(ctx);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	ctrls->decode_params =
18062306a36Sopenharmony_ci		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
18162306a36Sopenharmony_ci	if (WARN_ON(!ctrls->decode_params))
18262306a36Sopenharmony_ci		return -EINVAL;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	ctrls->scaling =
18562306a36Sopenharmony_ci		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
18662306a36Sopenharmony_ci	if (WARN_ON(!ctrls->scaling))
18762306a36Sopenharmony_ci		return -EINVAL;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	ctrls->sps =
19062306a36Sopenharmony_ci		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS);
19162306a36Sopenharmony_ci	if (WARN_ON(!ctrls->sps))
19262306a36Sopenharmony_ci		return -EINVAL;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
19562306a36Sopenharmony_ci	if (ret)
19662306a36Sopenharmony_ci		return ret;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	ctrls->pps =
19962306a36Sopenharmony_ci		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
20062306a36Sopenharmony_ci	if (WARN_ON(!ctrls->pps))
20162306a36Sopenharmony_ci		return -EINVAL;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	ret = tile_buffer_reallocate(ctx);
20462306a36Sopenharmony_ci	if (ret)
20562306a36Sopenharmony_ci		return ret;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	return 0;
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_civoid hantro_hevc_dec_exit(struct hantro_ctx *ctx)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
21362306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	if (hevc_dec->tile_sizes.cpu)
21662306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
21762306a36Sopenharmony_ci				  hevc_dec->tile_sizes.cpu,
21862306a36Sopenharmony_ci				  hevc_dec->tile_sizes.dma);
21962306a36Sopenharmony_ci	hevc_dec->tile_sizes.cpu = NULL;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (hevc_dec->scaling_lists.cpu)
22262306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size,
22362306a36Sopenharmony_ci				  hevc_dec->scaling_lists.cpu,
22462306a36Sopenharmony_ci				  hevc_dec->scaling_lists.dma);
22562306a36Sopenharmony_ci	hevc_dec->scaling_lists.cpu = NULL;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	if (hevc_dec->tile_filter.cpu)
22862306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
22962306a36Sopenharmony_ci				  hevc_dec->tile_filter.cpu,
23062306a36Sopenharmony_ci				  hevc_dec->tile_filter.dma);
23162306a36Sopenharmony_ci	hevc_dec->tile_filter.cpu = NULL;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	if (hevc_dec->tile_sao.cpu)
23462306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
23562306a36Sopenharmony_ci				  hevc_dec->tile_sao.cpu,
23662306a36Sopenharmony_ci				  hevc_dec->tile_sao.dma);
23762306a36Sopenharmony_ci	hevc_dec->tile_sao.cpu = NULL;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	if (hevc_dec->tile_bsd.cpu)
24062306a36Sopenharmony_ci		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
24162306a36Sopenharmony_ci				  hevc_dec->tile_bsd.cpu,
24262306a36Sopenharmony_ci				  hevc_dec->tile_bsd.dma);
24362306a36Sopenharmony_ci	hevc_dec->tile_bsd.cpu = NULL;
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ciint hantro_hevc_dec_init(struct hantro_ctx *ctx)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
24962306a36Sopenharmony_ci	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
25062306a36Sopenharmony_ci	unsigned int size;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	memset(hevc_dec, 0, sizeof(*hevc_dec));
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	/*
25562306a36Sopenharmony_ci	 * Maximum number of tiles times width and height (2 bytes each),
25662306a36Sopenharmony_ci	 * rounding up to next 16 bytes boundary + one extra 16 byte
25762306a36Sopenharmony_ci	 * chunk (HW guys wanted to have this).
25862306a36Sopenharmony_ci	 */
25962306a36Sopenharmony_ci	size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
26062306a36Sopenharmony_ci	hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
26162306a36Sopenharmony_ci						      &hevc_dec->tile_sizes.dma,
26262306a36Sopenharmony_ci						      GFP_KERNEL);
26362306a36Sopenharmony_ci	if (!hevc_dec->tile_sizes.cpu)
26462306a36Sopenharmony_ci		return -ENOMEM;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	hevc_dec->tile_sizes.size = size;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE,
26962306a36Sopenharmony_ci							 &hevc_dec->scaling_lists.dma,
27062306a36Sopenharmony_ci							 GFP_KERNEL);
27162306a36Sopenharmony_ci	if (!hevc_dec->scaling_lists.cpu)
27262306a36Sopenharmony_ci		return -ENOMEM;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	hevc_dec->scaling_lists.size = SCALING_LIST_SIZE;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	hantro_hevc_ref_init(ctx);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	return 0;
27962306a36Sopenharmony_ci}
280