162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Hantro G1 post-processor support
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/dma-mapping.h>
962306a36Sopenharmony_ci#include <linux/types.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include "hantro.h"
1262306a36Sopenharmony_ci#include "hantro_hw.h"
1362306a36Sopenharmony_ci#include "hantro_g1_regs.h"
1462306a36Sopenharmony_ci#include "hantro_g2_regs.h"
1562306a36Sopenharmony_ci#include "hantro_v4l2.h"
1662306a36Sopenharmony_ci
/*
 * Write @val to the G1 post-processor register field @reg_name, where
 * @reg_name is a member of hantro_g1_postproc_regs, using
 * hantro_reg_write().
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement; a bare { } block
 * would break an unbraced "if (...) MACRO(...); else ..." construct.
 */
#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
do { \
	hantro_reg_write((vpu), \
			 &hantro_g1_postproc_regs.reg_name, \
			 (val)); \
} while (0)
2362306a36Sopenharmony_ci
/*
 * Same as a plain post-processor field write, but routed through
 * hantro_reg_write_relaxed(): writes @val to the hantro_g1_postproc_regs
 * member named @reg_name.
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement; a bare { } block
 * would break an unbraced "if (...) MACRO(...); else ..." construct.
 */
#define HANTRO_PP_REG_WRITE_RELAXED(vpu, reg_name, val) \
do { \
	hantro_reg_write_relaxed((vpu), \
				 &hantro_g1_postproc_regs.reg_name, \
				 (val)); \
} while (0)
3062306a36Sopenharmony_ci
/*
 * Selector values for the post-processor input format; the G1 enable path
 * writes one of these to the input_fmt register field.
 *
 * NOTE(review): "YUV240" in VPU_PP_IN_YUV240_TILED looks like a
 * transposition of "YUV420" - confirm before renaming.
 */
enum {
	VPU_PP_IN_YUYV		= 0x0,
	VPU_PP_IN_NV12		= 0x1,
	VPU_PP_IN_YUV420	= 0x2,
	VPU_PP_IN_YUV240_TILED	= 0x5,
};

/*
 * Selector values for the post-processor output format; the G1 enable path
 * writes one of these to the output_fmt register field.
 */
enum {
	VPU_PP_OUT_RGB		= 0x0,
	VPU_PP_OUT_YUYV		= 0x3,
};
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic const struct hantro_postproc_regs hantro_g1_postproc_regs = {
3962306a36Sopenharmony_ci	.pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1},
4062306a36Sopenharmony_ci	.max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f},
4162306a36Sopenharmony_ci	.clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1},
4262306a36Sopenharmony_ci	.out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1},
4362306a36Sopenharmony_ci	.out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1},
4462306a36Sopenharmony_ci	.out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff},
4562306a36Sopenharmony_ci	.input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff},
4662306a36Sopenharmony_ci	.input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff},
4762306a36Sopenharmony_ci	.output_width = {G1_REG_PP_CONTROL, 4, 0x7ff},
4862306a36Sopenharmony_ci	.output_height = {G1_REG_PP_CONTROL, 15, 0x7ff},
4962306a36Sopenharmony_ci	.input_fmt = {G1_REG_PP_CONTROL, 29, 0x7},
5062306a36Sopenharmony_ci	.output_fmt = {G1_REG_PP_CONTROL, 26, 0x7},
5162306a36Sopenharmony_ci	.orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff},
5262306a36Sopenharmony_ci	.display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff},
5362306a36Sopenharmony_ci};
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cibool hantro_needs_postproc(const struct hantro_ctx *ctx,
5662306a36Sopenharmony_ci			   const struct hantro_fmt *fmt)
5762306a36Sopenharmony_ci{
5862306a36Sopenharmony_ci	if (ctx->is_encoder)
5962306a36Sopenharmony_ci		return false;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	if (ctx->need_postproc)
6262306a36Sopenharmony_ci		return true;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	return fmt->postprocessed;
6562306a36Sopenharmony_ci}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
7062306a36Sopenharmony_ci	struct vb2_v4l2_buffer *dst_buf;
7162306a36Sopenharmony_ci	u32 src_pp_fmt, dst_pp_fmt;
7262306a36Sopenharmony_ci	dma_addr_t dst_dma;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	/* Turn on pipeline mode. Must be done first. */
7562306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x1);
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	src_pp_fmt = VPU_PP_IN_NV12;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	switch (ctx->vpu_dst_fmt->fourcc) {
8062306a36Sopenharmony_ci	case V4L2_PIX_FMT_YUYV:
8162306a36Sopenharmony_ci		dst_pp_fmt = VPU_PP_OUT_YUYV;
8262306a36Sopenharmony_ci		break;
8362306a36Sopenharmony_ci	default:
8462306a36Sopenharmony_ci		WARN(1, "output format %d not supported by the post-processor, this wasn't expected.",
8562306a36Sopenharmony_ci		     ctx->vpu_dst_fmt->fourcc);
8662306a36Sopenharmony_ci		dst_pp_fmt = 0;
8762306a36Sopenharmony_ci		break;
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
9162306a36Sopenharmony_ci	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1);
9462306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1);
9562306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1);
9662306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, max_burst, 16);
9762306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma);
9862306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width));
9962306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height));
10062306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt);
10162306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt);
10262306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width);
10362306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height);
10462306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width));
10562306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
10662306a36Sopenharmony_ci}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_cistatic int down_scale_factor(struct hantro_ctx *ctx)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	if (ctx->src_fmt.width <= ctx->dst_fmt.width)
11162306a36Sopenharmony_ci		return 0;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width);
11462306a36Sopenharmony_ci}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_cistatic void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
11762306a36Sopenharmony_ci{
11862306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
11962306a36Sopenharmony_ci	struct vb2_v4l2_buffer *dst_buf;
12062306a36Sopenharmony_ci	int down_scale = down_scale_factor(ctx);
12162306a36Sopenharmony_ci	int out_depth;
12262306a36Sopenharmony_ci	size_t chroma_offset;
12362306a36Sopenharmony_ci	dma_addr_t dst_dma;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	dst_buf = hantro_get_dst_buf(ctx);
12662306a36Sopenharmony_ci	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
12762306a36Sopenharmony_ci	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
12862306a36Sopenharmony_ci			ctx->dst_fmt.height;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (down_scale) {
13162306a36Sopenharmony_ci		hantro_reg_write(vpu, &g2_down_scale_e, 1);
13262306a36Sopenharmony_ci		hantro_reg_write(vpu, &g2_down_scale_y, down_scale >> 2);
13362306a36Sopenharmony_ci		hantro_reg_write(vpu, &g2_down_scale_x, down_scale >> 2);
13462306a36Sopenharmony_ci		hantro_write_addr(vpu, G2_DS_DST, dst_dma);
13562306a36Sopenharmony_ci		hantro_write_addr(vpu, G2_DS_DST_CHR, dst_dma + (chroma_offset >> down_scale));
13662306a36Sopenharmony_ci	} else {
13762306a36Sopenharmony_ci		hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
13862306a36Sopenharmony_ci		hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset);
13962306a36Sopenharmony_ci	}
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	out_depth = hantro_get_format_depth(ctx->dst_fmt.pixelformat);
14262306a36Sopenharmony_ci	if (ctx->dev->variant->legacy_regs) {
14362306a36Sopenharmony_ci		u8 pp_shift = 0;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		if (out_depth > 8)
14662306a36Sopenharmony_ci			pp_shift = 16 - out_depth;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci		hantro_reg_write(ctx->dev, &g2_rs_out_bit_depth, out_depth);
14962306a36Sopenharmony_ci		hantro_reg_write(ctx->dev, &g2_pp_pix_shift, pp_shift);
15062306a36Sopenharmony_ci	} else {
15162306a36Sopenharmony_ci		hantro_reg_write(vpu, &g2_output_8_bits, out_depth > 8 ? 0 : 1);
15262306a36Sopenharmony_ci		hantro_reg_write(vpu, &g2_output_format, out_depth > 8 ? 1 : 0);
15362306a36Sopenharmony_ci	}
15462306a36Sopenharmony_ci	hantro_reg_write(vpu, &g2_out_rs_e, 1);
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx,
15862306a36Sopenharmony_ci					      struct v4l2_frmsizeenum *fsize)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	/**
16162306a36Sopenharmony_ci	 * G2 scaler can scale down by 0, 2, 4 or 8
16262306a36Sopenharmony_ci	 * use fsize->index has power of 2 diviser
16362306a36Sopenharmony_ci	 **/
16462306a36Sopenharmony_ci	if (fsize->index > 3)
16562306a36Sopenharmony_ci		return -EINVAL;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	if (!ctx->src_fmt.width || !ctx->src_fmt.height)
16862306a36Sopenharmony_ci		return -EINVAL;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
17162306a36Sopenharmony_ci	fsize->discrete.width = ctx->src_fmt.width >> fsize->index;
17262306a36Sopenharmony_ci	fsize->discrete.height = ctx->src_fmt.height >> fsize->index;
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	return 0;
17562306a36Sopenharmony_ci}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_civoid hantro_postproc_free(struct hantro_ctx *ctx)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
18062306a36Sopenharmony_ci	unsigned int i;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	for (i = 0; i < VB2_MAX_FRAME; ++i) {
18362306a36Sopenharmony_ci		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci		if (priv->cpu) {
18662306a36Sopenharmony_ci			dma_free_attrs(vpu->dev, priv->size, priv->cpu,
18762306a36Sopenharmony_ci				       priv->dma, priv->attrs);
18862306a36Sopenharmony_ci			priv->cpu = NULL;
18962306a36Sopenharmony_ci		}
19062306a36Sopenharmony_ci	}
19162306a36Sopenharmony_ci}
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ciint hantro_postproc_alloc(struct hantro_ctx *ctx)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
19662306a36Sopenharmony_ci	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
19762306a36Sopenharmony_ci	struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
19862306a36Sopenharmony_ci	unsigned int num_buffers = cap_queue->num_buffers;
19962306a36Sopenharmony_ci	struct v4l2_pix_format_mplane pix_mp;
20062306a36Sopenharmony_ci	const struct hantro_fmt *fmt;
20162306a36Sopenharmony_ci	unsigned int i, buf_size;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	/* this should always pick native format */
20462306a36Sopenharmony_ci	fmt = hantro_get_default_fmt(ctx, false, ctx->bit_depth, HANTRO_AUTO_POSTPROC);
20562306a36Sopenharmony_ci	if (!fmt)
20662306a36Sopenharmony_ci		return -EINVAL;
20762306a36Sopenharmony_ci	v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
20862306a36Sopenharmony_ci			    ctx->src_fmt.height);
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	buf_size = pix_mp.plane_fmt[0].sizeimage;
21162306a36Sopenharmony_ci	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
21262306a36Sopenharmony_ci		buf_size += hantro_h264_mv_size(pix_mp.width,
21362306a36Sopenharmony_ci						pix_mp.height);
21462306a36Sopenharmony_ci	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
21562306a36Sopenharmony_ci		buf_size += hantro_vp9_mv_size(pix_mp.width,
21662306a36Sopenharmony_ci					       pix_mp.height);
21762306a36Sopenharmony_ci	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
21862306a36Sopenharmony_ci		buf_size += hantro_hevc_mv_size(pix_mp.width,
21962306a36Sopenharmony_ci						pix_mp.height);
22062306a36Sopenharmony_ci	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME)
22162306a36Sopenharmony_ci		buf_size += hantro_av1_mv_size(pix_mp.width,
22262306a36Sopenharmony_ci					       pix_mp.height);
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	for (i = 0; i < num_buffers; ++i) {
22562306a36Sopenharmony_ci		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci		/*
22862306a36Sopenharmony_ci		 * The buffers on this queue are meant as intermediate
22962306a36Sopenharmony_ci		 * buffers for the decoder, so no mapping is needed.
23062306a36Sopenharmony_ci		 */
23162306a36Sopenharmony_ci		priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
23262306a36Sopenharmony_ci		priv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &priv->dma,
23362306a36Sopenharmony_ci					    GFP_KERNEL, priv->attrs);
23462306a36Sopenharmony_ci		if (!priv->cpu)
23562306a36Sopenharmony_ci			return -ENOMEM;
23662306a36Sopenharmony_ci		priv->size = buf_size;
23762306a36Sopenharmony_ci	}
23862306a36Sopenharmony_ci	return 0;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_cistatic void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x0);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_cistatic void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	hantro_reg_write(vpu, &g2_out_rs_e, 0);
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_civoid hantro_postproc_disable(struct hantro_ctx *ctx)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable)
26062306a36Sopenharmony_ci		vpu->variant->postproc_ops->disable(ctx);
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_civoid hantro_postproc_enable(struct hantro_ctx *ctx)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable)
26862306a36Sopenharmony_ci		vpu->variant->postproc_ops->enable(ctx);
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ciint hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
27262306a36Sopenharmony_ci				   struct v4l2_frmsizeenum *fsize)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	struct hantro_dev *vpu = ctx->dev;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes)
27762306a36Sopenharmony_ci		return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	return -EINVAL;
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ciconst struct hantro_postproc_ops hantro_g1_postproc_ops = {
28362306a36Sopenharmony_ci	.enable = hantro_postproc_g1_enable,
28462306a36Sopenharmony_ci	.disable = hantro_postproc_g1_disable,
28562306a36Sopenharmony_ci};
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ciconst struct hantro_postproc_ops hantro_g2_postproc_ops = {
28862306a36Sopenharmony_ci	.enable = hantro_postproc_g2_enable,
28962306a36Sopenharmony_ci	.disable = hantro_postproc_g2_disable,
29062306a36Sopenharmony_ci	.enum_framesizes = hantro_postproc_g2_enum_framesizes,
29162306a36Sopenharmony_ci};
292