162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Hantro VPU HEVC codec driver 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2020 Safran Passenger Innovations LLC 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/types.h> 962306a36Sopenharmony_ci#include <media/v4l2-mem2mem.h> 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include "hantro.h" 1262306a36Sopenharmony_ci#include "hantro_hw.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */ 1562306a36Sopenharmony_ci/* 1662306a36Sopenharmony_ci * BSD control data of current picture at tile border 1762306a36Sopenharmony_ci * 128 bits per 4x4 tile = 128/(8*4) bytes per row 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */ 2062306a36Sopenharmony_ci/* tile border coefficients of filter */ 2162306a36Sopenharmony_ci#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */ 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#define SCALING_LIST_SIZE (16 * 64) 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define MAX_TILE_COLS 20 2662306a36Sopenharmony_ci#define MAX_TILE_ROWS 22 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_civoid hantro_hevc_ref_init(struct hantro_ctx *ctx) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci hevc_dec->ref_bufs_used = 0; 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cidma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, 3662306a36Sopenharmony_ci s32 poc) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 3962306a36Sopenharmony_ci int i; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci /* Find the reference buffer in already known ones */ 4262306a36Sopenharmony_ci for (i = 0; i < NUM_REF_PICTURES; i++) { 4362306a36Sopenharmony_ci if (hevc_dec->ref_bufs_poc[i] == poc) { 4462306a36Sopenharmony_ci hevc_dec->ref_bufs_used |= 1 << i; 4562306a36Sopenharmony_ci return hevc_dec->ref_bufs[i].dma; 4662306a36Sopenharmony_ci } 4762306a36Sopenharmony_ci } 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci return 0; 5062306a36Sopenharmony_ci} 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ciint hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 5562306a36Sopenharmony_ci int i; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci /* Add a new reference buffer */ 5862306a36Sopenharmony_ci for (i = 0; i < NUM_REF_PICTURES; i++) { 5962306a36Sopenharmony_ci if (!(hevc_dec->ref_bufs_used & 1 << i)) { 6062306a36Sopenharmony_ci hevc_dec->ref_bufs_used |= 1 << i; 6162306a36Sopenharmony_ci hevc_dec->ref_bufs_poc[i] = poc; 6262306a36Sopenharmony_ci hevc_dec->ref_bufs[i].dma = addr; 6362306a36Sopenharmony_ci return 0; 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci } 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci return -EINVAL; 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic int tile_buffer_reallocate(struct hantro_ctx *ctx) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci struct hantro_dev *vpu = ctx->dev; 7362306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 7462306a36Sopenharmony_ci const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; 7562306a36Sopenharmony_ci const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; 7662306a36Sopenharmony_ci const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; 7762306a36Sopenharmony_ci unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1; 7862306a36Sopenharmony_ci unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63; 7962306a36Sopenharmony_ci unsigned int size; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci if (num_tile_cols <= 1 || 8262306a36Sopenharmony_ci num_tile_cols <= hevc_dec->num_tile_cols_allocated) 8362306a36Sopenharmony_ci return 0; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci /* Need to reallocate due to tiles passed via PPS */ 8662306a36Sopenharmony_ci if (hevc_dec->tile_filter.cpu) { 8762306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, 8862306a36Sopenharmony_ci hevc_dec->tile_filter.cpu, 8962306a36Sopenharmony_ci hevc_dec->tile_filter.dma); 9062306a36Sopenharmony_ci hevc_dec->tile_filter.cpu = NULL; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci if (hevc_dec->tile_sao.cpu) { 9462306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, 9562306a36Sopenharmony_ci hevc_dec->tile_sao.cpu, 9662306a36Sopenharmony_ci hevc_dec->tile_sao.dma); 9762306a36Sopenharmony_ci hevc_dec->tile_sao.cpu = NULL; 9862306a36Sopenharmony_ci } 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci if (hevc_dec->tile_bsd.cpu) { 10162306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size, 10262306a36Sopenharmony_ci hevc_dec->tile_bsd.cpu, 10362306a36Sopenharmony_ci hevc_dec->tile_bsd.dma); 10462306a36Sopenharmony_ci hevc_dec->tile_bsd.cpu = NULL; 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; 10862306a36Sopenharmony_ci hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size, 10962306a36Sopenharmony_ci &hevc_dec->tile_filter.dma, 11062306a36Sopenharmony_ci GFP_KERNEL); 11162306a36Sopenharmony_ci if (!hevc_dec->tile_filter.cpu) 11262306a36Sopenharmony_ci return -ENOMEM; 11362306a36Sopenharmony_ci hevc_dec->tile_filter.size = size; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; 11662306a36Sopenharmony_ci hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size, 11762306a36Sopenharmony_ci &hevc_dec->tile_sao.dma, 11862306a36Sopenharmony_ci GFP_KERNEL); 11962306a36Sopenharmony_ci if (!hevc_dec->tile_sao.cpu) 12062306a36Sopenharmony_ci goto err_free_tile_buffers; 12162306a36Sopenharmony_ci hevc_dec->tile_sao.size = size; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1); 12462306a36Sopenharmony_ci hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size, 12562306a36Sopenharmony_ci &hevc_dec->tile_bsd.dma, 12662306a36Sopenharmony_ci GFP_KERNEL); 12762306a36Sopenharmony_ci if (!hevc_dec->tile_bsd.cpu) 12862306a36Sopenharmony_ci goto err_free_sao_buffers; 12962306a36Sopenharmony_ci hevc_dec->tile_bsd.size = size; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci hevc_dec->num_tile_cols_allocated = num_tile_cols; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cierr_free_sao_buffers: 13662306a36Sopenharmony_ci if (hevc_dec->tile_sao.cpu) 13762306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, 13862306a36Sopenharmony_ci hevc_dec->tile_sao.cpu, 13962306a36Sopenharmony_ci hevc_dec->tile_sao.dma); 14062306a36Sopenharmony_ci hevc_dec->tile_sao.cpu = NULL; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_cierr_free_tile_buffers: 14362306a36Sopenharmony_ci if (hevc_dec->tile_filter.cpu) 14462306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, 14562306a36Sopenharmony_ci hevc_dec->tile_filter.cpu, 14662306a36Sopenharmony_ci hevc_dec->tile_filter.dma); 14762306a36Sopenharmony_ci hevc_dec->tile_filter.cpu = NULL; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci return -ENOMEM; 15062306a36Sopenharmony_ci} 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_cistatic int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci /* 15562306a36Sopenharmony_ci * for tile pixel format check if the width and height match 15662306a36Sopenharmony_ci * hardware constraints 15762306a36Sopenharmony_ci */ 15862306a36Sopenharmony_ci if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) { 15962306a36Sopenharmony_ci if (ctx->dst_fmt.width != 16062306a36Sopenharmony_ci ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width)) 16162306a36Sopenharmony_ci return -EINVAL; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci if (ctx->dst_fmt.height != 16462306a36Sopenharmony_ci ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height)) 16562306a36Sopenharmony_ci return -EINVAL; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci return 0; 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ciint hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec; 17462306a36Sopenharmony_ci struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls; 17562306a36Sopenharmony_ci int ret; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci hantro_start_prepare_run(ctx); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci ctrls->decode_params = 18062306a36Sopenharmony_ci hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); 18162306a36Sopenharmony_ci if (WARN_ON(!ctrls->decode_params)) 18262306a36Sopenharmony_ci return -EINVAL; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci ctrls->scaling = 18562306a36Sopenharmony_ci hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); 18662306a36Sopenharmony_ci if (WARN_ON(!ctrls->scaling)) 18762306a36Sopenharmony_ci return -EINVAL; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci ctrls->sps = 19062306a36Sopenharmony_ci hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS); 19162306a36Sopenharmony_ci if (WARN_ON(!ctrls->sps)) 19262306a36Sopenharmony_ci return -EINVAL; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci ret = hantro_hevc_validate_sps(ctx, ctrls->sps); 19562306a36Sopenharmony_ci if (ret) 19662306a36Sopenharmony_ci return ret; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci ctrls->pps = 19962306a36Sopenharmony_ci hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS); 20062306a36Sopenharmony_ci if (WARN_ON(!ctrls->pps)) 20162306a36Sopenharmony_ci return -EINVAL; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci ret = tile_buffer_reallocate(ctx); 20462306a36Sopenharmony_ci if (ret) 20562306a36Sopenharmony_ci return ret; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci return 0; 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_civoid hantro_hevc_dec_exit(struct hantro_ctx *ctx) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci struct hantro_dev *vpu = ctx->dev; 21362306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (hevc_dec->tile_sizes.cpu) 21662306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size, 21762306a36Sopenharmony_ci hevc_dec->tile_sizes.cpu, 21862306a36Sopenharmony_ci hevc_dec->tile_sizes.dma); 21962306a36Sopenharmony_ci hevc_dec->tile_sizes.cpu = NULL; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci if (hevc_dec->scaling_lists.cpu) 22262306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size, 22362306a36Sopenharmony_ci hevc_dec->scaling_lists.cpu, 22462306a36Sopenharmony_ci hevc_dec->scaling_lists.dma); 22562306a36Sopenharmony_ci hevc_dec->scaling_lists.cpu = NULL; 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci if (hevc_dec->tile_filter.cpu) 22862306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, 22962306a36Sopenharmony_ci hevc_dec->tile_filter.cpu, 23062306a36Sopenharmony_ci hevc_dec->tile_filter.dma); 23162306a36Sopenharmony_ci hevc_dec->tile_filter.cpu = NULL; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci if (hevc_dec->tile_sao.cpu) 23462306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, 23562306a36Sopenharmony_ci hevc_dec->tile_sao.cpu, 23662306a36Sopenharmony_ci hevc_dec->tile_sao.dma); 23762306a36Sopenharmony_ci hevc_dec->tile_sao.cpu = NULL; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (hevc_dec->tile_bsd.cpu) 24062306a36Sopenharmony_ci dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size, 24162306a36Sopenharmony_ci hevc_dec->tile_bsd.cpu, 24262306a36Sopenharmony_ci hevc_dec->tile_bsd.dma); 24362306a36Sopenharmony_ci hevc_dec->tile_bsd.cpu = NULL; 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ciint hantro_hevc_dec_init(struct hantro_ctx *ctx) 24762306a36Sopenharmony_ci{ 24862306a36Sopenharmony_ci struct hantro_dev *vpu = ctx->dev; 24962306a36Sopenharmony_ci struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; 25062306a36Sopenharmony_ci unsigned int size; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci memset(hevc_dec, 0, sizeof(*hevc_dec)); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci /* 25562306a36Sopenharmony_ci * Maximum number of tiles times width and height (2 bytes each), 25662306a36Sopenharmony_ci * rounding up to next 16 bytes boundary + one extra 16 byte 25762306a36Sopenharmony_ci * chunk (HW guys wanted to have this). 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_ci size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16); 26062306a36Sopenharmony_ci hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size, 26162306a36Sopenharmony_ci &hevc_dec->tile_sizes.dma, 26262306a36Sopenharmony_ci GFP_KERNEL); 26362306a36Sopenharmony_ci if (!hevc_dec->tile_sizes.cpu) 26462306a36Sopenharmony_ci return -ENOMEM; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci hevc_dec->tile_sizes.size = size; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE, 26962306a36Sopenharmony_ci &hevc_dec->scaling_lists.dma, 27062306a36Sopenharmony_ci GFP_KERNEL); 27162306a36Sopenharmony_ci if (!hevc_dec->scaling_lists.cpu) 27262306a36Sopenharmony_ci return -ENOMEM; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci hevc_dec->scaling_lists.size = SCALING_LIST_SIZE; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci hantro_hevc_ref_init(ctx); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci return 0; 27962306a36Sopenharmony_ci} 280