162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Hantro VP9 codec driver 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2021 Collabora Ltd. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/types.h> 962306a36Sopenharmony_ci#include <media/v4l2-mem2mem.h> 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include "hantro.h" 1262306a36Sopenharmony_ci#include "hantro_hw.h" 1362306a36Sopenharmony_ci#include "hantro_vp9.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#define POW2(x) (1 << (x)) 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#define MAX_LOG2_TILE_COLUMNS 6 1862306a36Sopenharmony_ci#define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS) 1962306a36Sopenharmony_ci#define MAX_TILE_COLS 20 2062306a36Sopenharmony_ci#define MAX_TILE_ROWS 22 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistatic size_t hantro_vp9_tile_filter_size(unsigned int height) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci u32 h, height32, size; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci h = roundup(height, 8); 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci height32 = roundup(h, 64); 2962306a36Sopenharmony_ci size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */ 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci return size; 3262306a36Sopenharmony_ci} 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_cistatic size_t hantro_vp9_bsd_control_size(unsigned int height) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci u32 h, height32; 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci h = roundup(height, 8); 3962306a36Sopenharmony_ci height32 = roundup(h, 64); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1); 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_cistatic size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci u32 w, h; 4762306a36Sopenharmony_ci int num_ctbs; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci w = roundup(width, 8); 5062306a36Sopenharmony_ci h = roundup(height, 8); 5162306a36Sopenharmony_ci num_ctbs = ((w + 63) / 64) * ((h + 63) / 64); 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci return num_ctbs * 32; 5462306a36Sopenharmony_ci} 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistatic inline size_t hantro_vp9_prob_tab_size(void) 5762306a36Sopenharmony_ci{ 5862306a36Sopenharmony_ci return roundup(sizeof(struct hantro_g2_all_probs), 16); 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic inline size_t hantro_vp9_count_tab_size(void) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci return roundup(sizeof(struct symbol_counts), 16); 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic inline size_t hantro_vp9_tile_info_size(void) 6762306a36Sopenharmony_ci{ 6862306a36Sopenharmony_ci return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci if (i == 0) 7462306a36Sopenharmony_ci return &cnts->count_coeffs[j][k][l][m]; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci if (i == 1) 7762306a36Sopenharmony_ci return &cnts->count_coeffs8x8[j][k][l][m]; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci if (i == 2) 8062306a36Sopenharmony_ci return &cnts->count_coeffs16x16[j][k][l][m]; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci if (i == 3) 8362306a36Sopenharmony_ci return &cnts->count_coeffs32x32[j][k][l][m]; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci return NULL; 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cistatic void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci if (i == 0) 9162306a36Sopenharmony_ci return &cnts->count_coeffs[j][k][l][m][3]; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci if (i == 1) 9462306a36Sopenharmony_ci return &cnts->count_coeffs8x8[j][k][l][m][3]; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci if (i == 2) 9762306a36Sopenharmony_ci return &cnts->count_coeffs16x16[j][k][l][m][3]; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci if (i == 3) 10062306a36Sopenharmony_ci return &cnts->count_coeffs32x32[j][k][l][m][3]; 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci return NULL; 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci#define INNER_LOOP \ 10662306a36Sopenharmony_ci do { \ 10762306a36Sopenharmony_ci for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \ 10862306a36Sopenharmony_ci vp9_ctx->cnts.coeff[i][j][k][l][m] = \ 10962306a36Sopenharmony_ci get_coeffs_arr(cnts, i, j, k, l, m); \ 11062306a36Sopenharmony_ci vp9_ctx->cnts.eob[i][j][k][l][m][0] = \ 11162306a36Sopenharmony_ci &cnts->count_eobs[i][j][k][l][m]; \ 11262306a36Sopenharmony_ci vp9_ctx->cnts.eob[i][j][k][l][m][1] = \ 11362306a36Sopenharmony_ci get_eobs1(cnts, i, j, k, l, m); \ 11462306a36Sopenharmony_ci } \ 11562306a36Sopenharmony_ci } while (0) 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_cistatic void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx) 11862306a36Sopenharmony_ci{ 11962306a36Sopenharmony_ci struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; 12062306a36Sopenharmony_ci struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset; 12162306a36Sopenharmony_ci int i, j, k, l, m; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci vp9_ctx->cnts.partition = &cnts->partition_counts; 12462306a36Sopenharmony_ci vp9_ctx->cnts.skip = &cnts->mbskip_count; 12562306a36Sopenharmony_ci vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count; 12662306a36Sopenharmony_ci vp9_ctx->cnts.tx32p = &cnts->tx32x32_count; 12762306a36Sopenharmony_ci /* 12862306a36Sopenharmony_ci * g2 hardware uses tx16x16_count[2][3], while the api 12962306a36Sopenharmony_ci * expects tx16p[2][4], so this must be explicitly copied 13062306a36Sopenharmony_ci * into vp9_ctx->cnts.tx16p when passing the data to the 13162306a36Sopenharmony_ci * vp9 library function 13262306a36Sopenharmony_ci */ 13362306a36Sopenharmony_ci vp9_ctx->cnts.tx8p = &cnts->tx8x8_count; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts; 13662306a36Sopenharmony_ci vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts; 13762306a36Sopenharmony_ci vp9_ctx->cnts.comp = &cnts->comp_inter_count; 13862306a36Sopenharmony_ci vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count; 13962306a36Sopenharmony_ci vp9_ctx->cnts.single_ref = &cnts->single_ref_count; 14062306a36Sopenharmony_ci vp9_ctx->cnts.filter = &cnts->switchable_interp_counts; 14162306a36Sopenharmony_ci vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints; 14262306a36Sopenharmony_ci vp9_ctx->cnts.sign = &cnts->mv_counts.sign; 14362306a36Sopenharmony_ci vp9_ctx->cnts.classes = &cnts->mv_counts.classes; 14462306a36Sopenharmony_ci vp9_ctx->cnts.class0 = &cnts->mv_counts.class0; 14562306a36Sopenharmony_ci vp9_ctx->cnts.bits = &cnts->mv_counts.bits; 14662306a36Sopenharmony_ci vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp; 14762306a36Sopenharmony_ci vp9_ctx->cnts.fp = &cnts->mv_counts.fp; 14862306a36Sopenharmony_ci vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp; 14962306a36Sopenharmony_ci vp9_ctx->cnts.hp = &cnts->mv_counts.hp; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i) 15262306a36Sopenharmony_ci for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j) 15362306a36Sopenharmony_ci for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k) 15462306a36Sopenharmony_ci for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l) 15562306a36Sopenharmony_ci INNER_LOOP; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ciint hantro_vp9_dec_init(struct hantro_ctx *ctx) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci struct hantro_dev *vpu = ctx->dev; 16162306a36Sopenharmony_ci const struct hantro_variant *variant = vpu->variant; 16262306a36Sopenharmony_ci struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; 16362306a36Sopenharmony_ci struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; 16462306a36Sopenharmony_ci struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; 16562306a36Sopenharmony_ci struct hantro_aux_buf *misc = &vp9_dec->misc; 16662306a36Sopenharmony_ci u32 i, max_width, max_height, size; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci if (variant->num_dec_fmts < 1) 16962306a36Sopenharmony_ci return -EINVAL; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci for (i = 0; i < variant->num_dec_fmts; ++i) 17262306a36Sopenharmony_ci if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME) 17362306a36Sopenharmony_ci break; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci if (i == variant->num_dec_fmts) 17662306a36Sopenharmony_ci return -EINVAL; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci max_width = vpu->variant->dec_fmts[i].frmsize.max_width; 17962306a36Sopenharmony_ci max_height = vpu->variant->dec_fmts[i].frmsize.max_height; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci size = hantro_vp9_tile_filter_size(max_height); 18262306a36Sopenharmony_ci vp9_dec->bsd_ctrl_offset = size; 18362306a36Sopenharmony_ci size += hantro_vp9_bsd_control_size(max_height); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL); 18662306a36Sopenharmony_ci if (!tile_edge->cpu) 18762306a36Sopenharmony_ci return -ENOMEM; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci tile_edge->size = size; 19062306a36Sopenharmony_ci memset(tile_edge->cpu, 0, size); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci size = hantro_vp9_segment_map_size(max_width, max_height); 19362306a36Sopenharmony_ci vp9_dec->segment_map_size = size; 19462306a36Sopenharmony_ci size *= 2; /* we need two areas of this size, used alternately */ 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL); 19762306a36Sopenharmony_ci if (!segment_map->cpu) 19862306a36Sopenharmony_ci goto err_segment_map; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci segment_map->size = size; 20162306a36Sopenharmony_ci memset(segment_map->cpu, 0, size); 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci size = hantro_vp9_prob_tab_size(); 20462306a36Sopenharmony_ci vp9_dec->ctx_counters_offset = size; 20562306a36Sopenharmony_ci size += hantro_vp9_count_tab_size(); 20662306a36Sopenharmony_ci vp9_dec->tile_info_offset = size; 20762306a36Sopenharmony_ci size += hantro_vp9_tile_info_size(); 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL); 21062306a36Sopenharmony_ci if (!misc->cpu) 21162306a36Sopenharmony_ci goto err_misc; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci misc->size = size; 21462306a36Sopenharmony_ci memset(misc->cpu, 0, size); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci init_v4l2_vp9_count_tbl(ctx); 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci return 0; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cierr_misc: 22162306a36Sopenharmony_ci dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cierr_segment_map: 22462306a36Sopenharmony_ci dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci return -ENOMEM; 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_civoid hantro_vp9_dec_exit(struct hantro_ctx *ctx) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci struct hantro_dev *vpu = ctx->dev; 23262306a36Sopenharmony_ci struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; 23362306a36Sopenharmony_ci struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; 23462306a36Sopenharmony_ci struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; 23562306a36Sopenharmony_ci struct hantro_aux_buf *misc = &vp9_dec->misc; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma); 23862306a36Sopenharmony_ci dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma); 23962306a36Sopenharmony_ci dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma); 24062306a36Sopenharmony_ci} 241