1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2002 Dieter Shirley 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * dct_unquantize_h263_altivec: 5cabdff1aSopenharmony_ci * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include <stdlib.h> 25cabdff1aSopenharmony_ci#include <stdio.h> 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include "config.h" 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 30cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 31cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 32cabdff1aSopenharmony_ci#include "libavutil/ppc/cpu.h" 33cabdff1aSopenharmony_ci#include "libavutil/ppc/util_altivec.h" 34cabdff1aSopenharmony_ci 35cabdff1aSopenharmony_ci#include "libavcodec/mpegvideo.h" 36cabdff1aSopenharmony_ci 37cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_ci/* AltiVec version of dct_unquantize_h263 40cabdff1aSopenharmony_ci this code assumes `block' is 16 bytes-aligned */ 41cabdff1aSopenharmony_cistatic void dct_unquantize_h263_altivec(MpegEncContext *s, 42cabdff1aSopenharmony_ci int16_t *block, int n, int qscale) 43cabdff1aSopenharmony_ci{ 44cabdff1aSopenharmony_ci int i, level, qmul, qadd; 45cabdff1aSopenharmony_ci int nCoeffs; 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_ci qadd = (qscale - 1) | 1; 48cabdff1aSopenharmony_ci qmul = qscale << 1; 49cabdff1aSopenharmony_ci 50cabdff1aSopenharmony_ci if (s->mb_intra) { 51cabdff1aSopenharmony_ci if (!s->h263_aic) { 52cabdff1aSopenharmony_ci if (n < 4) 53cabdff1aSopenharmony_ci block[0] = block[0] * s->y_dc_scale; 54cabdff1aSopenharmony_ci else 55cabdff1aSopenharmony_ci block[0] = block[0] * s->c_dc_scale; 56cabdff1aSopenharmony_ci }else 57cabdff1aSopenharmony_ci qadd = 0; 58cabdff1aSopenharmony_ci i = 1; 59cabdff1aSopenharmony_ci nCoeffs= 63; //does not always use zigzag table 60cabdff1aSopenharmony_ci } else { 61cabdff1aSopenharmony_ci i = 0; 62cabdff1aSopenharmony_ci av_assert2(s->block_last_index[n]>=0); 63cabdff1aSopenharmony_ci nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; 64cabdff1aSopenharmony_ci } 65cabdff1aSopenharmony_ci 66cabdff1aSopenharmony_ci { 67cabdff1aSopenharmony_ci register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); 68cabdff1aSopenharmony_ci DECLARE_ALIGNED(16, short, qmul8) = qmul; 69cabdff1aSopenharmony_ci DECLARE_ALIGNED(16, short, qadd8) = qadd; 70cabdff1aSopenharmony_ci register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; 71cabdff1aSopenharmony_ci register vector bool short blockv_null, blockv_neg; 72cabdff1aSopenharmony_ci register short backup_0 = block[0]; 73cabdff1aSopenharmony_ci register int j = 0; 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ci qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); 76cabdff1aSopenharmony_ci qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); 77cabdff1aSopenharmony_ci nqaddv = vec_sub(vczero, qaddv); 78cabdff1aSopenharmony_ci 79cabdff1aSopenharmony_ci // vectorize all the 16 bytes-aligned blocks 80cabdff1aSopenharmony_ci // of 8 elements 81cabdff1aSopenharmony_ci for(; (j + 7) <= nCoeffs ; j+=8) { 82cabdff1aSopenharmony_ci blockv = vec_ld(j << 1, block); 83cabdff1aSopenharmony_ci blockv_neg = vec_cmplt(blockv, vczero); 84cabdff1aSopenharmony_ci blockv_null = vec_cmpeq(blockv, vczero); 85cabdff1aSopenharmony_ci // choose between +qadd or -qadd as the third operand 86cabdff1aSopenharmony_ci temp1 = vec_sel(qaddv, nqaddv, blockv_neg); 87cabdff1aSopenharmony_ci // multiply & add (block{i,i+7} * qmul [+-] qadd) 88cabdff1aSopenharmony_ci temp1 = vec_mladd(blockv, qmulv, temp1); 89cabdff1aSopenharmony_ci // put 0 where block[{i,i+7} used to have 0 90cabdff1aSopenharmony_ci blockv = vec_sel(temp1, blockv, blockv_null); 91cabdff1aSopenharmony_ci vec_st(blockv, j << 1, block); 92cabdff1aSopenharmony_ci } 93cabdff1aSopenharmony_ci 94cabdff1aSopenharmony_ci // if nCoeffs isn't a multiple of 8, finish the job 95cabdff1aSopenharmony_ci // using good old scalar units. 96cabdff1aSopenharmony_ci // (we could do it using a truncated vector, 97cabdff1aSopenharmony_ci // but I'm not sure it's worth the hassle) 98cabdff1aSopenharmony_ci for(; j <= nCoeffs ; j++) { 99cabdff1aSopenharmony_ci level = block[j]; 100cabdff1aSopenharmony_ci if (level) { 101cabdff1aSopenharmony_ci if (level < 0) { 102cabdff1aSopenharmony_ci level = level * qmul - qadd; 103cabdff1aSopenharmony_ci } else { 104cabdff1aSopenharmony_ci level = level * qmul + qadd; 105cabdff1aSopenharmony_ci } 106cabdff1aSopenharmony_ci block[j] = level; 107cabdff1aSopenharmony_ci } 108cabdff1aSopenharmony_ci } 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_ci if (i == 1) { 111cabdff1aSopenharmony_ci // cheat. this avoid special-casing the first iteration 112cabdff1aSopenharmony_ci block[0] = backup_0; 113cabdff1aSopenharmony_ci } 114cabdff1aSopenharmony_ci } 115cabdff1aSopenharmony_ci} 116cabdff1aSopenharmony_ci 117cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 118cabdff1aSopenharmony_ci 119cabdff1aSopenharmony_ciav_cold void ff_mpv_common_init_ppc(MpegEncContext *s) 120cabdff1aSopenharmony_ci{ 121cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 122cabdff1aSopenharmony_ci if (!PPC_ALTIVEC(av_get_cpu_flags())) 123cabdff1aSopenharmony_ci return; 124cabdff1aSopenharmony_ci 125cabdff1aSopenharmony_ci if ((s->avctx->dct_algo == FF_DCT_AUTO) || 126cabdff1aSopenharmony_ci (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { 127cabdff1aSopenharmony_ci s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; 128cabdff1aSopenharmony_ci s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; 129cabdff1aSopenharmony_ci } 130cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 131cabdff1aSopenharmony_ci} 132