1/* 2 * Copyright (c) 2002 Dieter Shirley 3 * 4 * dct_unquantize_h263_altivec: 5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include <stdlib.h> 25#include <stdio.h> 26 27#include "config.h" 28 29#include "libavutil/attributes.h" 30#include "libavutil/cpu.h" 31#include "libavutil/mem_internal.h" 32#include "libavutil/ppc/cpu.h" 33#include "libavutil/ppc/util_altivec.h" 34 35#include "libavcodec/mpegvideo.h" 36 37#if HAVE_ALTIVEC 38 39/* AltiVec version of dct_unquantize_h263 40 this code assumes `block' is 16 bytes-aligned */ 41static void dct_unquantize_h263_altivec(MpegEncContext *s, 42 int16_t *block, int n, int qscale) 43{ 44 int i, level, qmul, qadd; 45 int nCoeffs; 46 47 qadd = (qscale - 1) | 1; 48 qmul = qscale << 1; 49 50 if (s->mb_intra) { 51 if (!s->h263_aic) { 52 if (n < 4) 53 block[0] = block[0] * s->y_dc_scale; 54 else 55 block[0] = block[0] * s->c_dc_scale; 56 }else 57 qadd = 0; 58 i = 1; 59 nCoeffs= 63; //does not always use zigzag table 60 } else { 61 i = 0; 62 av_assert2(s->block_last_index[n]>=0); 63 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; 64 } 65 66 { 67 register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); 68 DECLARE_ALIGNED(16, short, qmul8) = qmul; 69 DECLARE_ALIGNED(16, short, qadd8) = qadd; 70 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; 71 register vector bool short blockv_null, blockv_neg; 72 register short backup_0 = block[0]; 73 register int j = 0; 74 75 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); 76 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); 77 nqaddv = vec_sub(vczero, qaddv); 78 79 // vectorize all the 16 bytes-aligned blocks 80 // of 8 elements 81 for(; (j + 7) <= nCoeffs ; j+=8) { 82 blockv = vec_ld(j << 1, block); 83 blockv_neg = vec_cmplt(blockv, vczero); 84 blockv_null = vec_cmpeq(blockv, vczero); 85 // choose between +qadd or -qadd as the third operand 86 temp1 = vec_sel(qaddv, nqaddv, blockv_neg); 87 // multiply & add (block{i,i+7} * qmul [+-] qadd) 88 temp1 = vec_mladd(blockv, qmulv, temp1); 89 // put 0 where block[{i,i+7} used to have 0 90 blockv = vec_sel(temp1, blockv, blockv_null); 91 vec_st(blockv, j << 1, block); 92 } 93 94 // if nCoeffs isn't a multiple of 8, finish the job 95 // using good old scalar units. 96 // (we could do it using a truncated vector, 97 // but I'm not sure it's worth the hassle) 98 for(; j <= nCoeffs ; j++) { 99 level = block[j]; 100 if (level) { 101 if (level < 0) { 102 level = level * qmul - qadd; 103 } else { 104 level = level * qmul + qadd; 105 } 106 block[j] = level; 107 } 108 } 109 110 if (i == 1) { 111 // cheat. this avoid special-casing the first iteration 112 block[0] = backup_0; 113 } 114 } 115} 116 117#endif /* HAVE_ALTIVEC */ 118 119av_cold void ff_mpv_common_init_ppc(MpegEncContext *s) 120{ 121#if HAVE_ALTIVEC 122 if (!PPC_ALTIVEC(av_get_cpu_flags())) 123 return; 124 125 if ((s->avctx->dct_algo == FF_DCT_AUTO) || 126 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { 127 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; 128 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; 129 } 130#endif /* HAVE_ALTIVEC */ 131} 132