1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Simple IDCT 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci/** 24cabdff1aSopenharmony_ci * @file 25cabdff1aSopenharmony_ci * simpleidct in C. 26cabdff1aSopenharmony_ci */ 27cabdff1aSopenharmony_ci 28cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 29cabdff1aSopenharmony_ci#include "mathops.h" 30cabdff1aSopenharmony_ci#include "simple_idct.h" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#define IN_IDCT_DEPTH 16 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ci#define BIT_DEPTH 8 35cabdff1aSopenharmony_ci#include "simple_idct_template.c" 36cabdff1aSopenharmony_ci#undef BIT_DEPTH 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_ci#define BIT_DEPTH 10 39cabdff1aSopenharmony_ci#include "simple_idct_template.c" 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_ci#define EXTRA_SHIFT 2 42cabdff1aSopenharmony_ci#include "simple_idct_template.c" 43cabdff1aSopenharmony_ci 44cabdff1aSopenharmony_ci#undef EXTRA_SHIFT 45cabdff1aSopenharmony_ci#undef BIT_DEPTH 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_ci#define BIT_DEPTH 12 48cabdff1aSopenharmony_ci#include "simple_idct_template.c" 49cabdff1aSopenharmony_ci#undef BIT_DEPTH 50cabdff1aSopenharmony_ci#undef IN_IDCT_DEPTH 51cabdff1aSopenharmony_ci 52cabdff1aSopenharmony_ci#define IN_IDCT_DEPTH 32 53cabdff1aSopenharmony_ci#define BIT_DEPTH 10 54cabdff1aSopenharmony_ci#include "simple_idct_template.c" 55cabdff1aSopenharmony_ci#undef BIT_DEPTH 56cabdff1aSopenharmony_ci#undef IN_IDCT_DEPTH 57cabdff1aSopenharmony_ci 58cabdff1aSopenharmony_ci/* 2x4x8 idct */ 59cabdff1aSopenharmony_ci 60cabdff1aSopenharmony_ci#define CN_SHIFT 12 61cabdff1aSopenharmony_ci#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) 62cabdff1aSopenharmony_ci#define C1 C_FIX(0.6532814824) 63cabdff1aSopenharmony_ci#define C2 C_FIX(0.2705980501) 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci/* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized, 66cabdff1aSopenharmony_ci and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ 67cabdff1aSopenharmony_ci#define C_SHIFT (4+1+12) 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_cistatic inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) 70cabdff1aSopenharmony_ci{ 71cabdff1aSopenharmony_ci int c0, c1, c2, c3, a0, a1, a2, a3; 72cabdff1aSopenharmony_ci 73cabdff1aSopenharmony_ci a0 = col[8*0]; 74cabdff1aSopenharmony_ci a1 = col[8*2]; 75cabdff1aSopenharmony_ci a2 = col[8*4]; 76cabdff1aSopenharmony_ci a3 = col[8*6]; 77cabdff1aSopenharmony_ci c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); 78cabdff1aSopenharmony_ci c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); 79cabdff1aSopenharmony_ci c1 = a1 * C1 + a3 * C2; 80cabdff1aSopenharmony_ci c3 = a1 * C2 - a3 * C1; 81cabdff1aSopenharmony_ci dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT); 82cabdff1aSopenharmony_ci dest += line_size; 83cabdff1aSopenharmony_ci dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT); 84cabdff1aSopenharmony_ci dest += line_size; 85cabdff1aSopenharmony_ci dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT); 86cabdff1aSopenharmony_ci dest += line_size; 87cabdff1aSopenharmony_ci dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT); 88cabdff1aSopenharmony_ci} 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci#define BF(k) \ 91cabdff1aSopenharmony_ci{\ 92cabdff1aSopenharmony_ci int a0, a1;\ 93cabdff1aSopenharmony_ci a0 = ptr[k];\ 94cabdff1aSopenharmony_ci a1 = ptr[8 + k];\ 95cabdff1aSopenharmony_ci ptr[k] = a0 + a1;\ 96cabdff1aSopenharmony_ci ptr[8 + k] = a0 - a1;\ 97cabdff1aSopenharmony_ci} 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_ci/* only used by DV codec. The input must be interlaced. 128 is added 100cabdff1aSopenharmony_ci to the pixels before clamping to avoid systematic error 101cabdff1aSopenharmony_ci (1024*sqrt(2)) offset would be needed otherwise. */ 102cabdff1aSopenharmony_ci/* XXX: I think a 1.0/sqrt(2) normalization should be needed to 103cabdff1aSopenharmony_ci compensate the extra butterfly stage - I don't have the full DV 104cabdff1aSopenharmony_ci specification */ 105cabdff1aSopenharmony_civoid ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 106cabdff1aSopenharmony_ci{ 107cabdff1aSopenharmony_ci int i; 108cabdff1aSopenharmony_ci int16_t *ptr; 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_ci /* butterfly */ 111cabdff1aSopenharmony_ci ptr = block; 112cabdff1aSopenharmony_ci for(i=0;i<4;i++) { 113cabdff1aSopenharmony_ci BF(0); 114cabdff1aSopenharmony_ci BF(1); 115cabdff1aSopenharmony_ci BF(2); 116cabdff1aSopenharmony_ci BF(3); 117cabdff1aSopenharmony_ci BF(4); 118cabdff1aSopenharmony_ci BF(5); 119cabdff1aSopenharmony_ci BF(6); 120cabdff1aSopenharmony_ci BF(7); 121cabdff1aSopenharmony_ci ptr += 2 * 8; 122cabdff1aSopenharmony_ci } 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ci /* IDCT8 on each line */ 125cabdff1aSopenharmony_ci for(i=0; i<8; i++) { 126cabdff1aSopenharmony_ci idctRowCondDC_int16_8bit(block + i*8, 0); 127cabdff1aSopenharmony_ci } 128cabdff1aSopenharmony_ci 129cabdff1aSopenharmony_ci /* IDCT4 and store */ 130cabdff1aSopenharmony_ci for(i=0;i<8;i++) { 131cabdff1aSopenharmony_ci idct4col_put(dest + i, 2 * line_size, block + i); 132cabdff1aSopenharmony_ci idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i); 133cabdff1aSopenharmony_ci } 134cabdff1aSopenharmony_ci} 135cabdff1aSopenharmony_ci 136cabdff1aSopenharmony_ci/* 8x4 & 4x8 WMV2 IDCT */ 137cabdff1aSopenharmony_ci#undef CN_SHIFT 138cabdff1aSopenharmony_ci#undef C_SHIFT 139cabdff1aSopenharmony_ci#undef C_FIX 140cabdff1aSopenharmony_ci#undef C1 141cabdff1aSopenharmony_ci#undef C2 142cabdff1aSopenharmony_ci#define CN_SHIFT 12 143cabdff1aSopenharmony_ci#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5)) 144cabdff1aSopenharmony_ci#define C1 C_FIX(0.6532814824) 145cabdff1aSopenharmony_ci#define C2 C_FIX(0.2705980501) 146cabdff1aSopenharmony_ci#define C3 C_FIX(0.5) 147cabdff1aSopenharmony_ci#define C_SHIFT (4+1+12) 148cabdff1aSopenharmony_cistatic inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) 149cabdff1aSopenharmony_ci{ 150cabdff1aSopenharmony_ci int c0, c1, c2, c3, a0, a1, a2, a3; 151cabdff1aSopenharmony_ci 152cabdff1aSopenharmony_ci a0 = col[8*0]; 153cabdff1aSopenharmony_ci a1 = col[8*1]; 154cabdff1aSopenharmony_ci a2 = col[8*2]; 155cabdff1aSopenharmony_ci a3 = col[8*3]; 156cabdff1aSopenharmony_ci c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); 157cabdff1aSopenharmony_ci c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); 158cabdff1aSopenharmony_ci c1 = a1 * C1 + a3 * C2; 159cabdff1aSopenharmony_ci c3 = a1 * C2 - a3 * C1; 160cabdff1aSopenharmony_ci dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); 161cabdff1aSopenharmony_ci dest += line_size; 162cabdff1aSopenharmony_ci dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); 163cabdff1aSopenharmony_ci dest += line_size; 164cabdff1aSopenharmony_ci dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); 165cabdff1aSopenharmony_ci dest += line_size; 166cabdff1aSopenharmony_ci dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); 167cabdff1aSopenharmony_ci} 168cabdff1aSopenharmony_ci 169cabdff1aSopenharmony_ci#define RN_SHIFT 15 170cabdff1aSopenharmony_ci#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5)) 171cabdff1aSopenharmony_ci#define R1 R_FIX(0.6532814824) 172cabdff1aSopenharmony_ci#define R2 R_FIX(0.2705980501) 173cabdff1aSopenharmony_ci#define R3 R_FIX(0.5) 174cabdff1aSopenharmony_ci#define R_SHIFT 11 175cabdff1aSopenharmony_cistatic inline void idct4row(int16_t *row) 176cabdff1aSopenharmony_ci{ 177cabdff1aSopenharmony_ci unsigned c0, c1, c2, c3; 178cabdff1aSopenharmony_ci int a0, a1, a2, a3; 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_ci a0 = row[0]; 181cabdff1aSopenharmony_ci a1 = row[1]; 182cabdff1aSopenharmony_ci a2 = row[2]; 183cabdff1aSopenharmony_ci a3 = row[3]; 184cabdff1aSopenharmony_ci c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); 185cabdff1aSopenharmony_ci c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); 186cabdff1aSopenharmony_ci c1 = a1 * R1 + a3 * R2; 187cabdff1aSopenharmony_ci c3 = a1 * R2 - a3 * R1; 188cabdff1aSopenharmony_ci row[0]= (c0 + c1) >> R_SHIFT; 189cabdff1aSopenharmony_ci row[1]= (c2 + c3) >> R_SHIFT; 190cabdff1aSopenharmony_ci row[2]= (c2 - c3) >> R_SHIFT; 191cabdff1aSopenharmony_ci row[3]= (c0 - c1) >> R_SHIFT; 192cabdff1aSopenharmony_ci} 193cabdff1aSopenharmony_ci 194cabdff1aSopenharmony_civoid ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 195cabdff1aSopenharmony_ci{ 196cabdff1aSopenharmony_ci int i; 197cabdff1aSopenharmony_ci 198cabdff1aSopenharmony_ci /* IDCT8 on each line */ 199cabdff1aSopenharmony_ci for(i=0; i<4; i++) { 200cabdff1aSopenharmony_ci idctRowCondDC_int16_8bit(block + i*8, 0); 201cabdff1aSopenharmony_ci } 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_ci /* IDCT4 and store */ 204cabdff1aSopenharmony_ci for(i=0;i<8;i++) { 205cabdff1aSopenharmony_ci idct4col_add(dest + i, line_size, block + i); 206cabdff1aSopenharmony_ci } 207cabdff1aSopenharmony_ci} 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_civoid ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 210cabdff1aSopenharmony_ci{ 211cabdff1aSopenharmony_ci int i; 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci /* IDCT4 on each line */ 214cabdff1aSopenharmony_ci for(i=0; i<8; i++) { 215cabdff1aSopenharmony_ci idct4row(block + i*8); 216cabdff1aSopenharmony_ci } 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci /* IDCT8 and store */ 219cabdff1aSopenharmony_ci for(i=0; i<4; i++){ 220cabdff1aSopenharmony_ci idctSparseColAdd_int16_8bit(dest + i, line_size, block + i); 221cabdff1aSopenharmony_ci } 222cabdff1aSopenharmony_ci} 223cabdff1aSopenharmony_ci 224cabdff1aSopenharmony_civoid ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 225cabdff1aSopenharmony_ci{ 226cabdff1aSopenharmony_ci int i; 227cabdff1aSopenharmony_ci 228cabdff1aSopenharmony_ci /* IDCT4 on each line */ 229cabdff1aSopenharmony_ci for(i=0; i<4; i++) { 230cabdff1aSopenharmony_ci idct4row(block + i*8); 231cabdff1aSopenharmony_ci } 232cabdff1aSopenharmony_ci 233cabdff1aSopenharmony_ci /* IDCT4 and store */ 234cabdff1aSopenharmony_ci for(i=0; i<4; i++){ 235cabdff1aSopenharmony_ci idct4col_add(dest + i, line_size, block + i); 236cabdff1aSopenharmony_ci } 237cabdff1aSopenharmony_ci} 238cabdff1aSopenharmony_ci 239cabdff1aSopenharmony_civoid ff_prores_idct_10(int16_t *block, const int16_t *qmat) 240cabdff1aSopenharmony_ci{ 241cabdff1aSopenharmony_ci int i; 242cabdff1aSopenharmony_ci 243cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 244cabdff1aSopenharmony_ci block[i] *= qmat[i]; 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 247cabdff1aSopenharmony_ci idctRowCondDC_extrashift_10(block + i*8, 2); 248cabdff1aSopenharmony_ci 249cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 250cabdff1aSopenharmony_ci block[i] += 8192; 251cabdff1aSopenharmony_ci idctSparseCol_extrashift_10(block + i); 252cabdff1aSopenharmony_ci } 253cabdff1aSopenharmony_ci} 254cabdff1aSopenharmony_ci 255cabdff1aSopenharmony_civoid ff_prores_idct_12(int16_t *block, const int16_t *qmat) 256cabdff1aSopenharmony_ci{ 257cabdff1aSopenharmony_ci int i; 258cabdff1aSopenharmony_ci 259cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 260cabdff1aSopenharmony_ci block[i] *= qmat[i]; 261cabdff1aSopenharmony_ci 262cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) 263cabdff1aSopenharmony_ci idctRowCondDC_int16_12bit(block + i*8, 0); 264cabdff1aSopenharmony_ci 265cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 266cabdff1aSopenharmony_ci block[i] += 8192; 267cabdff1aSopenharmony_ci idctSparseCol_int16_12bit(block + i); 268cabdff1aSopenharmony_ci } 269cabdff1aSopenharmony_ci} 270