1/* 2 * Simple IDCT 3 * 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * simpleidct in C. 26 */ 27 28#include "libavutil/intreadwrite.h" 29#include "mathops.h" 30#include "simple_idct.h" 31 32#define IN_IDCT_DEPTH 16 33 34#define BIT_DEPTH 8 35#include "simple_idct_template.c" 36#undef BIT_DEPTH 37 38#define BIT_DEPTH 10 39#include "simple_idct_template.c" 40 41#define EXTRA_SHIFT 2 42#include "simple_idct_template.c" 43 44#undef EXTRA_SHIFT 45#undef BIT_DEPTH 46 47#define BIT_DEPTH 12 48#include "simple_idct_template.c" 49#undef BIT_DEPTH 50#undef IN_IDCT_DEPTH 51 52#define IN_IDCT_DEPTH 32 53#define BIT_DEPTH 10 54#include "simple_idct_template.c" 55#undef BIT_DEPTH 56#undef IN_IDCT_DEPTH 57 58/* 2x4x8 idct */ 59 60#define CN_SHIFT 12 61#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) 62#define C1 C_FIX(0.6532814824) 63#define C2 C_FIX(0.2705980501) 64 65/* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized, 66 and the butterfly must be multiplied by 0.5 * sqrt(2.0) */ 67#define C_SHIFT (4+1+12) 68 69static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) 70{ 71 int c0, c1, c2, c3, a0, a1, a2, a3; 72 73 a0 = col[8*0]; 74 a1 = col[8*2]; 75 a2 = col[8*4]; 76 a3 = col[8*6]; 77 c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); 78 c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); 79 c1 = a1 * C1 + a3 * C2; 80 c3 = a1 * C2 - a3 * C1; 81 dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT); 82 dest += line_size; 83 dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT); 84 dest += line_size; 85 dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT); 86 dest += line_size; 87 dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT); 88} 89 90#define BF(k) \ 91{\ 92 int a0, a1;\ 93 a0 = ptr[k];\ 94 a1 = ptr[8 + k];\ 95 ptr[k] = a0 + a1;\ 96 ptr[8 + k] = a0 - a1;\ 97} 98 99/* only used by DV codec. The input must be interlaced. 128 is added 100 to the pixels before clamping to avoid systematic error 101 (1024*sqrt(2)) offset would be needed otherwise. */ 102/* XXX: I think a 1.0/sqrt(2) normalization should be needed to 103 compensate the extra butterfly stage - I don't have the full DV 104 specification */ 105void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 106{ 107 int i; 108 int16_t *ptr; 109 110 /* butterfly */ 111 ptr = block; 112 for(i=0;i<4;i++) { 113 BF(0); 114 BF(1); 115 BF(2); 116 BF(3); 117 BF(4); 118 BF(5); 119 BF(6); 120 BF(7); 121 ptr += 2 * 8; 122 } 123 124 /* IDCT8 on each line */ 125 for(i=0; i<8; i++) { 126 idctRowCondDC_int16_8bit(block + i*8, 0); 127 } 128 129 /* IDCT4 and store */ 130 for(i=0;i<8;i++) { 131 idct4col_put(dest + i, 2 * line_size, block + i); 132 idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i); 133 } 134} 135 136/* 8x4 & 4x8 WMV2 IDCT */ 137#undef CN_SHIFT 138#undef C_SHIFT 139#undef C_FIX 140#undef C1 141#undef C2 142#define CN_SHIFT 12 143#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5)) 144#define C1 C_FIX(0.6532814824) 145#define C2 C_FIX(0.2705980501) 146#define C3 C_FIX(0.5) 147#define C_SHIFT (4+1+12) 148static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) 149{ 150 int c0, c1, c2, c3, a0, a1, a2, a3; 151 152 a0 = col[8*0]; 153 a1 = col[8*1]; 154 a2 = col[8*2]; 155 a3 = col[8*3]; 156 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); 157 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); 158 c1 = a1 * C1 + a3 * C2; 159 c3 = a1 * C2 - a3 * C1; 160 dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); 161 dest += line_size; 162 dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); 163 dest += line_size; 164 dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); 165 dest += line_size; 166 dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); 167} 168 169#define RN_SHIFT 15 170#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5)) 171#define R1 R_FIX(0.6532814824) 172#define R2 R_FIX(0.2705980501) 173#define R3 R_FIX(0.5) 174#define R_SHIFT 11 175static inline void idct4row(int16_t *row) 176{ 177 unsigned c0, c1, c2, c3; 178 int a0, a1, a2, a3; 179 180 a0 = row[0]; 181 a1 = row[1]; 182 a2 = row[2]; 183 a3 = row[3]; 184 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); 185 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); 186 c1 = a1 * R1 + a3 * R2; 187 c3 = a1 * R2 - a3 * R1; 188 row[0]= (c0 + c1) >> R_SHIFT; 189 row[1]= (c2 + c3) >> R_SHIFT; 190 row[2]= (c2 - c3) >> R_SHIFT; 191 row[3]= (c0 - c1) >> R_SHIFT; 192} 193 194void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 195{ 196 int i; 197 198 /* IDCT8 on each line */ 199 for(i=0; i<4; i++) { 200 idctRowCondDC_int16_8bit(block + i*8, 0); 201 } 202 203 /* IDCT4 and store */ 204 for(i=0;i<8;i++) { 205 idct4col_add(dest + i, line_size, block + i); 206 } 207} 208 209void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 210{ 211 int i; 212 213 /* IDCT4 on each line */ 214 for(i=0; i<8; i++) { 215 idct4row(block + i*8); 216 } 217 218 /* IDCT8 and store */ 219 for(i=0; i<4; i++){ 220 idctSparseColAdd_int16_8bit(dest + i, line_size, block + i); 221 } 222} 223 224void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 225{ 226 int i; 227 228 /* IDCT4 on each line */ 229 for(i=0; i<4; i++) { 230 idct4row(block + i*8); 231 } 232 233 /* IDCT4 and store */ 234 for(i=0; i<4; i++){ 235 idct4col_add(dest + i, line_size, block + i); 236 } 237} 238 239void ff_prores_idct_10(int16_t *block, const int16_t *qmat) 240{ 241 int i; 242 243 for (i = 0; i < 64; i++) 244 block[i] *= qmat[i]; 245 246 for (i = 0; i < 8; i++) 247 idctRowCondDC_extrashift_10(block + i*8, 2); 248 249 for (i = 0; i < 8; i++) { 250 block[i] += 8192; 251 idctSparseCol_extrashift_10(block + i); 252 } 253} 254 255void ff_prores_idct_12(int16_t *block, const int16_t *qmat) 256{ 257 int i; 258 259 for (i = 0; i < 64; i++) 260 block[i] *= qmat[i]; 261 262 for (i = 0; i < 8; i++) 263 idctRowCondDC_int16_12bit(block + i*8, 0); 264 265 for (i = 0; i < 8; i++) { 266 block[i] += 8192; 267 idctSparseCol_int16_12bit(block + i); 268 } 269} 270