1/* 2 * Xvid MPEG-4 IDCT 3 * 4 * Copyright (C) 2006-2011 Xvid Solutions GmbH 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * Walken IDCT 26 * Alternative IDCT implementation for decoding compatibility. 27 * 28 * @author Skal 29 * @note This C version is not the original IDCT, but a modified one that 30 * yields the same error profile as the MMX/MMXEXT/SSE2 versions. 31 */ 32 33#include "config.h" 34#include "libavutil/attributes.h" 35#include "avcodec.h" 36#include "idctdsp.h" 37#include "xvididct.h" 38 39#define ROW_SHIFT 11 40#define COL_SHIFT 6 41 42// #define FIX(x) (int)((x) * (1 << ROW_SHIFT)) 43#define RND0 65536 // 1 << (COL_SHIFT + ROW_SHIFT - 1); 44#define RND1 3597 // FIX (1.75683487303); 45#define RND2 2260 // FIX (1.10355339059); 46#define RND3 1203 // FIX (0.587788325588); 47#define RND4 0 48#define RND5 120 // FIX (0.058658283817); 49#define RND6 512 // FIX (0.25); 50#define RND7 512 // FIX (0.25); 51 52static const int TAB04[] = { 22725, 21407, 19266, 16384, 12873, 8867, 4520 }; 53static const int TAB17[] = { 31521, 29692, 26722, 22725, 17855, 12299, 6270 }; 54static const int TAB26[] = { 29692, 27969, 25172, 21407, 16819, 11585, 5906 }; 55static const int TAB35[] = { 26722, 25172, 22654, 19266, 15137, 10426, 5315 }; 56 57static int idct_row(short *in, const int *const tab, int rnd) 58{ 59 const unsigned c1 = tab[0]; 60 const unsigned c2 = tab[1]; 61 const unsigned c3 = tab[2]; 62 const unsigned c4 = tab[3]; 63 const unsigned c5 = tab[4]; 64 const unsigned c6 = tab[5]; 65 const unsigned c7 = tab[6]; 66 67 const int right = in[5] | in[6] | in[7]; 68 const int left = in[1] | in[2] | in[3]; 69 if (!(right | in[4])) { 70 const int k = c4 * in[0] + rnd; 71 if (left) { 72 const unsigned a0 = k + c2 * in[2]; 73 const unsigned a1 = k + c6 * in[2]; 74 const unsigned a2 = k - c6 * in[2]; 75 const unsigned a3 = k - c2 * in[2]; 76 77 const int b0 = c1 * in[1] + c3 * in[3]; 78 const int b1 = c3 * in[1] - c7 * in[3]; 79 const int b2 = c5 * in[1] - c1 * in[3]; 80 const int b3 = c7 * in[1] - c5 * in[3]; 81 82 in[0] = (int)(a0 + b0) >> ROW_SHIFT; 83 in[1] = (int)(a1 + b1) >> ROW_SHIFT; 84 in[2] = (int)(a2 + b2) >> ROW_SHIFT; 85 in[3] = (int)(a3 + b3) >> ROW_SHIFT; 86 in[4] = (int)(a3 - b3) >> ROW_SHIFT; 87 in[5] = (int)(a2 - b2) >> ROW_SHIFT; 88 in[6] = (int)(a1 - b1) >> ROW_SHIFT; 89 in[7] = (int)(a0 - b0) >> ROW_SHIFT; 90 } else { 91 const int a0 = k >> ROW_SHIFT; 92 if (a0) { 93 in[0] = 94 in[1] = 95 in[2] = 96 in[3] = 97 in[4] = 98 in[5] = 99 in[6] = 100 in[7] = a0; 101 } else 102 return 0; 103 } 104 } else if (!(left | right)) { 105 const int a0 = (int)(rnd + c4 * (in[0] + in[4])) >> ROW_SHIFT; 106 const int a1 = (int)(rnd + c4 * (in[0] - in[4])) >> ROW_SHIFT; 107 108 in[0] = a0; 109 in[3] = a0; 110 in[4] = a0; 111 in[7] = a0; 112 in[1] = a1; 113 in[2] = a1; 114 in[5] = a1; 115 in[6] = a1; 116 } else { 117 const unsigned int k = c4 * in[0] + rnd; 118 const unsigned int a0 = k + c2 * in[2] + c4 * in[4] + c6 * in[6]; 119 const unsigned int a1 = k + c6 * in[2] - c4 * in[4] - c2 * in[6]; 120 const unsigned int a2 = k - c6 * in[2] - c4 * in[4] + c2 * in[6]; 121 const unsigned int a3 = k - c2 * in[2] + c4 * in[4] - c6 * in[6]; 122 123 const unsigned int b0 = c1 * in[1] + c3 * in[3] + c5 * in[5] + c7 * in[7]; 124 const unsigned int b1 = c3 * in[1] - c7 * in[3] - c1 * in[5] - c5 * in[7]; 125 const unsigned int b2 = c5 * in[1] - c1 * in[3] + c7 * in[5] + c3 * in[7]; 126 const unsigned int b3 = c7 * in[1] - c5 * in[3] + c3 * in[5] - c1 * in[7]; 127 128 in[0] = (int)(a0 + b0) >> ROW_SHIFT; 129 in[1] = (int)(a1 + b1) >> ROW_SHIFT; 130 in[2] = (int)(a2 + b2) >> ROW_SHIFT; 131 in[3] = (int)(a3 + b3) >> ROW_SHIFT; 132 in[4] = (int)(a3 - b3) >> ROW_SHIFT; 133 in[5] = (int)(a2 - b2) >> ROW_SHIFT; 134 in[6] = (int)(a1 - b1) >> ROW_SHIFT; 135 in[7] = (int)(a0 - b0) >> ROW_SHIFT; 136 } 137 return 1; 138} 139 140#define TAN1 0x32EC 141#define TAN2 0x6A0A 142#define TAN3 0xAB0E 143#define SQRT2 0x5A82 144 145#define MULT(c, x, n) ((unsigned)((int)((c) * (unsigned)(x)) >> (n))) 146// 12b version => #define MULT(c,x, n) ((((c) >> 3) * (x)) >> ((n) - 3)) 147// 12b zero-testing version: 148 149#define BUTTERFLY(a, b, tmp) \ 150 (tmp) = (a) + (b); \ 151 (b) = (a) - (b); \ 152 (a) = (tmp) 153 154#define LOAD_BUTTERFLY(m1, m2, a, b, tmp, s) \ 155 (m1) = (s)[(a)] + (s)[(b)]; \ 156 (m2) = (s)[(a)] - (s)[(b)] 157 158static void idct_col_8(short *const in) 159{ 160 int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill; 161 162 // odd 163 164 mm4 = (int) in[7 * 8]; 165 mm5 = (int) in[5 * 8]; 166 mm6 = (int) in[3 * 8]; 167 mm7 = (int) in[1 * 8]; 168 169 mm0 = MULT(TAN1, mm4, 16) + mm7; 170 mm1 = MULT(TAN1, mm7, 16) - mm4; 171 mm2 = MULT(TAN3, mm5, 16) + mm6; 172 mm3 = MULT(TAN3, mm6, 16) - mm5; 173 174 mm7 = mm0 + mm2; 175 mm4 = mm1 - mm3; 176 mm0 = mm0 - mm2; 177 mm1 = mm1 + mm3; 178 mm6 = mm0 + mm1; 179 mm5 = mm0 - mm1; 180 mm5 = 2 * MULT(SQRT2, mm5, 16); // 2*sqrt2 181 mm6 = 2 * MULT(SQRT2, mm6, 16); // Watch out: precision loss but done to match 182 // the pmulhw used in MMX/MMXEXT/SSE2 versions 183 184 // even 185 186 mm1 = (int) in[2 * 8]; 187 mm2 = (int) in[6 * 8]; 188 mm3 = MULT(TAN2, mm2, 16) + mm1; 189 mm2 = MULT(TAN2, mm1, 16) - mm2; 190 191 LOAD_BUTTERFLY(mm0, mm1, 0 * 8, 4 * 8, spill, in); 192 193 BUTTERFLY(mm0, mm3, spill); 194 BUTTERFLY(mm0, mm7, spill); 195 in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT); 196 in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT); 197 BUTTERFLY(mm3, mm4, mm0); 198 in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT); 199 in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT); 200 201 BUTTERFLY(mm1, mm2, mm0); 202 BUTTERFLY(mm1, mm6, mm0); 203 in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT); 204 in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT); 205 BUTTERFLY(mm2, mm5, mm0); 206 in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT); 207 in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT); 208} 209 210static void idct_col_4(short *const in) 211{ 212 int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill; 213 214 // odd 215 216 mm0 = (int) in[1 * 8]; 217 mm2 = (int) in[3 * 8]; 218 219 mm1 = MULT(TAN1, mm0, 16); 220 mm3 = MULT(TAN3, mm2, 16); 221 222 mm7 = mm0 + mm2; 223 mm4 = mm1 - mm3; 224 mm0 = mm0 - mm2; 225 mm1 = mm1 + mm3; 226 mm6 = mm0 + mm1; 227 mm5 = mm0 - mm1; 228 mm6 = 2 * MULT(SQRT2, mm6, 16); // 2*sqrt2 229 mm5 = 2 * MULT(SQRT2, mm5, 16); 230 231 // even 232 233 mm0 = mm1 = (int) in[0 * 8]; 234 mm3 = (int) in[2 * 8]; 235 mm2 = MULT(TAN2, mm3, 16); 236 237 BUTTERFLY(mm0, mm3, spill); 238 BUTTERFLY(mm0, mm7, spill); 239 in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT); 240 in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT); 241 BUTTERFLY(mm3, mm4, mm0); 242 in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT); 243 in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT); 244 245 BUTTERFLY(mm1, mm2, mm0); 246 BUTTERFLY(mm1, mm6, mm0); 247 in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT); 248 in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT); 249 BUTTERFLY(mm2, mm5, mm0); 250 in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT); 251 in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT); 252} 253 254static void idct_col_3(short *const in) 255{ 256 int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill; 257 258 // odd 259 260 mm7 = (int) in[1 * 8]; 261 mm4 = MULT(TAN1, mm7, 16); 262 263 mm6 = mm7 + mm4; 264 mm5 = mm7 - mm4; 265 mm6 = 2 * MULT(SQRT2, mm6, 16); // 2*sqrt2 266 mm5 = 2 * MULT(SQRT2, mm5, 16); 267 268 // even 269 270 mm0 = mm1 = (int) in[0 * 8]; 271 mm3 = (int) in[2 * 8]; 272 mm2 = MULT(TAN2, mm3, 16); 273 274 BUTTERFLY(mm0, mm3, spill); 275 BUTTERFLY(mm0, mm7, spill); 276 in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT); 277 in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT); 278 BUTTERFLY(mm3, mm4, mm0); 279 in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT); 280 in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT); 281 282 BUTTERFLY(mm1, mm2, mm0); 283 BUTTERFLY(mm1, mm6, mm0); 284 in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT); 285 in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT); 286 BUTTERFLY(mm2, mm5, mm0); 287 in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT); 288 in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT); 289} 290 291void ff_xvid_idct(int16_t *const in) 292{ 293 int i, rows = 0x07; 294 295 idct_row(in + 0 * 8, TAB04, RND0); 296 idct_row(in + 1 * 8, TAB17, RND1); 297 idct_row(in + 2 * 8, TAB26, RND2); 298 if (idct_row(in + 3 * 8, TAB35, RND3)) 299 rows |= 0x08; 300 if (idct_row(in + 4 * 8, TAB04, RND4)) 301 rows |= 0x10; 302 if (idct_row(in + 5 * 8, TAB35, RND5)) 303 rows |= 0x20; 304 if (idct_row(in + 6 * 8, TAB26, RND6)) 305 rows |= 0x40; 306 if (idct_row(in + 7 * 8, TAB17, RND7)) 307 rows |= 0x80; 308 309 if (rows & 0xF0) { 310 for (i = 0; i < 8; i++) 311 idct_col_8(in + i); 312 } else if (rows & 0x08) { 313 for (i = 0; i < 8; i++) 314 idct_col_4(in + i); 315 } else { 316 for (i = 0; i < 8; i++) 317 idct_col_3(in + i); 318 } 319} 320 321static void xvid_idct_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 322{ 323 ff_xvid_idct(block); 324 ff_put_pixels_clamped_c(block, dest, line_size); 325} 326 327static void xvid_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) 328{ 329 ff_xvid_idct(block); 330 ff_add_pixels_clamped_c(block, dest, line_size); 331} 332 333av_cold void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx) 334{ 335 const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; 336 337 if (high_bit_depth || avctx->lowres || 338 !(avctx->idct_algo == FF_IDCT_AUTO || 339 avctx->idct_algo == FF_IDCT_XVID)) 340 return; 341 342 if (avctx->idct_algo == FF_IDCT_XVID) { 343 c->idct_put = xvid_idct_put; 344 c->idct_add = xvid_idct_add; 345 c->idct = ff_xvid_idct; 346 c->perm_type = FF_IDCT_PERM_NONE; 347 } 348 349#if ARCH_X86 350 ff_xvid_idct_init_x86(c, avctx, high_bit_depth); 351#elif ARCH_MIPS 352 ff_xvid_idct_init_mips(c, avctx, high_bit_depth); 353#endif 354 355 ff_init_scantable_permutation(c->idct_permutation, c->perm_type); 356} 357