1/* 2 * (c) 2001 Fabrice Bellard 3 * 2007 Marc Hoffman <marc.hoffman@analog.com> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * DCT test (c) 2001 Fabrice Bellard 25 * Started from sample code by Juan J. Sierralta P. 26 */ 27 28#include "config.h" 29#include "config_components.h" 30#include <stdlib.h> 31#include <stdio.h> 32#include <string.h> 33#if HAVE_UNISTD_H 34#include <unistd.h> 35#endif 36#include <math.h> 37 38#include "libavutil/cpu.h" 39#include "libavutil/common.h" 40#include "libavutil/internal.h" 41#include "libavutil/lfg.h" 42#include "libavutil/mem_internal.h" 43#include "libavutil/time.h" 44 45#include "libavcodec/dct.h" 46#include "libavcodec/idctdsp.h" 47#include "libavcodec/simple_idct.h" 48#include "libavcodec/xvididct.h" 49#include "libavcodec/aandcttab.h" 50#include "libavcodec/faandct.h" 51#include "libavcodec/faanidct.h" 52#include "libavcodec/dctref.h" 53 54struct algo { 55 const char *name; 56 void (*func)(int16_t *block); 57 enum idct_permutation_type perm_type; 58 int cpu_flag; 59 int nonspec; 60}; 61 62static const struct algo fdct_tab[] = { 63 { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE }, 64 { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE }, 65 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE }, 66#if CONFIG_FAANDCT 67 { "FAAN", ff_faandct, FF_IDCT_PERM_NONE }, 68#endif /* CONFIG_FAANDCT */ 69}; 70 71static void ff_prores_idct_wrap(int16_t *dst){ 72 LOCAL_ALIGNED(16, int16_t, qmat, [64]); 73 int i; 74 75 for(i=0; i<64; i++){ 76 qmat[i]=4; 77 } 78 ff_prores_idct_10(dst, qmat); 79 for(i=0; i<64; i++) { 80 dst[i] -= 512; 81 } 82} 83 84static const struct algo idct_tab[] = { 85 { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, 86 { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, 87 { "SIMPLE-C", ff_simple_idct_int16_8bit, FF_IDCT_PERM_NONE }, 88 { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE }, 89 { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 }, 90 { "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, 91#if CONFIG_FAANIDCT 92 { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, 93#endif /* CONFIG_FAANIDCT */ 94#if CONFIG_MPEG4_DECODER 95 { "XVID", ff_xvid_idct, FF_IDCT_PERM_NONE, 0, 1 }, 96#endif /* CONFIG_MPEG4_DECODER */ 97}; 98 99#if ARCH_AARCH64 100#include "aarch64/dct.c" 101#elif ARCH_ARM 102#include "arm/dct.c" 103#elif ARCH_PPC 104#include "ppc/dct.c" 105#elif ARCH_X86 106#include "x86/dct.c" 107#else 108static const struct algo fdct_tab_arch[] = { { 0 } }; 109static const struct algo idct_tab_arch[] = { { 0 } }; 110#endif 111 112#define AANSCALE_BITS 12 113 114#define NB_ITS 20000 115#define NB_ITS_SPEED 50000 116 117DECLARE_ALIGNED(16, static int16_t, block)[64]; 118DECLARE_ALIGNED(8, static int16_t, block1)[64]; 119 120static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals) 121{ 122 int i, j; 123 124 memset(block, 0, 64 * sizeof(*block)); 125 126 switch (test) { 127 case 0: 128 for (i = 0; i < 64; i++) 129 block[i] = (av_lfg_get(prng) % (2*vals)) -vals; 130 if (is_idct) { 131 ff_ref_fdct(block); 132 for (i = 0; i < 64; i++) 133 block[i] >>= 3; 134 } 135 break; 136 case 1: 137 j = av_lfg_get(prng) % 10 + 1; 138 for (i = 0; i < j; i++) { 139 int idx = av_lfg_get(prng) % 64; 140 block[idx] = av_lfg_get(prng) % (2*vals) -vals; 141 } 142 break; 143 case 2: 144 block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals); 145 block[63] = (block[0] & 1) ^ 1; 146 break; 147 } 148} 149 150static void permute(int16_t dst[64], const int16_t src[64], 151 enum idct_permutation_type perm_type) 152{ 153 int i; 154 155#if ARCH_X86 156 if (permute_x86(dst, src, perm_type)) 157 return; 158#endif 159 160 switch (perm_type) { 161 case FF_IDCT_PERM_LIBMPEG2: 162 for (i = 0; i < 64; i++) 163 dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; 164 break; 165 case FF_IDCT_PERM_PARTTRANS: 166 for (i = 0; i < 64; i++) 167 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; 168 break; 169 case FF_IDCT_PERM_TRANSPOSE: 170 for (i = 0; i < 64; i++) 171 dst[(i>>3) | ((i<<3)&0x38)] = src[i]; 172 break; 173 default: 174 for (i = 0; i < 64; i++) 175 dst[i] = src[i]; 176 break; 177 } 178} 179 180static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits) 181{ 182 void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct; 183 int it, i, scale; 184 int err_inf, v; 185 int64_t err2, ti, ti1, it1, err_sum = 0; 186 int64_t sysErr[64], sysErrMax = 0; 187 int64_t err2_matrix[64], err2_max = 0; 188 int maxout = 0; 189 int blockSumErrMax = 0, blockSumErr; 190 AVLFG prng; 191 const int vals=1<<bits; 192 double omse, ome; 193 int spec_err; 194 195 av_lfg_init(&prng, 1); 196 197 err_inf = 0; 198 err2 = 0; 199 for (i = 0; i < 64; i++) 200 err2_matrix[i] = sysErr[i] = 0; 201 for (it = 0; it < NB_ITS; it++) { 202 init_block(block1, test, is_idct, &prng, vals); 203 permute(block, block1, dct->perm_type); 204 205 dct->func(block); 206 emms_c(); 207 208 if (!strcmp(dct->name, "IJG-AAN-INT")) { 209 for (i = 0; i < 64; i++) { 210 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; 211 block[i] = (block[i] * scale) >> AANSCALE_BITS; 212 } 213 } 214 215 ref(block1); 216 if (!strcmp(dct->name, "PR-SSE2")) 217 for (i = 0; i < 64; i++) 218 block1[i] = av_clip(block1[i], 4-512, 1019-512); 219 220 blockSumErr = 0; 221 for (i = 0; i < 64; i++) { 222 int err = block[i] - block1[i]; 223 err_sum += err; 224 v = abs(err); 225 if (v > err_inf) 226 err_inf = v; 227 err2_matrix[i] += v * v; 228 err2 += v * v; 229 sysErr[i] += block[i] - block1[i]; 230 blockSumErr += v; 231 if (abs(block[i]) > maxout) 232 maxout = abs(block[i]); 233 } 234 if (blockSumErrMax < blockSumErr) 235 blockSumErrMax = blockSumErr; 236 } 237 for (i = 0; i < 64; i++) { 238 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i])); 239 err2_max = FFMAX(err2_max , FFABS(err2_matrix[i])); 240 } 241 242 for (i = 0; i < 64; i++) { 243 if (i % 8 == 0) 244 printf("\n"); 245 printf("%7d ", (int) sysErr[i]); 246 } 247 printf("\n"); 248 249 omse = (double) err2 / NB_ITS / 64; 250 ome = (double) err_sum / NB_ITS / 64; 251 252 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); 253 if (test < 2) 254 spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015); 255 256 printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", 257 is_idct ? "IDCT" : "DCT", dct->name, err_inf, 258 omse, ome, (double) sysErrMax / NB_ITS, 259 maxout, blockSumErrMax); 260 261 if (spec_err && !dct->nonspec) { 262 printf("Failed!\n"); 263 return 1; 264 } 265 266 if (!speed) 267 return 0; 268 269 /* speed test */ 270 271 init_block(block, test, is_idct, &prng, vals); 272 permute(block1, block, dct->perm_type); 273 274 ti = av_gettime_relative(); 275 it1 = 0; 276 do { 277 for (it = 0; it < NB_ITS_SPEED; it++) { 278 memcpy(block, block1, sizeof(block)); 279 dct->func(block); 280 } 281 emms_c(); 282 it1 += NB_ITS_SPEED; 283 ti1 = av_gettime_relative() - ti; 284 } while (ti1 < 1000000); 285 286 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, 287 (double) it1 * 1000.0 / (double) ti1); 288 289 return 0; 290} 291 292DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; 293DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; 294 295static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 296{ 297 static int init; 298 static double c8[8][8]; 299 static double c4[4][4]; 300 double block1[64], block2[64], block3[64]; 301 double s, sum, v; 302 int i, j, k; 303 304 if (!init) { 305 init = 1; 306 307 for (i = 0; i < 8; i++) { 308 sum = 0; 309 for (j = 0; j < 8; j++) { 310 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0); 311 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); 312 sum += c8[i][j] * c8[i][j]; 313 } 314 } 315 316 for (i = 0; i < 4; i++) { 317 sum = 0; 318 for (j = 0; j < 4; j++) { 319 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0); 320 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); 321 sum += c4[i][j] * c4[i][j]; 322 } 323 } 324 } 325 326 /* butterfly */ 327 s = 0.5 * sqrt(2.0); 328 for (i = 0; i < 4; i++) { 329 for (j = 0; j < 8; j++) { 330 block1[8 * (2 * i) + j] = 331 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s; 332 block1[8 * (2 * i + 1) + j] = 333 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s; 334 } 335 } 336 337 /* idct8 on lines */ 338 for (i = 0; i < 8; i++) { 339 for (j = 0; j < 8; j++) { 340 sum = 0; 341 for (k = 0; k < 8; k++) 342 sum += c8[k][j] * block1[8 * i + k]; 343 block2[8 * i + j] = sum; 344 } 345 } 346 347 /* idct4 */ 348 for (i = 0; i < 8; i++) { 349 for (j = 0; j < 4; j++) { 350 /* top */ 351 sum = 0; 352 for (k = 0; k < 4; k++) 353 sum += c4[k][j] * block2[8 * (2 * k) + i]; 354 block3[8 * (2 * j) + i] = sum; 355 356 /* bottom */ 357 sum = 0; 358 for (k = 0; k < 4; k++) 359 sum += c4[k][j] * block2[8 * (2 * k + 1) + i]; 360 block3[8 * (2 * j + 1) + i] = sum; 361 } 362 } 363 364 /* clamp and store the result */ 365 for (i = 0; i < 8; i++) { 366 for (j = 0; j < 8; j++) { 367 v = block3[8 * i + j]; 368 if (v < 0) v = 0; 369 else if (v > 255) v = 255; 370 dest[i * linesize + j] = (int) rint(v); 371 } 372 } 373} 374 375static void idct248_error(const char *name, 376 void (*idct248_put)(uint8_t *dest, 377 ptrdiff_t line_size, 378 int16_t *block), 379 int speed) 380{ 381 int it, i, it1, ti, ti1, err_max, v; 382 AVLFG prng; 383 384 av_lfg_init(&prng, 1); 385 386 /* just one test to see if code is correct (precision is less 387 important here) */ 388 err_max = 0; 389 for (it = 0; it < NB_ITS; it++) { 390 /* XXX: use forward transform to generate values */ 391 for (i = 0; i < 64; i++) 392 block1[i] = av_lfg_get(&prng) % 256 - 128; 393 block1[0] += 1024; 394 395 for (i = 0; i < 64; i++) 396 block[i] = block1[i]; 397 idct248_ref(img_dest1, 8, block); 398 399 for (i = 0; i < 64; i++) 400 block[i] = block1[i]; 401 idct248_put(img_dest, 8, block); 402 403 for (i = 0; i < 64; i++) { 404 v = abs((int) img_dest[i] - (int) img_dest1[i]); 405 if (v == 255) 406 printf("%d %d\n", img_dest[i], img_dest1[i]); 407 if (v > err_max) 408 err_max = v; 409 } 410#if 0 411 printf("ref=\n"); 412 for(i=0;i<8;i++) { 413 int j; 414 for(j=0;j<8;j++) { 415 printf(" %3d", img_dest1[i*8+j]); 416 } 417 printf("\n"); 418 } 419 420 printf("out=\n"); 421 for(i=0;i<8;i++) { 422 int j; 423 for(j=0;j<8;j++) { 424 printf(" %3d", img_dest[i*8+j]); 425 } 426 printf("\n"); 427 } 428#endif 429 } 430 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); 431 432 if (!speed) 433 return; 434 435 ti = av_gettime_relative(); 436 it1 = 0; 437 do { 438 for (it = 0; it < NB_ITS_SPEED; it++) { 439 for (i = 0; i < 64; i++) 440 block[i] = block1[i]; 441 idct248_put(img_dest, 8, block); 442 } 443 emms_c(); 444 it1 += NB_ITS_SPEED; 445 ti1 = av_gettime_relative() - ti; 446 } while (ti1 < 1000000); 447 448 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, 449 (double) it1 * 1000.0 / (double) ti1); 450} 451 452static void help(void) 453{ 454 printf("dct-test [-i] [<test-number>] [<bits>]\n" 455 "test-number 0 -> test with random matrixes\n" 456 " 1 -> test with random sparse matrixes\n" 457 " 2 -> do 3. test from MPEG-4 std\n" 458 "bits Number of time domain bits to use, 8 is default\n" 459 "-i test IDCT implementations\n" 460 "-4 test IDCT248 implementations\n" 461 "-t speed test\n"); 462} 463 464#if !HAVE_GETOPT 465#include "compat/getopt.c" 466#endif 467 468int main(int argc, char **argv) 469{ 470 int test_idct = 0, test_248_dct = 0; 471 int c, i; 472 int test = 1; 473 int speed = 0; 474 int err = 0; 475 int bits=8; 476 477 ff_ref_dct_init(); 478 479 for (;;) { 480 c = getopt(argc, argv, "ih4t"); 481 if (c == -1) 482 break; 483 switch (c) { 484 case 'i': 485 test_idct = 1; 486 break; 487 case '4': 488 test_248_dct = 1; 489 break; 490 case 't': 491 speed = 1; 492 break; 493 default: 494 case 'h': 495 help(); 496 return 0; 497 } 498 } 499 500 if (optind < argc) 501 test = atoi(argv[optind]); 502 if(optind+1 < argc) bits= atoi(argv[optind+1]); 503 504 printf("ffmpeg DCT/IDCT test\n"); 505 506 if (test_248_dct) { 507 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed); 508 } else { 509 const int cpu_flags = av_get_cpu_flags(); 510 if (test_idct) { 511 for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++) 512 err |= dct_error(&idct_tab[i], test, test_idct, speed, bits); 513 514 for (i = 0; idct_tab_arch[i].name; i++) 515 if (!(~cpu_flags & idct_tab_arch[i].cpu_flag)) 516 err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits); 517 } 518#if CONFIG_FDCTDSP 519 else { 520 for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++) 521 err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits); 522 523 for (i = 0; fdct_tab_arch[i].name; i++) 524 if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag)) 525 err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits); 526 } 527#endif /* CONFIG_FDCTDSP */ 528 } 529 530 if (err) 531 printf("Error: %d.\n", err); 532 533 return !!err; 534} 535