1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * (c) 2001 Fabrice Bellard 3cabdff1aSopenharmony_ci * 2007 Marc Hoffman <marc.hoffman@analog.com> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci/** 23cabdff1aSopenharmony_ci * @file 24cabdff1aSopenharmony_ci * DCT test (c) 2001 Fabrice Bellard 25cabdff1aSopenharmony_ci * Started from sample code by Juan J. Sierralta P. 26cabdff1aSopenharmony_ci */ 27cabdff1aSopenharmony_ci 28cabdff1aSopenharmony_ci#include "config.h" 29cabdff1aSopenharmony_ci#include "config_components.h" 30cabdff1aSopenharmony_ci#include <stdlib.h> 31cabdff1aSopenharmony_ci#include <stdio.h> 32cabdff1aSopenharmony_ci#include <string.h> 33cabdff1aSopenharmony_ci#if HAVE_UNISTD_H 34cabdff1aSopenharmony_ci#include <unistd.h> 35cabdff1aSopenharmony_ci#endif 36cabdff1aSopenharmony_ci#include <math.h> 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 39cabdff1aSopenharmony_ci#include "libavutil/common.h" 40cabdff1aSopenharmony_ci#include "libavutil/internal.h" 41cabdff1aSopenharmony_ci#include "libavutil/lfg.h" 42cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 43cabdff1aSopenharmony_ci#include "libavutil/time.h" 44cabdff1aSopenharmony_ci 45cabdff1aSopenharmony_ci#include "libavcodec/dct.h" 46cabdff1aSopenharmony_ci#include "libavcodec/idctdsp.h" 47cabdff1aSopenharmony_ci#include "libavcodec/simple_idct.h" 48cabdff1aSopenharmony_ci#include "libavcodec/xvididct.h" 49cabdff1aSopenharmony_ci#include "libavcodec/aandcttab.h" 50cabdff1aSopenharmony_ci#include "libavcodec/faandct.h" 51cabdff1aSopenharmony_ci#include "libavcodec/faanidct.h" 52cabdff1aSopenharmony_ci#include "libavcodec/dctref.h" 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_cistruct algo { 55cabdff1aSopenharmony_ci const char *name; 56cabdff1aSopenharmony_ci void (*func)(int16_t *block); 57cabdff1aSopenharmony_ci enum idct_permutation_type perm_type; 58cabdff1aSopenharmony_ci int cpu_flag; 59cabdff1aSopenharmony_ci int nonspec; 60cabdff1aSopenharmony_ci}; 61cabdff1aSopenharmony_ci 62cabdff1aSopenharmony_cistatic const struct algo fdct_tab[] = { 63cabdff1aSopenharmony_ci { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE }, 64cabdff1aSopenharmony_ci { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE }, 65cabdff1aSopenharmony_ci { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE }, 66cabdff1aSopenharmony_ci#if CONFIG_FAANDCT 67cabdff1aSopenharmony_ci { "FAAN", ff_faandct, FF_IDCT_PERM_NONE }, 68cabdff1aSopenharmony_ci#endif /* CONFIG_FAANDCT */ 69cabdff1aSopenharmony_ci}; 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_cistatic void ff_prores_idct_wrap(int16_t *dst){ 72cabdff1aSopenharmony_ci LOCAL_ALIGNED(16, int16_t, qmat, [64]); 73cabdff1aSopenharmony_ci int i; 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ci for(i=0; i<64; i++){ 76cabdff1aSopenharmony_ci qmat[i]=4; 77cabdff1aSopenharmony_ci } 78cabdff1aSopenharmony_ci ff_prores_idct_10(dst, qmat); 79cabdff1aSopenharmony_ci for(i=0; i<64; i++) { 80cabdff1aSopenharmony_ci dst[i] -= 512; 81cabdff1aSopenharmony_ci } 82cabdff1aSopenharmony_ci} 83cabdff1aSopenharmony_ci 84cabdff1aSopenharmony_cistatic const struct algo idct_tab[] = { 85cabdff1aSopenharmony_ci { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, 86cabdff1aSopenharmony_ci { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, 87cabdff1aSopenharmony_ci { "SIMPLE-C", ff_simple_idct_int16_8bit, FF_IDCT_PERM_NONE }, 88cabdff1aSopenharmony_ci { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE }, 89cabdff1aSopenharmony_ci { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 }, 90cabdff1aSopenharmony_ci { "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, 91cabdff1aSopenharmony_ci#if CONFIG_FAANIDCT 92cabdff1aSopenharmony_ci { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, 93cabdff1aSopenharmony_ci#endif /* CONFIG_FAANIDCT */ 94cabdff1aSopenharmony_ci#if CONFIG_MPEG4_DECODER 95cabdff1aSopenharmony_ci { "XVID", ff_xvid_idct, FF_IDCT_PERM_NONE, 0, 1 }, 96cabdff1aSopenharmony_ci#endif /* CONFIG_MPEG4_DECODER */ 97cabdff1aSopenharmony_ci}; 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_ci#if ARCH_AARCH64 100cabdff1aSopenharmony_ci#include "aarch64/dct.c" 101cabdff1aSopenharmony_ci#elif ARCH_ARM 102cabdff1aSopenharmony_ci#include "arm/dct.c" 103cabdff1aSopenharmony_ci#elif ARCH_PPC 104cabdff1aSopenharmony_ci#include "ppc/dct.c" 105cabdff1aSopenharmony_ci#elif ARCH_X86 106cabdff1aSopenharmony_ci#include "x86/dct.c" 107cabdff1aSopenharmony_ci#else 108cabdff1aSopenharmony_cistatic const struct algo fdct_tab_arch[] = { { 0 } }; 109cabdff1aSopenharmony_cistatic const struct algo idct_tab_arch[] = { { 0 } }; 110cabdff1aSopenharmony_ci#endif 111cabdff1aSopenharmony_ci 112cabdff1aSopenharmony_ci#define AANSCALE_BITS 12 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_ci#define NB_ITS 20000 115cabdff1aSopenharmony_ci#define NB_ITS_SPEED 50000 116cabdff1aSopenharmony_ci 117cabdff1aSopenharmony_ciDECLARE_ALIGNED(16, static int16_t, block)[64]; 118cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static int16_t, block1)[64]; 119cabdff1aSopenharmony_ci 120cabdff1aSopenharmony_cistatic void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals) 121cabdff1aSopenharmony_ci{ 122cabdff1aSopenharmony_ci int i, j; 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ci memset(block, 0, 64 * sizeof(*block)); 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci switch (test) { 127cabdff1aSopenharmony_ci case 0: 128cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 129cabdff1aSopenharmony_ci block[i] = (av_lfg_get(prng) % (2*vals)) -vals; 130cabdff1aSopenharmony_ci if (is_idct) { 131cabdff1aSopenharmony_ci ff_ref_fdct(block); 132cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 133cabdff1aSopenharmony_ci block[i] >>= 3; 134cabdff1aSopenharmony_ci } 135cabdff1aSopenharmony_ci break; 136cabdff1aSopenharmony_ci case 1: 137cabdff1aSopenharmony_ci j = av_lfg_get(prng) % 10 + 1; 138cabdff1aSopenharmony_ci for (i = 0; i < j; i++) { 139cabdff1aSopenharmony_ci int idx = av_lfg_get(prng) % 64; 140cabdff1aSopenharmony_ci block[idx] = av_lfg_get(prng) % (2*vals) -vals; 141cabdff1aSopenharmony_ci } 142cabdff1aSopenharmony_ci break; 143cabdff1aSopenharmony_ci case 2: 144cabdff1aSopenharmony_ci block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals); 145cabdff1aSopenharmony_ci block[63] = (block[0] & 1) ^ 1; 146cabdff1aSopenharmony_ci break; 147cabdff1aSopenharmony_ci } 148cabdff1aSopenharmony_ci} 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_cistatic void permute(int16_t dst[64], const int16_t src[64], 151cabdff1aSopenharmony_ci enum idct_permutation_type perm_type) 152cabdff1aSopenharmony_ci{ 153cabdff1aSopenharmony_ci int i; 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci#if ARCH_X86 156cabdff1aSopenharmony_ci if (permute_x86(dst, src, perm_type)) 157cabdff1aSopenharmony_ci return; 158cabdff1aSopenharmony_ci#endif 159cabdff1aSopenharmony_ci 160cabdff1aSopenharmony_ci switch (perm_type) { 161cabdff1aSopenharmony_ci case FF_IDCT_PERM_LIBMPEG2: 162cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 163cabdff1aSopenharmony_ci dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; 164cabdff1aSopenharmony_ci break; 165cabdff1aSopenharmony_ci case FF_IDCT_PERM_PARTTRANS: 166cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 167cabdff1aSopenharmony_ci dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; 168cabdff1aSopenharmony_ci break; 169cabdff1aSopenharmony_ci case FF_IDCT_PERM_TRANSPOSE: 170cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 171cabdff1aSopenharmony_ci dst[(i>>3) | ((i<<3)&0x38)] = src[i]; 172cabdff1aSopenharmony_ci break; 173cabdff1aSopenharmony_ci default: 174cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 175cabdff1aSopenharmony_ci dst[i] = src[i]; 176cabdff1aSopenharmony_ci break; 177cabdff1aSopenharmony_ci } 178cabdff1aSopenharmony_ci} 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_cistatic int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits) 181cabdff1aSopenharmony_ci{ 182cabdff1aSopenharmony_ci void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct; 183cabdff1aSopenharmony_ci int it, i, scale; 184cabdff1aSopenharmony_ci int err_inf, v; 185cabdff1aSopenharmony_ci int64_t err2, ti, ti1, it1, err_sum = 0; 186cabdff1aSopenharmony_ci int64_t sysErr[64], sysErrMax = 0; 187cabdff1aSopenharmony_ci int64_t err2_matrix[64], err2_max = 0; 188cabdff1aSopenharmony_ci int maxout = 0; 189cabdff1aSopenharmony_ci int blockSumErrMax = 0, blockSumErr; 190cabdff1aSopenharmony_ci AVLFG prng; 191cabdff1aSopenharmony_ci const int vals=1<<bits; 192cabdff1aSopenharmony_ci double omse, ome; 193cabdff1aSopenharmony_ci int spec_err; 194cabdff1aSopenharmony_ci 195cabdff1aSopenharmony_ci av_lfg_init(&prng, 1); 196cabdff1aSopenharmony_ci 197cabdff1aSopenharmony_ci err_inf = 0; 198cabdff1aSopenharmony_ci err2 = 0; 199cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 200cabdff1aSopenharmony_ci err2_matrix[i] = sysErr[i] = 0; 201cabdff1aSopenharmony_ci for (it = 0; it < NB_ITS; it++) { 202cabdff1aSopenharmony_ci init_block(block1, test, is_idct, &prng, vals); 203cabdff1aSopenharmony_ci permute(block, block1, dct->perm_type); 204cabdff1aSopenharmony_ci 205cabdff1aSopenharmony_ci dct->func(block); 206cabdff1aSopenharmony_ci emms_c(); 207cabdff1aSopenharmony_ci 208cabdff1aSopenharmony_ci if (!strcmp(dct->name, "IJG-AAN-INT")) { 209cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 210cabdff1aSopenharmony_ci scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; 211cabdff1aSopenharmony_ci block[i] = (block[i] * scale) >> AANSCALE_BITS; 212cabdff1aSopenharmony_ci } 213cabdff1aSopenharmony_ci } 214cabdff1aSopenharmony_ci 215cabdff1aSopenharmony_ci ref(block1); 216cabdff1aSopenharmony_ci if (!strcmp(dct->name, "PR-SSE2")) 217cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 218cabdff1aSopenharmony_ci block1[i] = av_clip(block1[i], 4-512, 1019-512); 219cabdff1aSopenharmony_ci 220cabdff1aSopenharmony_ci blockSumErr = 0; 221cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 222cabdff1aSopenharmony_ci int err = block[i] - block1[i]; 223cabdff1aSopenharmony_ci err_sum += err; 224cabdff1aSopenharmony_ci v = abs(err); 225cabdff1aSopenharmony_ci if (v > err_inf) 226cabdff1aSopenharmony_ci err_inf = v; 227cabdff1aSopenharmony_ci err2_matrix[i] += v * v; 228cabdff1aSopenharmony_ci err2 += v * v; 229cabdff1aSopenharmony_ci sysErr[i] += block[i] - block1[i]; 230cabdff1aSopenharmony_ci blockSumErr += v; 231cabdff1aSopenharmony_ci if (abs(block[i]) > maxout) 232cabdff1aSopenharmony_ci maxout = abs(block[i]); 233cabdff1aSopenharmony_ci } 234cabdff1aSopenharmony_ci if (blockSumErrMax < blockSumErr) 235cabdff1aSopenharmony_ci blockSumErrMax = blockSumErr; 236cabdff1aSopenharmony_ci } 237cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 238cabdff1aSopenharmony_ci sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i])); 239cabdff1aSopenharmony_ci err2_max = FFMAX(err2_max , FFABS(err2_matrix[i])); 240cabdff1aSopenharmony_ci } 241cabdff1aSopenharmony_ci 242cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 243cabdff1aSopenharmony_ci if (i % 8 == 0) 244cabdff1aSopenharmony_ci printf("\n"); 245cabdff1aSopenharmony_ci printf("%7d ", (int) sysErr[i]); 246cabdff1aSopenharmony_ci } 247cabdff1aSopenharmony_ci printf("\n"); 248cabdff1aSopenharmony_ci 249cabdff1aSopenharmony_ci omse = (double) err2 / NB_ITS / 64; 250cabdff1aSopenharmony_ci ome = (double) err_sum / NB_ITS / 64; 251cabdff1aSopenharmony_ci 252cabdff1aSopenharmony_ci spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); 253cabdff1aSopenharmony_ci if (test < 2) 254cabdff1aSopenharmony_ci spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (double) sysErrMax / NB_ITS > 0.015); 255cabdff1aSopenharmony_ci 256cabdff1aSopenharmony_ci printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", 257cabdff1aSopenharmony_ci is_idct ? "IDCT" : "DCT", dct->name, err_inf, 258cabdff1aSopenharmony_ci omse, ome, (double) sysErrMax / NB_ITS, 259cabdff1aSopenharmony_ci maxout, blockSumErrMax); 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_ci if (spec_err && !dct->nonspec) { 262cabdff1aSopenharmony_ci printf("Failed!\n"); 263cabdff1aSopenharmony_ci return 1; 264cabdff1aSopenharmony_ci } 265cabdff1aSopenharmony_ci 266cabdff1aSopenharmony_ci if (!speed) 267cabdff1aSopenharmony_ci return 0; 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_ci /* speed test */ 270cabdff1aSopenharmony_ci 271cabdff1aSopenharmony_ci init_block(block, test, is_idct, &prng, vals); 272cabdff1aSopenharmony_ci permute(block1, block, dct->perm_type); 273cabdff1aSopenharmony_ci 274cabdff1aSopenharmony_ci ti = av_gettime_relative(); 275cabdff1aSopenharmony_ci it1 = 0; 276cabdff1aSopenharmony_ci do { 277cabdff1aSopenharmony_ci for (it = 0; it < NB_ITS_SPEED; it++) { 278cabdff1aSopenharmony_ci memcpy(block, block1, sizeof(block)); 279cabdff1aSopenharmony_ci dct->func(block); 280cabdff1aSopenharmony_ci } 281cabdff1aSopenharmony_ci emms_c(); 282cabdff1aSopenharmony_ci it1 += NB_ITS_SPEED; 283cabdff1aSopenharmony_ci ti1 = av_gettime_relative() - ti; 284cabdff1aSopenharmony_ci } while (ti1 < 1000000); 285cabdff1aSopenharmony_ci 286cabdff1aSopenharmony_ci printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, 287cabdff1aSopenharmony_ci (double) it1 * 1000.0 / (double) ti1); 288cabdff1aSopenharmony_ci 289cabdff1aSopenharmony_ci return 0; 290cabdff1aSopenharmony_ci} 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; 293cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; 294cabdff1aSopenharmony_ci 295cabdff1aSopenharmony_cistatic void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 296cabdff1aSopenharmony_ci{ 297cabdff1aSopenharmony_ci static int init; 298cabdff1aSopenharmony_ci static double c8[8][8]; 299cabdff1aSopenharmony_ci static double c4[4][4]; 300cabdff1aSopenharmony_ci double block1[64], block2[64], block3[64]; 301cabdff1aSopenharmony_ci double s, sum, v; 302cabdff1aSopenharmony_ci int i, j, k; 303cabdff1aSopenharmony_ci 304cabdff1aSopenharmony_ci if (!init) { 305cabdff1aSopenharmony_ci init = 1; 306cabdff1aSopenharmony_ci 307cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 308cabdff1aSopenharmony_ci sum = 0; 309cabdff1aSopenharmony_ci for (j = 0; j < 8; j++) { 310cabdff1aSopenharmony_ci s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0); 311cabdff1aSopenharmony_ci c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); 312cabdff1aSopenharmony_ci sum += c8[i][j] * c8[i][j]; 313cabdff1aSopenharmony_ci } 314cabdff1aSopenharmony_ci } 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 317cabdff1aSopenharmony_ci sum = 0; 318cabdff1aSopenharmony_ci for (j = 0; j < 4; j++) { 319cabdff1aSopenharmony_ci s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0); 320cabdff1aSopenharmony_ci c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); 321cabdff1aSopenharmony_ci sum += c4[i][j] * c4[i][j]; 322cabdff1aSopenharmony_ci } 323cabdff1aSopenharmony_ci } 324cabdff1aSopenharmony_ci } 325cabdff1aSopenharmony_ci 326cabdff1aSopenharmony_ci /* butterfly */ 327cabdff1aSopenharmony_ci s = 0.5 * sqrt(2.0); 328cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 329cabdff1aSopenharmony_ci for (j = 0; j < 8; j++) { 330cabdff1aSopenharmony_ci block1[8 * (2 * i) + j] = 331cabdff1aSopenharmony_ci (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s; 332cabdff1aSopenharmony_ci block1[8 * (2 * i + 1) + j] = 333cabdff1aSopenharmony_ci (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s; 334cabdff1aSopenharmony_ci } 335cabdff1aSopenharmony_ci } 336cabdff1aSopenharmony_ci 337cabdff1aSopenharmony_ci /* idct8 on lines */ 338cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 339cabdff1aSopenharmony_ci for (j = 0; j < 8; j++) { 340cabdff1aSopenharmony_ci sum = 0; 341cabdff1aSopenharmony_ci for (k = 0; k < 8; k++) 342cabdff1aSopenharmony_ci sum += c8[k][j] * block1[8 * i + k]; 343cabdff1aSopenharmony_ci block2[8 * i + j] = sum; 344cabdff1aSopenharmony_ci } 345cabdff1aSopenharmony_ci } 346cabdff1aSopenharmony_ci 347cabdff1aSopenharmony_ci /* idct4 */ 348cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 349cabdff1aSopenharmony_ci for (j = 0; j < 4; j++) { 350cabdff1aSopenharmony_ci /* top */ 351cabdff1aSopenharmony_ci sum = 0; 352cabdff1aSopenharmony_ci for (k = 0; k < 4; k++) 353cabdff1aSopenharmony_ci sum += c4[k][j] * block2[8 * (2 * k) + i]; 354cabdff1aSopenharmony_ci block3[8 * (2 * j) + i] = sum; 355cabdff1aSopenharmony_ci 356cabdff1aSopenharmony_ci /* bottom */ 357cabdff1aSopenharmony_ci sum = 0; 358cabdff1aSopenharmony_ci for (k = 0; k < 4; k++) 359cabdff1aSopenharmony_ci sum += c4[k][j] * block2[8 * (2 * k + 1) + i]; 360cabdff1aSopenharmony_ci block3[8 * (2 * j + 1) + i] = sum; 361cabdff1aSopenharmony_ci } 362cabdff1aSopenharmony_ci } 363cabdff1aSopenharmony_ci 364cabdff1aSopenharmony_ci /* clamp and store the result */ 365cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 366cabdff1aSopenharmony_ci for (j = 0; j < 8; j++) { 367cabdff1aSopenharmony_ci v = block3[8 * i + j]; 368cabdff1aSopenharmony_ci if (v < 0) v = 0; 369cabdff1aSopenharmony_ci else if (v > 255) v = 255; 370cabdff1aSopenharmony_ci dest[i * linesize + j] = (int) rint(v); 371cabdff1aSopenharmony_ci } 372cabdff1aSopenharmony_ci } 373cabdff1aSopenharmony_ci} 374cabdff1aSopenharmony_ci 375cabdff1aSopenharmony_cistatic void idct248_error(const char *name, 376cabdff1aSopenharmony_ci void (*idct248_put)(uint8_t *dest, 377cabdff1aSopenharmony_ci ptrdiff_t line_size, 378cabdff1aSopenharmony_ci int16_t *block), 379cabdff1aSopenharmony_ci int speed) 380cabdff1aSopenharmony_ci{ 381cabdff1aSopenharmony_ci int it, i, it1, ti, ti1, err_max, v; 382cabdff1aSopenharmony_ci AVLFG prng; 383cabdff1aSopenharmony_ci 384cabdff1aSopenharmony_ci av_lfg_init(&prng, 1); 385cabdff1aSopenharmony_ci 386cabdff1aSopenharmony_ci /* just one test to see if code is correct (precision is less 387cabdff1aSopenharmony_ci important here) */ 388cabdff1aSopenharmony_ci err_max = 0; 389cabdff1aSopenharmony_ci for (it = 0; it < NB_ITS; it++) { 390cabdff1aSopenharmony_ci /* XXX: use forward transform to generate values */ 391cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 392cabdff1aSopenharmony_ci block1[i] = av_lfg_get(&prng) % 256 - 128; 393cabdff1aSopenharmony_ci block1[0] += 1024; 394cabdff1aSopenharmony_ci 395cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 396cabdff1aSopenharmony_ci block[i] = block1[i]; 397cabdff1aSopenharmony_ci idct248_ref(img_dest1, 8, block); 398cabdff1aSopenharmony_ci 399cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 400cabdff1aSopenharmony_ci block[i] = block1[i]; 401cabdff1aSopenharmony_ci idct248_put(img_dest, 8, block); 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 404cabdff1aSopenharmony_ci v = abs((int) img_dest[i] - (int) img_dest1[i]); 405cabdff1aSopenharmony_ci if (v == 255) 406cabdff1aSopenharmony_ci printf("%d %d\n", img_dest[i], img_dest1[i]); 407cabdff1aSopenharmony_ci if (v > err_max) 408cabdff1aSopenharmony_ci err_max = v; 409cabdff1aSopenharmony_ci } 410cabdff1aSopenharmony_ci#if 0 411cabdff1aSopenharmony_ci printf("ref=\n"); 412cabdff1aSopenharmony_ci for(i=0;i<8;i++) { 413cabdff1aSopenharmony_ci int j; 414cabdff1aSopenharmony_ci for(j=0;j<8;j++) { 415cabdff1aSopenharmony_ci printf(" %3d", img_dest1[i*8+j]); 416cabdff1aSopenharmony_ci } 417cabdff1aSopenharmony_ci printf("\n"); 418cabdff1aSopenharmony_ci } 419cabdff1aSopenharmony_ci 420cabdff1aSopenharmony_ci printf("out=\n"); 421cabdff1aSopenharmony_ci for(i=0;i<8;i++) { 422cabdff1aSopenharmony_ci int j; 423cabdff1aSopenharmony_ci for(j=0;j<8;j++) { 424cabdff1aSopenharmony_ci printf(" %3d", img_dest[i*8+j]); 425cabdff1aSopenharmony_ci } 426cabdff1aSopenharmony_ci printf("\n"); 427cabdff1aSopenharmony_ci } 428cabdff1aSopenharmony_ci#endif 429cabdff1aSopenharmony_ci } 430cabdff1aSopenharmony_ci printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); 431cabdff1aSopenharmony_ci 432cabdff1aSopenharmony_ci if (!speed) 433cabdff1aSopenharmony_ci return; 434cabdff1aSopenharmony_ci 435cabdff1aSopenharmony_ci ti = av_gettime_relative(); 436cabdff1aSopenharmony_ci it1 = 0; 437cabdff1aSopenharmony_ci do { 438cabdff1aSopenharmony_ci for (it = 0; it < NB_ITS_SPEED; it++) { 439cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 440cabdff1aSopenharmony_ci block[i] = block1[i]; 441cabdff1aSopenharmony_ci idct248_put(img_dest, 8, block); 442cabdff1aSopenharmony_ci } 443cabdff1aSopenharmony_ci emms_c(); 444cabdff1aSopenharmony_ci it1 += NB_ITS_SPEED; 445cabdff1aSopenharmony_ci ti1 = av_gettime_relative() - ti; 446cabdff1aSopenharmony_ci } while (ti1 < 1000000); 447cabdff1aSopenharmony_ci 448cabdff1aSopenharmony_ci printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, 449cabdff1aSopenharmony_ci (double) it1 * 1000.0 / (double) ti1); 450cabdff1aSopenharmony_ci} 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_cistatic void help(void) 453cabdff1aSopenharmony_ci{ 454cabdff1aSopenharmony_ci printf("dct-test [-i] [<test-number>] [<bits>]\n" 455cabdff1aSopenharmony_ci "test-number 0 -> test with random matrixes\n" 456cabdff1aSopenharmony_ci " 1 -> test with random sparse matrixes\n" 457cabdff1aSopenharmony_ci " 2 -> do 3. test from MPEG-4 std\n" 458cabdff1aSopenharmony_ci "bits Number of time domain bits to use, 8 is default\n" 459cabdff1aSopenharmony_ci "-i test IDCT implementations\n" 460cabdff1aSopenharmony_ci "-4 test IDCT248 implementations\n" 461cabdff1aSopenharmony_ci "-t speed test\n"); 462cabdff1aSopenharmony_ci} 463cabdff1aSopenharmony_ci 464cabdff1aSopenharmony_ci#if !HAVE_GETOPT 465cabdff1aSopenharmony_ci#include "compat/getopt.c" 466cabdff1aSopenharmony_ci#endif 467cabdff1aSopenharmony_ci 468cabdff1aSopenharmony_ciint main(int argc, char **argv) 469cabdff1aSopenharmony_ci{ 470cabdff1aSopenharmony_ci int test_idct = 0, test_248_dct = 0; 471cabdff1aSopenharmony_ci int c, i; 472cabdff1aSopenharmony_ci int test = 1; 473cabdff1aSopenharmony_ci int speed = 0; 474cabdff1aSopenharmony_ci int err = 0; 475cabdff1aSopenharmony_ci int bits=8; 476cabdff1aSopenharmony_ci 477cabdff1aSopenharmony_ci ff_ref_dct_init(); 478cabdff1aSopenharmony_ci 479cabdff1aSopenharmony_ci for (;;) { 480cabdff1aSopenharmony_ci c = getopt(argc, argv, "ih4t"); 481cabdff1aSopenharmony_ci if (c == -1) 482cabdff1aSopenharmony_ci break; 483cabdff1aSopenharmony_ci switch (c) { 484cabdff1aSopenharmony_ci case 'i': 485cabdff1aSopenharmony_ci test_idct = 1; 486cabdff1aSopenharmony_ci break; 487cabdff1aSopenharmony_ci case '4': 488cabdff1aSopenharmony_ci test_248_dct = 1; 489cabdff1aSopenharmony_ci break; 490cabdff1aSopenharmony_ci case 't': 491cabdff1aSopenharmony_ci speed = 1; 492cabdff1aSopenharmony_ci break; 493cabdff1aSopenharmony_ci default: 494cabdff1aSopenharmony_ci case 'h': 495cabdff1aSopenharmony_ci help(); 496cabdff1aSopenharmony_ci return 0; 497cabdff1aSopenharmony_ci } 498cabdff1aSopenharmony_ci } 499cabdff1aSopenharmony_ci 500cabdff1aSopenharmony_ci if (optind < argc) 501cabdff1aSopenharmony_ci test = atoi(argv[optind]); 502cabdff1aSopenharmony_ci if(optind+1 < argc) bits= atoi(argv[optind+1]); 503cabdff1aSopenharmony_ci 504cabdff1aSopenharmony_ci printf("ffmpeg DCT/IDCT test\n"); 505cabdff1aSopenharmony_ci 506cabdff1aSopenharmony_ci if (test_248_dct) { 507cabdff1aSopenharmony_ci idct248_error("SIMPLE-C", ff_simple_idct248_put, speed); 508cabdff1aSopenharmony_ci } else { 509cabdff1aSopenharmony_ci const int cpu_flags = av_get_cpu_flags(); 510cabdff1aSopenharmony_ci if (test_idct) { 511cabdff1aSopenharmony_ci for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++) 512cabdff1aSopenharmony_ci err |= dct_error(&idct_tab[i], test, test_idct, speed, bits); 513cabdff1aSopenharmony_ci 514cabdff1aSopenharmony_ci for (i = 0; idct_tab_arch[i].name; i++) 515cabdff1aSopenharmony_ci if (!(~cpu_flags & idct_tab_arch[i].cpu_flag)) 516cabdff1aSopenharmony_ci err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits); 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci#if CONFIG_FDCTDSP 519cabdff1aSopenharmony_ci else { 520cabdff1aSopenharmony_ci for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++) 521cabdff1aSopenharmony_ci err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits); 522cabdff1aSopenharmony_ci 523cabdff1aSopenharmony_ci for (i = 0; fdct_tab_arch[i].name; i++) 524cabdff1aSopenharmony_ci if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag)) 525cabdff1aSopenharmony_ci err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits); 526cabdff1aSopenharmony_ci } 527cabdff1aSopenharmony_ci#endif /* CONFIG_FDCTDSP */ 528cabdff1aSopenharmony_ci } 529cabdff1aSopenharmony_ci 530cabdff1aSopenharmony_ci if (err) 531cabdff1aSopenharmony_ci printf("Error: %d.\n", err); 532cabdff1aSopenharmony_ci 533cabdff1aSopenharmony_ci return !!err; 534cabdff1aSopenharmony_ci} 535