1/* 2 * Assembly testing and benchmarking tool 3 * Copyright (c) 2015 Henrik Gramner 4 * Copyright (c) 2008 Loren Merritt 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License along 19 * with FFmpeg; if not, write to the Free Software Foundation, Inc., 20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 21 */ 22 23#include "config.h" 24#include "config_components.h" 25 26#if CONFIG_LINUX_PERF 27# ifndef _GNU_SOURCE 28# define _GNU_SOURCE // for syscall (performance monitoring API) 29# endif 30#endif 31 32#include <stdarg.h> 33#include <stdio.h> 34#include <stdlib.h> 35#include <string.h> 36#include "checkasm.h" 37#include "libavutil/common.h" 38#include "libavutil/cpu.h" 39#include "libavutil/intfloat.h" 40#include "libavutil/random_seed.h" 41 42#if HAVE_IO_H 43#include <io.h> 44#endif 45 46#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 47#include <windows.h> 48#define COLOR_RED FOREGROUND_RED 49#define COLOR_GREEN FOREGROUND_GREEN 50#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) 51#else 52#define COLOR_RED 1 53#define COLOR_GREEN 2 54#define COLOR_YELLOW 3 55#endif 56 57#if HAVE_UNISTD_H 58#include <unistd.h> 59#endif 60 61#if !HAVE_ISATTY 62#define isatty(fd) 1 63#endif 64 65#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL 66#include "libavutil/arm/cpu.h" 67 68void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp; 69#endif 70 71/* List of tests to invoke */ 72static const struct { 73 const char *name; 74 void (*func)(void); 75} tests[] = { 76#if CONFIG_AVCODEC 77 #if CONFIG_AAC_DECODER 78 { "aacpsdsp", checkasm_check_aacpsdsp }, 79 { "sbrdsp", checkasm_check_sbrdsp }, 80 #endif 81 #if CONFIG_ALAC_DECODER 82 { "alacdsp", checkasm_check_alacdsp }, 83 #endif 84 #if CONFIG_AUDIODSP 85 { "audiodsp", checkasm_check_audiodsp }, 86 #endif 87 #if CONFIG_BLOCKDSP 88 { "blockdsp", checkasm_check_blockdsp }, 89 #endif 90 #if CONFIG_BSWAPDSP 91 { "bswapdsp", checkasm_check_bswapdsp }, 92 #endif 93 #if CONFIG_DCA_DECODER 94 { "synth_filter", checkasm_check_synth_filter }, 95 #endif 96 #if CONFIG_EXR_DECODER 97 { "exrdsp", checkasm_check_exrdsp }, 98 #endif 99 #if CONFIG_FLACDSP 100 { "flacdsp", checkasm_check_flacdsp }, 101 #endif 102 #if CONFIG_FMTCONVERT 103 { "fmtconvert", checkasm_check_fmtconvert }, 104 #endif 105 #if CONFIG_G722DSP 106 { "g722dsp", checkasm_check_g722dsp }, 107 #endif 108 #if CONFIG_H264DSP 109 { "h264dsp", checkasm_check_h264dsp }, 110 #endif 111 #if CONFIG_H264PRED 112 { "h264pred", checkasm_check_h264pred }, 113 #endif 114 #if CONFIG_H264QPEL 115 { "h264qpel", checkasm_check_h264qpel }, 116 #endif 117 #if CONFIG_HEVC_DECODER 118 { "hevc_add_res", checkasm_check_hevc_add_res }, 119 { "hevc_idct", checkasm_check_hevc_idct }, 120 { "hevc_pel", checkasm_check_hevc_pel }, 121 { "hevc_sao", checkasm_check_hevc_sao }, 122 #endif 123 #if CONFIG_HUFFYUV_DECODER 124 { "huffyuvdsp", checkasm_check_huffyuvdsp }, 125 #endif 126 #if CONFIG_IDCTDSP 127 { "idctdsp", checkasm_check_idctdsp }, 128 #endif 129 #if CONFIG_JPEG2000_DECODER 130 { "jpeg2000dsp", checkasm_check_jpeg2000dsp }, 131 #endif 132 #if CONFIG_HUFFYUVDSP 133 { "llviddsp", checkasm_check_llviddsp }, 134 #endif 135 #if CONFIG_LLVIDENCDSP 136 { "llviddspenc", checkasm_check_llviddspenc }, 137 #endif 138 #if CONFIG_ME_CMP 139 { "motion", checkasm_check_motion }, 140 #endif 141 #if CONFIG_OPUS_DECODER 142 { "opusdsp", checkasm_check_opusdsp }, 143 #endif 144 #if CONFIG_PIXBLOCKDSP 145 { "pixblockdsp", checkasm_check_pixblockdsp }, 146 #endif 147 #if CONFIG_UTVIDEO_DECODER 148 { "utvideodsp", checkasm_check_utvideodsp }, 149 #endif 150 #if CONFIG_V210_DECODER 151 { "v210dec", checkasm_check_v210dec }, 152 #endif 153 #if CONFIG_V210_ENCODER 154 { "v210enc", checkasm_check_v210enc }, 155 #endif 156 #if CONFIG_VC1DSP 157 { "vc1dsp", checkasm_check_vc1dsp }, 158 #endif 159 #if CONFIG_VP8DSP 160 { "vp8dsp", checkasm_check_vp8dsp }, 161 #endif 162 #if CONFIG_VP9_DECODER 163 { "vp9dsp", checkasm_check_vp9dsp }, 164 #endif 165 #if CONFIG_VIDEODSP 166 { "videodsp", checkasm_check_videodsp }, 167 #endif 168#endif 169#if CONFIG_AVFILTER 170 #if CONFIG_AFIR_FILTER 171 { "af_afir", checkasm_check_afir }, 172 #endif 173 #if CONFIG_BLEND_FILTER 174 { "vf_blend", checkasm_check_blend }, 175 #endif 176 #if CONFIG_COLORSPACE_FILTER 177 { "vf_colorspace", checkasm_check_colorspace }, 178 #endif 179 #if CONFIG_EQ_FILTER 180 { "vf_eq", checkasm_check_vf_eq }, 181 #endif 182 #if CONFIG_GBLUR_FILTER 183 { "vf_gblur", checkasm_check_vf_gblur }, 184 #endif 185 #if CONFIG_HFLIP_FILTER 186 { "vf_hflip", checkasm_check_vf_hflip }, 187 #endif 188 #if CONFIG_NLMEANS_FILTER 189 { "vf_nlmeans", checkasm_check_nlmeans }, 190 #endif 191 #if CONFIG_THRESHOLD_FILTER 192 { "vf_threshold", checkasm_check_vf_threshold }, 193 #endif 194#endif 195#if CONFIG_SWSCALE 196 { "sw_gbrp", checkasm_check_sw_gbrp }, 197 { "sw_rgb", checkasm_check_sw_rgb }, 198 { "sw_scale", checkasm_check_sw_scale }, 199#endif 200#if CONFIG_AVUTIL 201 { "fixed_dsp", checkasm_check_fixed_dsp }, 202 { "float_dsp", checkasm_check_float_dsp }, 203 { "av_tx", checkasm_check_av_tx }, 204#endif 205 { NULL } 206}; 207 208/* List of cpu flags to check */ 209static const struct { 210 const char *name; 211 const char *suffix; 212 int flag; 213} cpus[] = { 214#if ARCH_AARCH64 215 { "ARMV8", "armv8", AV_CPU_FLAG_ARMV8 }, 216 { "NEON", "neon", AV_CPU_FLAG_NEON }, 217#elif ARCH_ARM 218 { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, 219 { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 }, 220 { "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2 }, 221 { "VFP", "vfp", AV_CPU_FLAG_VFP }, 222 { "VFP_VM", "vfp_vm", AV_CPU_FLAG_VFP_VM }, 223 { "VFPV3", "vfp3", AV_CPU_FLAG_VFPV3 }, 224 { "NEON", "neon", AV_CPU_FLAG_NEON }, 225#elif ARCH_PPC 226 { "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC }, 227 { "VSX", "vsx", AV_CPU_FLAG_VSX }, 228 { "POWER8", "power8", AV_CPU_FLAG_POWER8 }, 229#elif ARCH_MIPS 230 { "MMI", "mmi", AV_CPU_FLAG_MMI }, 231 { "MSA", "msa", AV_CPU_FLAG_MSA }, 232#elif ARCH_X86 233 { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, 234 { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, 235 { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, 236 { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, 237 { "SSE", "sse", AV_CPU_FLAG_SSE }, 238 { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, 239 { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, 240 { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, 241 { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, 242 { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, 243 { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, 244 { "AVX", "avx", AV_CPU_FLAG_AVX }, 245 { "XOP", "xop", AV_CPU_FLAG_XOP }, 246 { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, 247 { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, 248 { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, 249 { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, 250 { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL }, 251#elif ARCH_LOONGARCH 252 { "LSX", "lsx", AV_CPU_FLAG_LSX }, 253 { "LASX", "lasx", AV_CPU_FLAG_LASX }, 254#endif 255 { NULL } 256}; 257 258typedef struct CheckasmFuncVersion { 259 struct CheckasmFuncVersion *next; 260 void *func; 261 int ok; 262 int cpu; 263 CheckasmPerf perf; 264} CheckasmFuncVersion; 265 266/* Binary search tree node */ 267typedef struct CheckasmFunc { 268 struct CheckasmFunc *child[2]; 269 CheckasmFuncVersion versions; 270 uint8_t color; /* 0 = red, 1 = black */ 271 char name[1]; 272} CheckasmFunc; 273 274/* Internal state */ 275static struct { 276 CheckasmFunc *funcs; 277 CheckasmFunc *current_func; 278 CheckasmFuncVersion *current_func_ver; 279 const char *current_test_name; 280 const char *bench_pattern; 281 int bench_pattern_len; 282 int num_checked; 283 int num_failed; 284 285 /* perf */ 286 int nop_time; 287 int sysfd; 288 289 int cpu_flag; 290 const char *cpu_flag_name; 291 const char *test_name; 292 int verbose; 293} state; 294 295/* PRNG state */ 296AVLFG checkasm_lfg; 297 298/* float compare support code */ 299static int is_negative(union av_intfloat32 u) 300{ 301 return u.i >> 31; 302} 303 304int float_near_ulp(float a, float b, unsigned max_ulp) 305{ 306 union av_intfloat32 x, y; 307 308 x.f = a; 309 y.f = b; 310 311 if (is_negative(x) != is_negative(y)) { 312 // handle -0.0 == +0.0 313 return a == b; 314 } 315 316 if (llabs((int64_t)x.i - y.i) <= max_ulp) 317 return 1; 318 319 return 0; 320} 321 322int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, 323 unsigned len) 324{ 325 unsigned i; 326 327 for (i = 0; i < len; i++) { 328 if (!float_near_ulp(a[i], b[i], max_ulp)) 329 return 0; 330 } 331 return 1; 332} 333 334int float_near_abs_eps(float a, float b, float eps) 335{ 336 float abs_diff = fabsf(a - b); 337 if (abs_diff < eps) 338 return 1; 339 340 fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps); 341 342 return 0; 343} 344 345int float_near_abs_eps_array(const float *a, const float *b, float eps, 346 unsigned len) 347{ 348 unsigned i; 349 350 for (i = 0; i < len; i++) { 351 if (!float_near_abs_eps(a[i], b[i], eps)) 352 return 0; 353 } 354 return 1; 355} 356 357int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp) 358{ 359 return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); 360} 361 362int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, 363 unsigned max_ulp, unsigned len) 364{ 365 unsigned i; 366 367 for (i = 0; i < len; i++) { 368 if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) 369 return 0; 370 } 371 return 1; 372} 373 374int double_near_abs_eps(double a, double b, double eps) 375{ 376 double abs_diff = fabs(a - b); 377 378 return abs_diff < eps; 379} 380 381int double_near_abs_eps_array(const double *a, const double *b, double eps, 382 unsigned len) 383{ 384 unsigned i; 385 386 for (i = 0; i < len; i++) { 387 if (!double_near_abs_eps(a[i], b[i], eps)) 388 return 0; 389 } 390 return 1; 391} 392 393/* Print colored text to stderr if the terminal supports it */ 394static void color_printf(int color, const char *fmt, ...) 395{ 396 static int use_color = -1; 397 va_list arg; 398 399#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 400 static HANDLE con; 401 static WORD org_attributes; 402 403 if (use_color < 0) { 404 CONSOLE_SCREEN_BUFFER_INFO con_info; 405 con = GetStdHandle(STD_ERROR_HANDLE); 406 if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) { 407 org_attributes = con_info.wAttributes; 408 use_color = 1; 409 } else 410 use_color = 0; 411 } 412 if (use_color) 413 SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); 414#else 415 if (use_color < 0) { 416 const char *term = getenv("TERM"); 417 use_color = term && strcmp(term, "dumb") && isatty(2); 418 } 419 if (use_color) 420 fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); 421#endif 422 423 va_start(arg, fmt); 424 vfprintf(stderr, fmt, arg); 425 va_end(arg); 426 427 if (use_color) { 428#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 429 SetConsoleTextAttribute(con, org_attributes); 430#else 431 fprintf(stderr, "\x1b[0m"); 432#endif 433 } 434} 435 436/* Deallocate a tree */ 437static void destroy_func_tree(CheckasmFunc *f) 438{ 439 if (f) { 440 CheckasmFuncVersion *v = f->versions.next; 441 while (v) { 442 CheckasmFuncVersion *next = v->next; 443 free(v); 444 v = next; 445 } 446 447 destroy_func_tree(f->child[0]); 448 destroy_func_tree(f->child[1]); 449 free(f); 450 } 451} 452 453/* Allocate a zero-initialized block, clean up and exit on failure */ 454static void *checkasm_malloc(size_t size) 455{ 456 void *ptr = calloc(1, size); 457 if (!ptr) { 458 fprintf(stderr, "checkasm: malloc failed\n"); 459 destroy_func_tree(state.funcs); 460 exit(1); 461 } 462 return ptr; 463} 464 465/* Get the suffix of the specified cpu flag */ 466static const char *cpu_suffix(int cpu) 467{ 468 int i = FF_ARRAY_ELEMS(cpus); 469 470 while (--i >= 0) 471 if (cpu & cpus[i].flag) 472 return cpus[i].suffix; 473 474 return "c"; 475} 476 477static int cmp_nop(const void *a, const void *b) 478{ 479 return *(const uint16_t*)a - *(const uint16_t*)b; 480} 481 482/* Measure the overhead of the timing code (in decicycles) */ 483static int measure_nop_time(void) 484{ 485 uint16_t nops[10000]; 486 int i, nop_sum = 0; 487 av_unused const int sysfd = state.sysfd; 488 489 uint64_t t = 0; 490 for (i = 0; i < 10000; i++) { 491 PERF_START(t); 492 PERF_STOP(t); 493 nops[i] = t; 494 } 495 496 qsort(nops, 10000, sizeof(uint16_t), cmp_nop); 497 for (i = 2500; i < 7500; i++) 498 nop_sum += nops[i]; 499 500 return nop_sum / 500; 501} 502 503/* Print benchmark results */ 504static void print_benchs(CheckasmFunc *f) 505{ 506 if (f) { 507 print_benchs(f->child[0]); 508 509 /* Only print functions with at least one assembly version */ 510 if (f->versions.cpu || f->versions.next) { 511 CheckasmFuncVersion *v = &f->versions; 512 do { 513 CheckasmPerf *p = &v->perf; 514 if (p->iterations) { 515 int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4; 516 printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); 517 } 518 } while ((v = v->next)); 519 } 520 521 print_benchs(f->child[1]); 522 } 523} 524 525/* ASCIIbetical sort except preserving natural order for numbers */ 526static int cmp_func_names(const char *a, const char *b) 527{ 528 const char *start = a; 529 int ascii_diff, digit_diff; 530 531 for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++); 532 for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); 533 534 if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b))) 535 return digit_diff; 536 537 return ascii_diff; 538} 539 540/* Perform a tree rotation in the specified direction and return the new root */ 541static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) 542{ 543 CheckasmFunc *r = f->child[dir^1]; 544 f->child[dir^1] = r->child[dir]; 545 r->child[dir] = f; 546 r->color = f->color; 547 f->color = 0; 548 return r; 549} 550 551#define is_red(f) ((f) && !(f)->color) 552 553/* Balance a left-leaning red-black tree at the specified node */ 554static void balance_tree(CheckasmFunc **root) 555{ 556 CheckasmFunc *f = *root; 557 558 if (is_red(f->child[0]) && is_red(f->child[1])) { 559 f->color ^= 1; 560 f->child[0]->color = f->child[1]->color = 1; 561 } 562 563 if (!is_red(f->child[0]) && is_red(f->child[1])) 564 *root = rotate_tree(f, 0); /* Rotate left */ 565 else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) 566 *root = rotate_tree(f, 1); /* Rotate right */ 567} 568 569/* Get a node with the specified name, creating it if it doesn't exist */ 570static CheckasmFunc *get_func(CheckasmFunc **root, const char *name) 571{ 572 CheckasmFunc *f = *root; 573 574 if (f) { 575 /* Search the tree for a matching node */ 576 int cmp = cmp_func_names(name, f->name); 577 if (cmp) { 578 f = get_func(&f->child[cmp > 0], name); 579 580 /* Rebalance the tree on the way up if a new node was inserted */ 581 if (!f->versions.func) 582 balance_tree(root); 583 } 584 } else { 585 /* Allocate and insert a new node into the tree */ 586 int name_length = strlen(name); 587 f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); 588 memcpy(f->name, name, name_length + 1); 589 } 590 591 return f; 592} 593 594/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ 595static void check_cpu_flag(const char *name, int flag) 596{ 597 int old_cpu_flag = state.cpu_flag; 598 599 flag |= old_cpu_flag; 600 av_force_cpu_flags(-1); 601 state.cpu_flag = flag & av_get_cpu_flags(); 602 av_force_cpu_flags(state.cpu_flag); 603 604 if (!flag || state.cpu_flag != old_cpu_flag) { 605 int i; 606 607 state.cpu_flag_name = name; 608 for (i = 0; tests[i].func; i++) { 609 if (state.test_name && strcmp(tests[i].name, state.test_name)) 610 continue; 611 state.current_test_name = tests[i].name; 612 tests[i].func(); 613 } 614 } 615} 616 617/* Print the name of the current CPU flag, but only do it once */ 618static void print_cpu_name(void) 619{ 620 if (state.cpu_flag_name) { 621 color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); 622 state.cpu_flag_name = NULL; 623 } 624} 625 626#if CONFIG_LINUX_PERF 627static int bench_init_linux(void) 628{ 629 struct perf_event_attr attr = { 630 .type = PERF_TYPE_HARDWARE, 631 .size = sizeof(struct perf_event_attr), 632 .config = PERF_COUNT_HW_CPU_CYCLES, 633 .disabled = 1, // start counting only on demand 634 .exclude_kernel = 1, 635 .exclude_hv = 1, 636 }; 637 638 printf("benchmarking with Linux Perf Monitoring API\n"); 639 640 state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); 641 if (state.sysfd == -1) { 642 perror("syscall"); 643 return -1; 644 } 645 return 0; 646} 647#elif CONFIG_MACOS_KPERF 648static int bench_init_kperf(void) 649{ 650 ff_kperf_init(); 651 return 0; 652} 653#else 654static int bench_init_ffmpeg(void) 655{ 656#ifdef AV_READ_TIME 657 printf("benchmarking with native FFmpeg timers\n"); 658 return 0; 659#else 660 fprintf(stderr, "checkasm: --bench is not supported on your system\n"); 661 return -1; 662#endif 663} 664#endif 665 666static int bench_init(void) 667{ 668#if CONFIG_LINUX_PERF 669 int ret = bench_init_linux(); 670#elif CONFIG_MACOS_KPERF 671 int ret = bench_init_kperf(); 672#else 673 int ret = bench_init_ffmpeg(); 674#endif 675 if (ret < 0) 676 return ret; 677 678 state.nop_time = measure_nop_time(); 679 printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); 680 return 0; 681} 682 683static void bench_uninit(void) 684{ 685#if CONFIG_LINUX_PERF 686 if (state.sysfd > 0) 687 close(state.sysfd); 688#endif 689} 690 691int main(int argc, char *argv[]) 692{ 693 unsigned int seed = av_get_random_seed(); 694 int i, ret = 0; 695 696#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL 697 if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) 698 checkasm_checked_call = checkasm_checked_call_vfp; 699#endif 700 701 if (!tests[0].func || !cpus[0].flag) { 702 fprintf(stderr, "checkasm: no tests to perform\n"); 703 return 0; 704 } 705 706 while (argc > 1) { 707 if (!strncmp(argv[1], "--bench", 7)) { 708 if (bench_init() < 0) 709 return 1; 710 if (argv[1][7] == '=') { 711 state.bench_pattern = argv[1] + 8; 712 state.bench_pattern_len = strlen(state.bench_pattern); 713 } else 714 state.bench_pattern = ""; 715 } else if (!strncmp(argv[1], "--test=", 7)) { 716 state.test_name = argv[1] + 7; 717 } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) { 718 state.verbose = 1; 719 } else { 720 seed = strtoul(argv[1], NULL, 10); 721 } 722 723 argc--; 724 argv++; 725 } 726 727 fprintf(stderr, "checkasm: using random seed %u\n", seed); 728 av_lfg_init(&checkasm_lfg, seed); 729 730 check_cpu_flag(NULL, 0); 731 for (i = 0; cpus[i].flag; i++) 732 check_cpu_flag(cpus[i].name, cpus[i].flag); 733 734 if (state.num_failed) { 735 fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); 736 ret = 1; 737 } else { 738 fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); 739 if (state.bench_pattern) { 740 print_benchs(state.funcs); 741 } 742 } 743 744 destroy_func_tree(state.funcs); 745 bench_uninit(); 746 return ret; 747} 748 749/* Decide whether or not the specified function needs to be tested and 750 * allocate/initialize data structures if needed. Returns a pointer to a 751 * reference function if the function should be tested, otherwise NULL */ 752void *checkasm_check_func(void *func, const char *name, ...) 753{ 754 char name_buf[256]; 755 void *ref = func; 756 CheckasmFuncVersion *v; 757 int name_length; 758 va_list arg; 759 760 va_start(arg, name); 761 name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); 762 va_end(arg); 763 764 if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) 765 return NULL; 766 767 state.current_func = get_func(&state.funcs, name_buf); 768 state.funcs->color = 1; 769 v = &state.current_func->versions; 770 771 if (v->func) { 772 CheckasmFuncVersion *prev; 773 do { 774 /* Only test functions that haven't already been tested */ 775 if (v->func == func) 776 return NULL; 777 778 if (v->ok) 779 ref = v->func; 780 781 prev = v; 782 } while ((v = v->next)); 783 784 v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); 785 } 786 787 v->func = func; 788 v->ok = 1; 789 v->cpu = state.cpu_flag; 790 state.current_func_ver = v; 791 792 if (state.cpu_flag) 793 state.num_checked++; 794 795 return ref; 796} 797 798/* Decide whether or not the current function needs to be benchmarked */ 799int checkasm_bench_func(void) 800{ 801 return !state.num_failed && state.bench_pattern && 802 !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); 803} 804 805/* Indicate that the current test has failed */ 806void checkasm_fail_func(const char *msg, ...) 807{ 808 if (state.current_func_ver->cpu && state.current_func_ver->ok) { 809 va_list arg; 810 811 print_cpu_name(); 812 fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); 813 va_start(arg, msg); 814 vfprintf(stderr, msg, arg); 815 va_end(arg); 816 fprintf(stderr, ")\n"); 817 818 state.current_func_ver->ok = 0; 819 state.num_failed++; 820 } 821} 822 823/* Get the benchmark context of the current function */ 824CheckasmPerf *checkasm_get_perf_context(void) 825{ 826 CheckasmPerf *perf = &state.current_func_ver->perf; 827 memset(perf, 0, sizeof(*perf)); 828 perf->sysfd = state.sysfd; 829 return perf; 830} 831 832/* Print the outcome of all tests performed since the last time this function was called */ 833void checkasm_report(const char *name, ...) 834{ 835 static int prev_checked, prev_failed, max_length; 836 837 if (state.num_checked > prev_checked) { 838 int pad_length = max_length + 4; 839 va_list arg; 840 841 print_cpu_name(); 842 pad_length -= fprintf(stderr, " - %s.", state.current_test_name); 843 va_start(arg, name); 844 pad_length -= vfprintf(stderr, name, arg); 845 va_end(arg); 846 fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); 847 848 if (state.num_failed == prev_failed) 849 color_printf(COLOR_GREEN, "OK"); 850 else 851 color_printf(COLOR_RED, "FAILED"); 852 fprintf(stderr, "]\n"); 853 854 prev_checked = state.num_checked; 855 prev_failed = state.num_failed; 856 } else if (!state.cpu_flag) { 857 /* Calculate the amount of padding required to make the output vertically aligned */ 858 int length = strlen(state.current_test_name); 859 va_list arg; 860 861 va_start(arg, name); 862 length += vsnprintf(NULL, 0, name, arg); 863 va_end(arg); 864 865 if (length > max_length) 866 max_length = length; 867 } 868} 869 870#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \ 871int checkasm_check_##type(const char *const file, const int line, \ 872 const type *buf1, ptrdiff_t stride1, \ 873 const type *buf2, ptrdiff_t stride2, \ 874 const int w, int h, const char *const name) \ 875{ \ 876 int y = 0; \ 877 stride1 /= sizeof(*buf1); \ 878 stride2 /= sizeof(*buf2); \ 879 for (y = 0; y < h; y++) \ 880 if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \ 881 break; \ 882 if (y == h) \ 883 return 0; \ 884 checkasm_fail_func("%s:%d", file, line); \ 885 if (!state.verbose) \ 886 return 1; \ 887 fprintf(stderr, "%s:\n", name); \ 888 while (h--) { \ 889 for (int x = 0; x < w; x++) \ 890 fprintf(stderr, " " fmt, buf1[x]); \ 891 fprintf(stderr, " "); \ 892 for (int x = 0; x < w; x++) \ 893 fprintf(stderr, " " fmt, buf2[x]); \ 894 fprintf(stderr, " "); \ 895 for (int x = 0; x < w; x++) \ 896 fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \ 897 buf1 += stride1; \ 898 buf2 += stride2; \ 899 fprintf(stderr, "\n"); \ 900 } \ 901 return 1; \ 902} 903 904DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x") 905DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x") 906DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d") 907DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d") 908