1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Assembly testing and benchmarking tool 3cabdff1aSopenharmony_ci * Copyright (c) 2015 Henrik Gramner 4cabdff1aSopenharmony_ci * Copyright (c) 2008 Loren Merritt 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or modify 9cabdff1aSopenharmony_ci * it under the terms of the GNU General Public License as published by 10cabdff1aSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or 11cabdff1aSopenharmony_ci * (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16cabdff1aSopenharmony_ci * GNU General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU General Public License along 19cabdff1aSopenharmony_ci * with FFmpeg; if not, write to the Free Software Foundation, Inc., 20cabdff1aSopenharmony_ci * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "config.h" 24cabdff1aSopenharmony_ci#include "config_components.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#if CONFIG_LINUX_PERF 27cabdff1aSopenharmony_ci# ifndef _GNU_SOURCE 28cabdff1aSopenharmony_ci# define _GNU_SOURCE // for syscall (performance monitoring API) 29cabdff1aSopenharmony_ci# endif 30cabdff1aSopenharmony_ci#endif 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#include <stdarg.h> 33cabdff1aSopenharmony_ci#include <stdio.h> 34cabdff1aSopenharmony_ci#include <stdlib.h> 35cabdff1aSopenharmony_ci#include <string.h> 36cabdff1aSopenharmony_ci#include "checkasm.h" 37cabdff1aSopenharmony_ci#include "libavutil/common.h" 38cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 39cabdff1aSopenharmony_ci#include "libavutil/intfloat.h" 40cabdff1aSopenharmony_ci#include "libavutil/random_seed.h" 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_ci#if HAVE_IO_H 43cabdff1aSopenharmony_ci#include <io.h> 44cabdff1aSopenharmony_ci#endif 45cabdff1aSopenharmony_ci 46cabdff1aSopenharmony_ci#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 47cabdff1aSopenharmony_ci#include <windows.h> 48cabdff1aSopenharmony_ci#define COLOR_RED FOREGROUND_RED 49cabdff1aSopenharmony_ci#define COLOR_GREEN FOREGROUND_GREEN 50cabdff1aSopenharmony_ci#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) 51cabdff1aSopenharmony_ci#else 52cabdff1aSopenharmony_ci#define COLOR_RED 1 53cabdff1aSopenharmony_ci#define COLOR_GREEN 2 54cabdff1aSopenharmony_ci#define COLOR_YELLOW 3 55cabdff1aSopenharmony_ci#endif 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_ci#if HAVE_UNISTD_H 58cabdff1aSopenharmony_ci#include <unistd.h> 59cabdff1aSopenharmony_ci#endif 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci#if !HAVE_ISATTY 62cabdff1aSopenharmony_ci#define isatty(fd) 1 63cabdff1aSopenharmony_ci#endif 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL 
#include "libavutil/arm/cpu.h"

/*
 * Entry point used for checked calls on ARM. It starts out pointing at the
 * no-VFP variant; main() switches it to the VFP variant when the CPU reports
 * VFP or NEON support.
 */
void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
#endif

/*
 * List of tests to invoke.
 *
 * Each entry pairs the name accepted by --test=<name> with the function that
 * runs the test. Entries are compiled in according to the build configuration
 * and are run in table order by check_cpu_flag(). The table is terminated by
 * an entry whose func is NULL.
 */
static const struct {
    const char *name;   /* name matched against --test=<name> */
    void (*func)(void); /* test entry point */
} tests[] = {
#if CONFIG_AVCODEC
    #if CONFIG_AAC_DECODER
        { "aacpsdsp", checkasm_check_aacpsdsp },
        { "sbrdsp",   checkasm_check_sbrdsp },
    #endif
    #if CONFIG_ALAC_DECODER
        { "alacdsp", checkasm_check_alacdsp },
    #endif
    #if CONFIG_AUDIODSP
        { "audiodsp", checkasm_check_audiodsp },
    #endif
    #if CONFIG_BLOCKDSP
        { "blockdsp", checkasm_check_blockdsp },
    #endif
    #if CONFIG_BSWAPDSP
        { "bswapdsp", checkasm_check_bswapdsp },
    #endif
    #if CONFIG_DCA_DECODER
        { "synth_filter", checkasm_check_synth_filter },
    #endif
    #if CONFIG_EXR_DECODER
        { "exrdsp", checkasm_check_exrdsp },
    #endif
    #if CONFIG_FLACDSP
        { "flacdsp", checkasm_check_flacdsp },
    #endif
    #if CONFIG_FMTCONVERT
        { "fmtconvert", checkasm_check_fmtconvert },
    #endif
    #if CONFIG_G722DSP
        { "g722dsp", checkasm_check_g722dsp },
    #endif
    #if CONFIG_H264DSP
        { "h264dsp", checkasm_check_h264dsp },
    #endif
    #if CONFIG_H264PRED
        { "h264pred", checkasm_check_h264pred },
    #endif
    #if CONFIG_H264QPEL
        { "h264qpel", checkasm_check_h264qpel },
    #endif
    #if CONFIG_HEVC_DECODER
        { "hevc_add_res", checkasm_check_hevc_add_res },
        { "hevc_idct", checkasm_check_hevc_idct },
        { "hevc_pel", checkasm_check_hevc_pel },
        { "hevc_sao", checkasm_check_hevc_sao },
    #endif
    #if CONFIG_HUFFYUV_DECODER
        { "huffyuvdsp", checkasm_check_huffyuvdsp },
    #endif
    #if CONFIG_IDCTDSP
        { "idctdsp", checkasm_check_idctdsp },
    #endif
    #if CONFIG_JPEG2000_DECODER
        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
    #endif
    #if CONFIG_HUFFYUVDSP
        { "llviddsp", checkasm_check_llviddsp },
    #endif
    #if CONFIG_LLVIDENCDSP
        { "llviddspenc", checkasm_check_llviddspenc },
    #endif
    #if CONFIG_ME_CMP
        { "motion", checkasm_check_motion },
    #endif
    #if CONFIG_OPUS_DECODER
        { "opusdsp", checkasm_check_opusdsp },
    #endif
    #if CONFIG_PIXBLOCKDSP
        { "pixblockdsp", checkasm_check_pixblockdsp },
    #endif
    #if CONFIG_UTVIDEO_DECODER
        { "utvideodsp", checkasm_check_utvideodsp },
    #endif
    #if CONFIG_V210_DECODER
        { "v210dec", checkasm_check_v210dec },
    #endif
    #if CONFIG_V210_ENCODER
        { "v210enc", checkasm_check_v210enc },
    #endif
    #if CONFIG_VC1DSP
        { "vc1dsp", checkasm_check_vc1dsp },
    #endif
    #if CONFIG_VP8DSP
        { "vp8dsp", checkasm_check_vp8dsp },
    #endif
    #if CONFIG_VP9_DECODER
        { "vp9dsp", checkasm_check_vp9dsp },
    #endif
    #if CONFIG_VIDEODSP
        { "videodsp", checkasm_check_videodsp },
    #endif
#endif
#if CONFIG_AVFILTER
    #if CONFIG_AFIR_FILTER
        { "af_afir", checkasm_check_afir },
    #endif
    #if CONFIG_BLEND_FILTER
        { "vf_blend", checkasm_check_blend },
    #endif
    #if CONFIG_COLORSPACE_FILTER
        { "vf_colorspace", checkasm_check_colorspace },
    #endif
    #if CONFIG_EQ_FILTER
        { "vf_eq", checkasm_check_vf_eq },
    #endif
    #if CONFIG_GBLUR_FILTER
        { "vf_gblur", checkasm_check_vf_gblur },
    #endif
    #if CONFIG_HFLIP_FILTER
        { "vf_hflip", checkasm_check_vf_hflip },
    #endif
    #if CONFIG_NLMEANS_FILTER
        { "vf_nlmeans", checkasm_check_nlmeans },
    #endif
    #if CONFIG_THRESHOLD_FILTER
        { "vf_threshold", checkasm_check_vf_threshold },
    #endif
#endif
#if CONFIG_SWSCALE
    { "sw_gbrp", checkasm_check_sw_gbrp },
    { "sw_rgb", checkasm_check_sw_rgb },
    { "sw_scale", checkasm_check_sw_scale },
#endif
#if CONFIG_AVUTIL
        { "fixed_dsp", checkasm_check_fixed_dsp },
        { "float_dsp", checkasm_check_float_dsp },
        { "av_tx",     checkasm_check_av_tx },
#endif
    { NULL }
};

/*
 * List of cpu flags to check.
 *
 * main() walks this table in order and stops at the first entry whose flag is
 * 0, so the trailing { NULL } entry acts as the terminator. cpu_suffix()
 * scans the table from the end, so when several entries match a flag mask the
 * later entry supplies the suffix.
 */
static const struct {
    const char *name;   /* heading printed by print_cpu_name() */
    const char *suffix; /* appended to function names in benchmark output */
    int flag;           /* AV_CPU_FLAG_* bit(s) enabled by this entry */
} cpus[] = {
#if   ARCH_AARCH64
    { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
#elif ARCH_ARM
    { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
    { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
    { "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
    { "VFP",      "vfp",      AV_CPU_FLAG_VFP },
    { "VFP_VM",   "vfp_vm",   AV_CPU_FLAG_VFP_VM },
    { "VFPV3",    "vfp3",     AV_CPU_FLAG_VFPV3 },
    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
#elif ARCH_PPC
    { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
    { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
    { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
#elif ARCH_MIPS
    { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
    { "MSA",      "msa",      AV_CPU_FLAG_MSA },
#elif ARCH_X86
    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
    { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
#elif ARCH_LOONGARCH
    { "LSX",      "lsx",      AV_CPU_FLAG_LSX },
    { "LASX",     "lasx",     AV_CPU_FLAG_LASX },
#endif
    { NULL }
};

/*
 * One implementation ("version") of a tested function.
 *
 * Versions of the same function form a singly linked list. The first one is
 * embedded in its CheckasmFunc; the following ones are separate heap blocks
 * that destroy_func_tree() releases.
 */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version, NULL at the end of the list */
    void *func;       /* implementation pointer; get_func() treats NULL here as "node was just created" */
    int ok;           /* NOTE(review): presumably the pass/fail status; not used in this part of the file */
    int cpu;          /* cpu flag(s) of this version; cpu_suffix() maps 0 to "c" */
    CheckasmPerf perf; /* benchmark counters read by print_benchs() */
} CheckasmFuncVersion;

/* Binary search tree node (one per function name, ordered by cmp_func_names()) */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2]; /* [0] = names sorting before this one, [1] = after */
    CheckasmFuncVersion versions;  /* head of the version list, stored inline */
    uint8_t color; /* 0 = red, 1 = black */
    char name[1];  /* NUL-terminated name; get_func() over-allocates the node to hold it */
} CheckasmFunc;

/* Internal state (zero-initialized, as it has static storage duration) */
static struct {
    CheckasmFunc *funcs;                   /* root of the function tree */
    CheckasmFunc *current_func;            /* NOTE(review): presumably the function being checked; set outside this part of the file */
    CheckasmFuncVersion *current_func_ver; /* NOTE(review): presumably the version being checked; set outside this part of the file */
    const char *current_test_name;         /* name of the test being run, set by check_cpu_flag() */
    const char *bench_pattern;             /* NULL unless --bench was given; "" when no pattern was supplied */
    int bench_pattern_len;                 /* strlen(bench_pattern) when a pattern was supplied */
    int num_checked;                       /* total reported in main()'s summary */
    int num_failed;                        /* failures reported in main()'s summary */

    /* perf */
    int nop_time; /* timer overhead measured by measure_nop_time() */
    int sysfd;    /* perf event descriptor opened by bench_init_linux() */

    int cpu_flag;              /* cpu flags currently forced by check_cpu_flag() */
    const char *cpu_flag_name; /* heading still to be printed by print_cpu_name(), or NULL */
    const char *test_name;     /* --test=<name> filter, or NULL to run every test */
    int verbose;               /* set by --verbose / -v */
} state;

/* PRNG state */
296cabdff1aSopenharmony_ciAVLFG checkasm_lfg; 297cabdff1aSopenharmony_ci 298cabdff1aSopenharmony_ci/* float compare support code */ 299cabdff1aSopenharmony_cistatic int is_negative(union av_intfloat32 u) 300cabdff1aSopenharmony_ci{ 301cabdff1aSopenharmony_ci return u.i >> 31; 302cabdff1aSopenharmony_ci} 303cabdff1aSopenharmony_ci 304cabdff1aSopenharmony_ciint float_near_ulp(float a, float b, unsigned max_ulp) 305cabdff1aSopenharmony_ci{ 306cabdff1aSopenharmony_ci union av_intfloat32 x, y; 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_ci x.f = a; 309cabdff1aSopenharmony_ci y.f = b; 310cabdff1aSopenharmony_ci 311cabdff1aSopenharmony_ci if (is_negative(x) != is_negative(y)) { 312cabdff1aSopenharmony_ci // handle -0.0 == +0.0 313cabdff1aSopenharmony_ci return a == b; 314cabdff1aSopenharmony_ci } 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci if (llabs((int64_t)x.i - y.i) <= max_ulp) 317cabdff1aSopenharmony_ci return 1; 318cabdff1aSopenharmony_ci 319cabdff1aSopenharmony_ci return 0; 320cabdff1aSopenharmony_ci} 321cabdff1aSopenharmony_ci 322cabdff1aSopenharmony_ciint float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, 323cabdff1aSopenharmony_ci unsigned len) 324cabdff1aSopenharmony_ci{ 325cabdff1aSopenharmony_ci unsigned i; 326cabdff1aSopenharmony_ci 327cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { 328cabdff1aSopenharmony_ci if (!float_near_ulp(a[i], b[i], max_ulp)) 329cabdff1aSopenharmony_ci return 0; 330cabdff1aSopenharmony_ci } 331cabdff1aSopenharmony_ci return 1; 332cabdff1aSopenharmony_ci} 333cabdff1aSopenharmony_ci 334cabdff1aSopenharmony_ciint float_near_abs_eps(float a, float b, float eps) 335cabdff1aSopenharmony_ci{ 336cabdff1aSopenharmony_ci float abs_diff = fabsf(a - b); 337cabdff1aSopenharmony_ci if (abs_diff < eps) 338cabdff1aSopenharmony_ci return 1; 339cabdff1aSopenharmony_ci 340cabdff1aSopenharmony_ci fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps); 
341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci return 0; 343cabdff1aSopenharmony_ci} 344cabdff1aSopenharmony_ci 345cabdff1aSopenharmony_ciint float_near_abs_eps_array(const float *a, const float *b, float eps, 346cabdff1aSopenharmony_ci unsigned len) 347cabdff1aSopenharmony_ci{ 348cabdff1aSopenharmony_ci unsigned i; 349cabdff1aSopenharmony_ci 350cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { 351cabdff1aSopenharmony_ci if (!float_near_abs_eps(a[i], b[i], eps)) 352cabdff1aSopenharmony_ci return 0; 353cabdff1aSopenharmony_ci } 354cabdff1aSopenharmony_ci return 1; 355cabdff1aSopenharmony_ci} 356cabdff1aSopenharmony_ci 357cabdff1aSopenharmony_ciint float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp) 358cabdff1aSopenharmony_ci{ 359cabdff1aSopenharmony_ci return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); 360cabdff1aSopenharmony_ci} 361cabdff1aSopenharmony_ci 362cabdff1aSopenharmony_ciint float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, 363cabdff1aSopenharmony_ci unsigned max_ulp, unsigned len) 364cabdff1aSopenharmony_ci{ 365cabdff1aSopenharmony_ci unsigned i; 366cabdff1aSopenharmony_ci 367cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { 368cabdff1aSopenharmony_ci if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) 369cabdff1aSopenharmony_ci return 0; 370cabdff1aSopenharmony_ci } 371cabdff1aSopenharmony_ci return 1; 372cabdff1aSopenharmony_ci} 373cabdff1aSopenharmony_ci 374cabdff1aSopenharmony_ciint double_near_abs_eps(double a, double b, double eps) 375cabdff1aSopenharmony_ci{ 376cabdff1aSopenharmony_ci double abs_diff = fabs(a - b); 377cabdff1aSopenharmony_ci 378cabdff1aSopenharmony_ci return abs_diff < eps; 379cabdff1aSopenharmony_ci} 380cabdff1aSopenharmony_ci 381cabdff1aSopenharmony_ciint double_near_abs_eps_array(const double *a, const double *b, double eps, 382cabdff1aSopenharmony_ci unsigned len) 383cabdff1aSopenharmony_ci{ 384cabdff1aSopenharmony_ci unsigned i; 
385cabdff1aSopenharmony_ci 386cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { 387cabdff1aSopenharmony_ci if (!double_near_abs_eps(a[i], b[i], eps)) 388cabdff1aSopenharmony_ci return 0; 389cabdff1aSopenharmony_ci } 390cabdff1aSopenharmony_ci return 1; 391cabdff1aSopenharmony_ci} 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci/* Print colored text to stderr if the terminal supports it */ 394cabdff1aSopenharmony_cistatic void color_printf(int color, const char *fmt, ...) 395cabdff1aSopenharmony_ci{ 396cabdff1aSopenharmony_ci static int use_color = -1; 397cabdff1aSopenharmony_ci va_list arg; 398cabdff1aSopenharmony_ci 399cabdff1aSopenharmony_ci#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 400cabdff1aSopenharmony_ci static HANDLE con; 401cabdff1aSopenharmony_ci static WORD org_attributes; 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_ci if (use_color < 0) { 404cabdff1aSopenharmony_ci CONSOLE_SCREEN_BUFFER_INFO con_info; 405cabdff1aSopenharmony_ci con = GetStdHandle(STD_ERROR_HANDLE); 406cabdff1aSopenharmony_ci if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) { 407cabdff1aSopenharmony_ci org_attributes = con_info.wAttributes; 408cabdff1aSopenharmony_ci use_color = 1; 409cabdff1aSopenharmony_ci } else 410cabdff1aSopenharmony_ci use_color = 0; 411cabdff1aSopenharmony_ci } 412cabdff1aSopenharmony_ci if (use_color) 413cabdff1aSopenharmony_ci SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); 414cabdff1aSopenharmony_ci#else 415cabdff1aSopenharmony_ci if (use_color < 0) { 416cabdff1aSopenharmony_ci const char *term = getenv("TERM"); 417cabdff1aSopenharmony_ci use_color = term && strcmp(term, "dumb") && isatty(2); 418cabdff1aSopenharmony_ci } 419cabdff1aSopenharmony_ci if (use_color) 420cabdff1aSopenharmony_ci fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); 421cabdff1aSopenharmony_ci#endif 422cabdff1aSopenharmony_ci 423cabdff1aSopenharmony_ci va_start(arg, fmt); 
424cabdff1aSopenharmony_ci vfprintf(stderr, fmt, arg); 425cabdff1aSopenharmony_ci va_end(arg); 426cabdff1aSopenharmony_ci 427cabdff1aSopenharmony_ci if (use_color) { 428cabdff1aSopenharmony_ci#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE 429cabdff1aSopenharmony_ci SetConsoleTextAttribute(con, org_attributes); 430cabdff1aSopenharmony_ci#else 431cabdff1aSopenharmony_ci fprintf(stderr, "\x1b[0m"); 432cabdff1aSopenharmony_ci#endif 433cabdff1aSopenharmony_ci } 434cabdff1aSopenharmony_ci} 435cabdff1aSopenharmony_ci 436cabdff1aSopenharmony_ci/* Deallocate a tree */ 437cabdff1aSopenharmony_cistatic void destroy_func_tree(CheckasmFunc *f) 438cabdff1aSopenharmony_ci{ 439cabdff1aSopenharmony_ci if (f) { 440cabdff1aSopenharmony_ci CheckasmFuncVersion *v = f->versions.next; 441cabdff1aSopenharmony_ci while (v) { 442cabdff1aSopenharmony_ci CheckasmFuncVersion *next = v->next; 443cabdff1aSopenharmony_ci free(v); 444cabdff1aSopenharmony_ci v = next; 445cabdff1aSopenharmony_ci } 446cabdff1aSopenharmony_ci 447cabdff1aSopenharmony_ci destroy_func_tree(f->child[0]); 448cabdff1aSopenharmony_ci destroy_func_tree(f->child[1]); 449cabdff1aSopenharmony_ci free(f); 450cabdff1aSopenharmony_ci } 451cabdff1aSopenharmony_ci} 452cabdff1aSopenharmony_ci 453cabdff1aSopenharmony_ci/* Allocate a zero-initialized block, clean up and exit on failure */ 454cabdff1aSopenharmony_cistatic void *checkasm_malloc(size_t size) 455cabdff1aSopenharmony_ci{ 456cabdff1aSopenharmony_ci void *ptr = calloc(1, size); 457cabdff1aSopenharmony_ci if (!ptr) { 458cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: malloc failed\n"); 459cabdff1aSopenharmony_ci destroy_func_tree(state.funcs); 460cabdff1aSopenharmony_ci exit(1); 461cabdff1aSopenharmony_ci } 462cabdff1aSopenharmony_ci return ptr; 463cabdff1aSopenharmony_ci} 464cabdff1aSopenharmony_ci 465cabdff1aSopenharmony_ci/* Get the suffix of the specified cpu flag */ 466cabdff1aSopenharmony_cistatic const char *cpu_suffix(int cpu) 467cabdff1aSopenharmony_ci{ 
468cabdff1aSopenharmony_ci int i = FF_ARRAY_ELEMS(cpus); 469cabdff1aSopenharmony_ci 470cabdff1aSopenharmony_ci while (--i >= 0) 471cabdff1aSopenharmony_ci if (cpu & cpus[i].flag) 472cabdff1aSopenharmony_ci return cpus[i].suffix; 473cabdff1aSopenharmony_ci 474cabdff1aSopenharmony_ci return "c"; 475cabdff1aSopenharmony_ci} 476cabdff1aSopenharmony_ci 477cabdff1aSopenharmony_cistatic int cmp_nop(const void *a, const void *b) 478cabdff1aSopenharmony_ci{ 479cabdff1aSopenharmony_ci return *(const uint16_t*)a - *(const uint16_t*)b; 480cabdff1aSopenharmony_ci} 481cabdff1aSopenharmony_ci 482cabdff1aSopenharmony_ci/* Measure the overhead of the timing code (in decicycles) */ 483cabdff1aSopenharmony_cistatic int measure_nop_time(void) 484cabdff1aSopenharmony_ci{ 485cabdff1aSopenharmony_ci uint16_t nops[10000]; 486cabdff1aSopenharmony_ci int i, nop_sum = 0; 487cabdff1aSopenharmony_ci av_unused const int sysfd = state.sysfd; 488cabdff1aSopenharmony_ci 489cabdff1aSopenharmony_ci uint64_t t = 0; 490cabdff1aSopenharmony_ci for (i = 0; i < 10000; i++) { 491cabdff1aSopenharmony_ci PERF_START(t); 492cabdff1aSopenharmony_ci PERF_STOP(t); 493cabdff1aSopenharmony_ci nops[i] = t; 494cabdff1aSopenharmony_ci } 495cabdff1aSopenharmony_ci 496cabdff1aSopenharmony_ci qsort(nops, 10000, sizeof(uint16_t), cmp_nop); 497cabdff1aSopenharmony_ci for (i = 2500; i < 7500; i++) 498cabdff1aSopenharmony_ci nop_sum += nops[i]; 499cabdff1aSopenharmony_ci 500cabdff1aSopenharmony_ci return nop_sum / 500; 501cabdff1aSopenharmony_ci} 502cabdff1aSopenharmony_ci 503cabdff1aSopenharmony_ci/* Print benchmark results */ 504cabdff1aSopenharmony_cistatic void print_benchs(CheckasmFunc *f) 505cabdff1aSopenharmony_ci{ 506cabdff1aSopenharmony_ci if (f) { 507cabdff1aSopenharmony_ci print_benchs(f->child[0]); 508cabdff1aSopenharmony_ci 509cabdff1aSopenharmony_ci /* Only print functions with at least one assembly version */ 510cabdff1aSopenharmony_ci if (f->versions.cpu || f->versions.next) { 511cabdff1aSopenharmony_ci 
CheckasmFuncVersion *v = &f->versions; 512cabdff1aSopenharmony_ci do { 513cabdff1aSopenharmony_ci CheckasmPerf *p = &v->perf; 514cabdff1aSopenharmony_ci if (p->iterations) { 515cabdff1aSopenharmony_ci int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4; 516cabdff1aSopenharmony_ci printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci } while ((v = v->next)); 519cabdff1aSopenharmony_ci } 520cabdff1aSopenharmony_ci 521cabdff1aSopenharmony_ci print_benchs(f->child[1]); 522cabdff1aSopenharmony_ci } 523cabdff1aSopenharmony_ci} 524cabdff1aSopenharmony_ci 525cabdff1aSopenharmony_ci/* ASCIIbetical sort except preserving natural order for numbers */ 526cabdff1aSopenharmony_cistatic int cmp_func_names(const char *a, const char *b) 527cabdff1aSopenharmony_ci{ 528cabdff1aSopenharmony_ci const char *start = a; 529cabdff1aSopenharmony_ci int ascii_diff, digit_diff; 530cabdff1aSopenharmony_ci 531cabdff1aSopenharmony_ci for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++); 532cabdff1aSopenharmony_ci for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); 533cabdff1aSopenharmony_ci 534cabdff1aSopenharmony_ci if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b))) 535cabdff1aSopenharmony_ci return digit_diff; 536cabdff1aSopenharmony_ci 537cabdff1aSopenharmony_ci return ascii_diff; 538cabdff1aSopenharmony_ci} 539cabdff1aSopenharmony_ci 540cabdff1aSopenharmony_ci/* Perform a tree rotation in the specified direction and return the new root */ 541cabdff1aSopenharmony_cistatic CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) 542cabdff1aSopenharmony_ci{ 543cabdff1aSopenharmony_ci CheckasmFunc *r = f->child[dir^1]; 544cabdff1aSopenharmony_ci f->child[dir^1] = r->child[dir]; 545cabdff1aSopenharmony_ci r->child[dir] = f; 546cabdff1aSopenharmony_ci r->color = f->color; 547cabdff1aSopenharmony_ci f->color = 0; 
548cabdff1aSopenharmony_ci return r; 549cabdff1aSopenharmony_ci} 550cabdff1aSopenharmony_ci 551cabdff1aSopenharmony_ci#define is_red(f) ((f) && !(f)->color) 552cabdff1aSopenharmony_ci 553cabdff1aSopenharmony_ci/* Balance a left-leaning red-black tree at the specified node */ 554cabdff1aSopenharmony_cistatic void balance_tree(CheckasmFunc **root) 555cabdff1aSopenharmony_ci{ 556cabdff1aSopenharmony_ci CheckasmFunc *f = *root; 557cabdff1aSopenharmony_ci 558cabdff1aSopenharmony_ci if (is_red(f->child[0]) && is_red(f->child[1])) { 559cabdff1aSopenharmony_ci f->color ^= 1; 560cabdff1aSopenharmony_ci f->child[0]->color = f->child[1]->color = 1; 561cabdff1aSopenharmony_ci } 562cabdff1aSopenharmony_ci 563cabdff1aSopenharmony_ci if (!is_red(f->child[0]) && is_red(f->child[1])) 564cabdff1aSopenharmony_ci *root = rotate_tree(f, 0); /* Rotate left */ 565cabdff1aSopenharmony_ci else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) 566cabdff1aSopenharmony_ci *root = rotate_tree(f, 1); /* Rotate right */ 567cabdff1aSopenharmony_ci} 568cabdff1aSopenharmony_ci 569cabdff1aSopenharmony_ci/* Get a node with the specified name, creating it if it doesn't exist */ 570cabdff1aSopenharmony_cistatic CheckasmFunc *get_func(CheckasmFunc **root, const char *name) 571cabdff1aSopenharmony_ci{ 572cabdff1aSopenharmony_ci CheckasmFunc *f = *root; 573cabdff1aSopenharmony_ci 574cabdff1aSopenharmony_ci if (f) { 575cabdff1aSopenharmony_ci /* Search the tree for a matching node */ 576cabdff1aSopenharmony_ci int cmp = cmp_func_names(name, f->name); 577cabdff1aSopenharmony_ci if (cmp) { 578cabdff1aSopenharmony_ci f = get_func(&f->child[cmp > 0], name); 579cabdff1aSopenharmony_ci 580cabdff1aSopenharmony_ci /* Rebalance the tree on the way up if a new node was inserted */ 581cabdff1aSopenharmony_ci if (!f->versions.func) 582cabdff1aSopenharmony_ci balance_tree(root); 583cabdff1aSopenharmony_ci } 584cabdff1aSopenharmony_ci } else { 585cabdff1aSopenharmony_ci /* Allocate and insert a new node into the 
tree */ 586cabdff1aSopenharmony_ci int name_length = strlen(name); 587cabdff1aSopenharmony_ci f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); 588cabdff1aSopenharmony_ci memcpy(f->name, name, name_length + 1); 589cabdff1aSopenharmony_ci } 590cabdff1aSopenharmony_ci 591cabdff1aSopenharmony_ci return f; 592cabdff1aSopenharmony_ci} 593cabdff1aSopenharmony_ci 594cabdff1aSopenharmony_ci/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ 595cabdff1aSopenharmony_cistatic void check_cpu_flag(const char *name, int flag) 596cabdff1aSopenharmony_ci{ 597cabdff1aSopenharmony_ci int old_cpu_flag = state.cpu_flag; 598cabdff1aSopenharmony_ci 599cabdff1aSopenharmony_ci flag |= old_cpu_flag; 600cabdff1aSopenharmony_ci av_force_cpu_flags(-1); 601cabdff1aSopenharmony_ci state.cpu_flag = flag & av_get_cpu_flags(); 602cabdff1aSopenharmony_ci av_force_cpu_flags(state.cpu_flag); 603cabdff1aSopenharmony_ci 604cabdff1aSopenharmony_ci if (!flag || state.cpu_flag != old_cpu_flag) { 605cabdff1aSopenharmony_ci int i; 606cabdff1aSopenharmony_ci 607cabdff1aSopenharmony_ci state.cpu_flag_name = name; 608cabdff1aSopenharmony_ci for (i = 0; tests[i].func; i++) { 609cabdff1aSopenharmony_ci if (state.test_name && strcmp(tests[i].name, state.test_name)) 610cabdff1aSopenharmony_ci continue; 611cabdff1aSopenharmony_ci state.current_test_name = tests[i].name; 612cabdff1aSopenharmony_ci tests[i].func(); 613cabdff1aSopenharmony_ci } 614cabdff1aSopenharmony_ci } 615cabdff1aSopenharmony_ci} 616cabdff1aSopenharmony_ci 617cabdff1aSopenharmony_ci/* Print the name of the current CPU flag, but only do it once */ 618cabdff1aSopenharmony_cistatic void print_cpu_name(void) 619cabdff1aSopenharmony_ci{ 620cabdff1aSopenharmony_ci if (state.cpu_flag_name) { 621cabdff1aSopenharmony_ci color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); 622cabdff1aSopenharmony_ci state.cpu_flag_name = NULL; 623cabdff1aSopenharmony_ci } 624cabdff1aSopenharmony_ci} 
625cabdff1aSopenharmony_ci 626cabdff1aSopenharmony_ci#if CONFIG_LINUX_PERF 627cabdff1aSopenharmony_cistatic int bench_init_linux(void) 628cabdff1aSopenharmony_ci{ 629cabdff1aSopenharmony_ci struct perf_event_attr attr = { 630cabdff1aSopenharmony_ci .type = PERF_TYPE_HARDWARE, 631cabdff1aSopenharmony_ci .size = sizeof(struct perf_event_attr), 632cabdff1aSopenharmony_ci .config = PERF_COUNT_HW_CPU_CYCLES, 633cabdff1aSopenharmony_ci .disabled = 1, // start counting only on demand 634cabdff1aSopenharmony_ci .exclude_kernel = 1, 635cabdff1aSopenharmony_ci .exclude_hv = 1, 636cabdff1aSopenharmony_ci }; 637cabdff1aSopenharmony_ci 638cabdff1aSopenharmony_ci printf("benchmarking with Linux Perf Monitoring API\n"); 639cabdff1aSopenharmony_ci 640cabdff1aSopenharmony_ci state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); 641cabdff1aSopenharmony_ci if (state.sysfd == -1) { 642cabdff1aSopenharmony_ci perror("syscall"); 643cabdff1aSopenharmony_ci return -1; 644cabdff1aSopenharmony_ci } 645cabdff1aSopenharmony_ci return 0; 646cabdff1aSopenharmony_ci} 647cabdff1aSopenharmony_ci#elif CONFIG_MACOS_KPERF 648cabdff1aSopenharmony_cistatic int bench_init_kperf(void) 649cabdff1aSopenharmony_ci{ 650cabdff1aSopenharmony_ci ff_kperf_init(); 651cabdff1aSopenharmony_ci return 0; 652cabdff1aSopenharmony_ci} 653cabdff1aSopenharmony_ci#else 654cabdff1aSopenharmony_cistatic int bench_init_ffmpeg(void) 655cabdff1aSopenharmony_ci{ 656cabdff1aSopenharmony_ci#ifdef AV_READ_TIME 657cabdff1aSopenharmony_ci printf("benchmarking with native FFmpeg timers\n"); 658cabdff1aSopenharmony_ci return 0; 659cabdff1aSopenharmony_ci#else 660cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: --bench is not supported on your system\n"); 661cabdff1aSopenharmony_ci return -1; 662cabdff1aSopenharmony_ci#endif 663cabdff1aSopenharmony_ci} 664cabdff1aSopenharmony_ci#endif 665cabdff1aSopenharmony_ci 666cabdff1aSopenharmony_cistatic int bench_init(void) 667cabdff1aSopenharmony_ci{ 668cabdff1aSopenharmony_ci#if 
CONFIG_LINUX_PERF 669cabdff1aSopenharmony_ci int ret = bench_init_linux(); 670cabdff1aSopenharmony_ci#elif CONFIG_MACOS_KPERF 671cabdff1aSopenharmony_ci int ret = bench_init_kperf(); 672cabdff1aSopenharmony_ci#else 673cabdff1aSopenharmony_ci int ret = bench_init_ffmpeg(); 674cabdff1aSopenharmony_ci#endif 675cabdff1aSopenharmony_ci if (ret < 0) 676cabdff1aSopenharmony_ci return ret; 677cabdff1aSopenharmony_ci 678cabdff1aSopenharmony_ci state.nop_time = measure_nop_time(); 679cabdff1aSopenharmony_ci printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); 680cabdff1aSopenharmony_ci return 0; 681cabdff1aSopenharmony_ci} 682cabdff1aSopenharmony_ci 683cabdff1aSopenharmony_cistatic void bench_uninit(void) 684cabdff1aSopenharmony_ci{ 685cabdff1aSopenharmony_ci#if CONFIG_LINUX_PERF 686cabdff1aSopenharmony_ci if (state.sysfd > 0) 687cabdff1aSopenharmony_ci close(state.sysfd); 688cabdff1aSopenharmony_ci#endif 689cabdff1aSopenharmony_ci} 690cabdff1aSopenharmony_ci 691cabdff1aSopenharmony_ciint main(int argc, char *argv[]) 692cabdff1aSopenharmony_ci{ 693cabdff1aSopenharmony_ci unsigned int seed = av_get_random_seed(); 694cabdff1aSopenharmony_ci int i, ret = 0; 695cabdff1aSopenharmony_ci 696cabdff1aSopenharmony_ci#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL 697cabdff1aSopenharmony_ci if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) 698cabdff1aSopenharmony_ci checkasm_checked_call = checkasm_checked_call_vfp; 699cabdff1aSopenharmony_ci#endif 700cabdff1aSopenharmony_ci 701cabdff1aSopenharmony_ci if (!tests[0].func || !cpus[0].flag) { 702cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: no tests to perform\n"); 703cabdff1aSopenharmony_ci return 0; 704cabdff1aSopenharmony_ci } 705cabdff1aSopenharmony_ci 706cabdff1aSopenharmony_ci while (argc > 1) { 707cabdff1aSopenharmony_ci if (!strncmp(argv[1], "--bench", 7)) { 708cabdff1aSopenharmony_ci if (bench_init() < 0) 709cabdff1aSopenharmony_ci return 1; 710cabdff1aSopenharmony_ci if (argv[1][7] == '=') { 
711cabdff1aSopenharmony_ci state.bench_pattern = argv[1] + 8; 712cabdff1aSopenharmony_ci state.bench_pattern_len = strlen(state.bench_pattern); 713cabdff1aSopenharmony_ci } else 714cabdff1aSopenharmony_ci state.bench_pattern = ""; 715cabdff1aSopenharmony_ci } else if (!strncmp(argv[1], "--test=", 7)) { 716cabdff1aSopenharmony_ci state.test_name = argv[1] + 7; 717cabdff1aSopenharmony_ci } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) { 718cabdff1aSopenharmony_ci state.verbose = 1; 719cabdff1aSopenharmony_ci } else { 720cabdff1aSopenharmony_ci seed = strtoul(argv[1], NULL, 10); 721cabdff1aSopenharmony_ci } 722cabdff1aSopenharmony_ci 723cabdff1aSopenharmony_ci argc--; 724cabdff1aSopenharmony_ci argv++; 725cabdff1aSopenharmony_ci } 726cabdff1aSopenharmony_ci 727cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: using random seed %u\n", seed); 728cabdff1aSopenharmony_ci av_lfg_init(&checkasm_lfg, seed); 729cabdff1aSopenharmony_ci 730cabdff1aSopenharmony_ci check_cpu_flag(NULL, 0); 731cabdff1aSopenharmony_ci for (i = 0; cpus[i].flag; i++) 732cabdff1aSopenharmony_ci check_cpu_flag(cpus[i].name, cpus[i].flag); 733cabdff1aSopenharmony_ci 734cabdff1aSopenharmony_ci if (state.num_failed) { 735cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); 736cabdff1aSopenharmony_ci ret = 1; 737cabdff1aSopenharmony_ci } else { 738cabdff1aSopenharmony_ci fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); 739cabdff1aSopenharmony_ci if (state.bench_pattern) { 740cabdff1aSopenharmony_ci print_benchs(state.funcs); 741cabdff1aSopenharmony_ci } 742cabdff1aSopenharmony_ci } 743cabdff1aSopenharmony_ci 744cabdff1aSopenharmony_ci destroy_func_tree(state.funcs); 745cabdff1aSopenharmony_ci bench_uninit(); 746cabdff1aSopenharmony_ci return ret; 747cabdff1aSopenharmony_ci} 748cabdff1aSopenharmony_ci 749cabdff1aSopenharmony_ci/* Decide whether or not the specified function needs to be tested and 
750cabdff1aSopenharmony_ci * allocate/initialize data structures if needed. Returns a pointer to a 751cabdff1aSopenharmony_ci * reference function if the function should be tested, otherwise NULL */ 752cabdff1aSopenharmony_civoid *checkasm_check_func(void *func, const char *name, ...) 753cabdff1aSopenharmony_ci{ 754cabdff1aSopenharmony_ci char name_buf[256]; 755cabdff1aSopenharmony_ci void *ref = func; 756cabdff1aSopenharmony_ci CheckasmFuncVersion *v; 757cabdff1aSopenharmony_ci int name_length; 758cabdff1aSopenharmony_ci va_list arg; 759cabdff1aSopenharmony_ci 760cabdff1aSopenharmony_ci va_start(arg, name); 761cabdff1aSopenharmony_ci name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); 762cabdff1aSopenharmony_ci va_end(arg); 763cabdff1aSopenharmony_ci 764cabdff1aSopenharmony_ci if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) 765cabdff1aSopenharmony_ci return NULL; 766cabdff1aSopenharmony_ci 767cabdff1aSopenharmony_ci state.current_func = get_func(&state.funcs, name_buf); 768cabdff1aSopenharmony_ci state.funcs->color = 1; 769cabdff1aSopenharmony_ci v = &state.current_func->versions; 770cabdff1aSopenharmony_ci 771cabdff1aSopenharmony_ci if (v->func) { 772cabdff1aSopenharmony_ci CheckasmFuncVersion *prev; 773cabdff1aSopenharmony_ci do { 774cabdff1aSopenharmony_ci /* Only test functions that haven't already been tested */ 775cabdff1aSopenharmony_ci if (v->func == func) 776cabdff1aSopenharmony_ci return NULL; 777cabdff1aSopenharmony_ci 778cabdff1aSopenharmony_ci if (v->ok) 779cabdff1aSopenharmony_ci ref = v->func; 780cabdff1aSopenharmony_ci 781cabdff1aSopenharmony_ci prev = v; 782cabdff1aSopenharmony_ci } while ((v = v->next)); 783cabdff1aSopenharmony_ci 784cabdff1aSopenharmony_ci v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); 785cabdff1aSopenharmony_ci } 786cabdff1aSopenharmony_ci 787cabdff1aSopenharmony_ci v->func = func; 788cabdff1aSopenharmony_ci v->ok = 1; 789cabdff1aSopenharmony_ci v->cpu = state.cpu_flag; 
790cabdff1aSopenharmony_ci state.current_func_ver = v; 791cabdff1aSopenharmony_ci 792cabdff1aSopenharmony_ci if (state.cpu_flag) 793cabdff1aSopenharmony_ci state.num_checked++; 794cabdff1aSopenharmony_ci 795cabdff1aSopenharmony_ci return ref; 796cabdff1aSopenharmony_ci} 797cabdff1aSopenharmony_ci 798cabdff1aSopenharmony_ci/* Decide whether or not the current function needs to be benchmarked */ 799cabdff1aSopenharmony_ciint checkasm_bench_func(void) 800cabdff1aSopenharmony_ci{ 801cabdff1aSopenharmony_ci return !state.num_failed && state.bench_pattern && 802cabdff1aSopenharmony_ci !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); 803cabdff1aSopenharmony_ci} 804cabdff1aSopenharmony_ci 805cabdff1aSopenharmony_ci/* Indicate that the current test has failed */ 806cabdff1aSopenharmony_civoid checkasm_fail_func(const char *msg, ...) 807cabdff1aSopenharmony_ci{ 808cabdff1aSopenharmony_ci if (state.current_func_ver->cpu && state.current_func_ver->ok) { 809cabdff1aSopenharmony_ci va_list arg; 810cabdff1aSopenharmony_ci 811cabdff1aSopenharmony_ci print_cpu_name(); 812cabdff1aSopenharmony_ci fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); 813cabdff1aSopenharmony_ci va_start(arg, msg); 814cabdff1aSopenharmony_ci vfprintf(stderr, msg, arg); 815cabdff1aSopenharmony_ci va_end(arg); 816cabdff1aSopenharmony_ci fprintf(stderr, ")\n"); 817cabdff1aSopenharmony_ci 818cabdff1aSopenharmony_ci state.current_func_ver->ok = 0; 819cabdff1aSopenharmony_ci state.num_failed++; 820cabdff1aSopenharmony_ci } 821cabdff1aSopenharmony_ci} 822cabdff1aSopenharmony_ci 823cabdff1aSopenharmony_ci/* Get the benchmark context of the current function */ 824cabdff1aSopenharmony_ciCheckasmPerf *checkasm_get_perf_context(void) 825cabdff1aSopenharmony_ci{ 826cabdff1aSopenharmony_ci CheckasmPerf *perf = &state.current_func_ver->perf; 827cabdff1aSopenharmony_ci memset(perf, 0, sizeof(*perf)); 828cabdff1aSopenharmony_ci perf->sysfd = 
state.sysfd; 829cabdff1aSopenharmony_ci return perf; 830cabdff1aSopenharmony_ci} 831cabdff1aSopenharmony_ci 832cabdff1aSopenharmony_ci/* Print the outcome of all tests performed since the last time this function was called */ 833cabdff1aSopenharmony_civoid checkasm_report(const char *name, ...) 834cabdff1aSopenharmony_ci{ 835cabdff1aSopenharmony_ci static int prev_checked, prev_failed, max_length; 836cabdff1aSopenharmony_ci 837cabdff1aSopenharmony_ci if (state.num_checked > prev_checked) { 838cabdff1aSopenharmony_ci int pad_length = max_length + 4; 839cabdff1aSopenharmony_ci va_list arg; 840cabdff1aSopenharmony_ci 841cabdff1aSopenharmony_ci print_cpu_name(); 842cabdff1aSopenharmony_ci pad_length -= fprintf(stderr, " - %s.", state.current_test_name); 843cabdff1aSopenharmony_ci va_start(arg, name); 844cabdff1aSopenharmony_ci pad_length -= vfprintf(stderr, name, arg); 845cabdff1aSopenharmony_ci va_end(arg); 846cabdff1aSopenharmony_ci fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); 847cabdff1aSopenharmony_ci 848cabdff1aSopenharmony_ci if (state.num_failed == prev_failed) 849cabdff1aSopenharmony_ci color_printf(COLOR_GREEN, "OK"); 850cabdff1aSopenharmony_ci else 851cabdff1aSopenharmony_ci color_printf(COLOR_RED, "FAILED"); 852cabdff1aSopenharmony_ci fprintf(stderr, "]\n"); 853cabdff1aSopenharmony_ci 854cabdff1aSopenharmony_ci prev_checked = state.num_checked; 855cabdff1aSopenharmony_ci prev_failed = state.num_failed; 856cabdff1aSopenharmony_ci } else if (!state.cpu_flag) { 857cabdff1aSopenharmony_ci /* Calculate the amount of padding required to make the output vertically aligned */ 858cabdff1aSopenharmony_ci int length = strlen(state.current_test_name); 859cabdff1aSopenharmony_ci va_list arg; 860cabdff1aSopenharmony_ci 861cabdff1aSopenharmony_ci va_start(arg, name); 862cabdff1aSopenharmony_ci length += vsnprintf(NULL, 0, name, arg); 863cabdff1aSopenharmony_ci va_end(arg); 864cabdff1aSopenharmony_ci 865cabdff1aSopenharmony_ci if (length > max_length) 
866cabdff1aSopenharmony_ci max_length = length; 867cabdff1aSopenharmony_ci } 868cabdff1aSopenharmony_ci} 869cabdff1aSopenharmony_ci 870cabdff1aSopenharmony_ci#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \ 871cabdff1aSopenharmony_ciint checkasm_check_##type(const char *const file, const int line, \ 872cabdff1aSopenharmony_ci const type *buf1, ptrdiff_t stride1, \ 873cabdff1aSopenharmony_ci const type *buf2, ptrdiff_t stride2, \ 874cabdff1aSopenharmony_ci const int w, int h, const char *const name) \ 875cabdff1aSopenharmony_ci{ \ 876cabdff1aSopenharmony_ci int y = 0; \ 877cabdff1aSopenharmony_ci stride1 /= sizeof(*buf1); \ 878cabdff1aSopenharmony_ci stride2 /= sizeof(*buf2); \ 879cabdff1aSopenharmony_ci for (y = 0; y < h; y++) \ 880cabdff1aSopenharmony_ci if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \ 881cabdff1aSopenharmony_ci break; \ 882cabdff1aSopenharmony_ci if (y == h) \ 883cabdff1aSopenharmony_ci return 0; \ 884cabdff1aSopenharmony_ci checkasm_fail_func("%s:%d", file, line); \ 885cabdff1aSopenharmony_ci if (!state.verbose) \ 886cabdff1aSopenharmony_ci return 1; \ 887cabdff1aSopenharmony_ci fprintf(stderr, "%s:\n", name); \ 888cabdff1aSopenharmony_ci while (h--) { \ 889cabdff1aSopenharmony_ci for (int x = 0; x < w; x++) \ 890cabdff1aSopenharmony_ci fprintf(stderr, " " fmt, buf1[x]); \ 891cabdff1aSopenharmony_ci fprintf(stderr, " "); \ 892cabdff1aSopenharmony_ci for (int x = 0; x < w; x++) \ 893cabdff1aSopenharmony_ci fprintf(stderr, " " fmt, buf2[x]); \ 894cabdff1aSopenharmony_ci fprintf(stderr, " "); \ 895cabdff1aSopenharmony_ci for (int x = 0; x < w; x++) \ 896cabdff1aSopenharmony_ci fprintf(stderr, "%c", buf1[x] != buf2[x] ? 
'x' : '.'); \ 897cabdff1aSopenharmony_ci buf1 += stride1; \ 898cabdff1aSopenharmony_ci buf2 += stride2; \ 899cabdff1aSopenharmony_ci fprintf(stderr, "\n"); \ 900cabdff1aSopenharmony_ci } \ 901cabdff1aSopenharmony_ci return 1; \ 902cabdff1aSopenharmony_ci} 903cabdff1aSopenharmony_ci 904cabdff1aSopenharmony_ciDEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x") 905cabdff1aSopenharmony_ciDEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x") 906cabdff1aSopenharmony_ciDEF_CHECKASM_CHECK_FUNC(int16_t, "%6d") 907cabdff1aSopenharmony_ciDEF_CHECKASM_CHECK_FUNC(int32_t, "%9d") 908