1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * This file is part of FFmpeg. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or modify 5cabdff1aSopenharmony_ci * it under the terms of the GNU General Public License as published by 6cabdff1aSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or 7cabdff1aSopenharmony_ci * (at your option) any later version. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12cabdff1aSopenharmony_ci * GNU General Public License for more details. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * You should have received a copy of the GNU General Public License along 15cabdff1aSopenharmony_ci * with FFmpeg; if not, write to the Free Software Foundation, Inc., 16cabdff1aSopenharmony_ci * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 17cabdff1aSopenharmony_ci */ 18cabdff1aSopenharmony_ci 19cabdff1aSopenharmony_ci#include "libavcodec/aacpsdsp.h" 20cabdff1aSopenharmony_ci#include "libavutil/intfloat.h" 21cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "checkasm.h" 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#define N 32 26cabdff1aSopenharmony_ci#define STRIDE 128 27cabdff1aSopenharmony_ci#define BUF_SIZE (N * STRIDE) 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#define randomize(buf, len) do { \ 30cabdff1aSopenharmony_ci int i; \ 31cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { \ 32cabdff1aSopenharmony_ci const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX; \ 33cabdff1aSopenharmony_ci (buf)[i] = f; \ 34cabdff1aSopenharmony_ci } \ 35cabdff1aSopenharmony_ci} while (0) 36cabdff1aSopenharmony_ci 37cabdff1aSopenharmony_ci#define EPS 0.005 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_cistatic void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) 40cabdff1aSopenharmony_ci{ 41cabdff1aSopenharmony_ci int i; 42cabdff1aSopenharmony_ci for (i = 0; i < len; i++) { 43cabdff1aSopenharmony_ci union av_intfloat32 u = { .f = buf[i] }; 44cabdff1aSopenharmony_ci u.i &= (0xffffffff << bits); 45cabdff1aSopenharmony_ci buf[i] = u.f; 46cabdff1aSopenharmony_ci } 47cabdff1aSopenharmony_ci} 48cabdff1aSopenharmony_ci 49cabdff1aSopenharmony_cistatic void test_add_squares(void) 50cabdff1aSopenharmony_ci{ 51cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); 52cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]); 53cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]); 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci declare_func(void, INTFLOAT *dst, 56cabdff1aSopenharmony_ci const INTFLOAT (*src)[2], int n); 57cabdff1aSopenharmony_ci 58cabdff1aSopenharmony_ci randomize((INTFLOAT *)src, BUF_SIZE * 2); 59cabdff1aSopenharmony_ci randomize(dst0, BUF_SIZE); 60cabdff1aSopenharmony_ci memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT)); 61cabdff1aSopenharmony_ci call_ref(dst0, src, BUF_SIZE); 62cabdff1aSopenharmony_ci call_new(dst1, src, BUF_SIZE); 63cabdff1aSopenharmony_ci if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE)) 64cabdff1aSopenharmony_ci fail(); 65cabdff1aSopenharmony_ci bench_new(dst1, src, BUF_SIZE); 66cabdff1aSopenharmony_ci} 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_cistatic void test_mul_pair_single(void) 69cabdff1aSopenharmony_ci{ 70cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); 71cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); 72cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]); 73cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]); 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ci declare_func(void, INTFLOAT (*dst)[2], 76cabdff1aSopenharmony_ci INTFLOAT (*src0)[2], INTFLOAT *src1, int n); 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_ci randomize((INTFLOAT *)src0, BUF_SIZE * 2); 79cabdff1aSopenharmony_ci randomize(src1, BUF_SIZE); 80cabdff1aSopenharmony_ci call_ref(dst0, src0, src1, BUF_SIZE); 81cabdff1aSopenharmony_ci call_new(dst1, src0, src1, BUF_SIZE); 82cabdff1aSopenharmony_ci if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) 83cabdff1aSopenharmony_ci fail(); 84cabdff1aSopenharmony_ci bench_new(dst1, src0, src1, BUF_SIZE); 85cabdff1aSopenharmony_ci} 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_cistatic void test_hybrid_analysis(void) 88cabdff1aSopenharmony_ci{ 89cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]); 90cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]); 91cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]); 92cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]); 93cabdff1aSopenharmony_ci 94cabdff1aSopenharmony_ci declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2], 95cabdff1aSopenharmony_ci const INTFLOAT (*filter)[8][2], 96cabdff1aSopenharmony_ci ptrdiff_t stride, int n); 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci randomize((INTFLOAT *)in, 13 * 2); 99cabdff1aSopenharmony_ci randomize((INTFLOAT *)filter, N * 8 * 2); 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ci randomize((INTFLOAT *)dst0, BUF_SIZE * 2); 102cabdff1aSopenharmony_ci memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT)); 103cabdff1aSopenharmony_ci 104cabdff1aSopenharmony_ci call_ref(dst0, in, filter, STRIDE, N); 105cabdff1aSopenharmony_ci call_new(dst1, in, filter, STRIDE, N); 106cabdff1aSopenharmony_ci 107cabdff1aSopenharmony_ci if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2)) 108cabdff1aSopenharmony_ci fail(); 109cabdff1aSopenharmony_ci bench_new(dst1, in, filter, STRIDE, N); 110cabdff1aSopenharmony_ci} 111cabdff1aSopenharmony_ci 112cabdff1aSopenharmony_cistatic void test_hybrid_analysis_ileave(void) 113cabdff1aSopenharmony_ci{ 114cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]); 115cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]); 116cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]); 117cabdff1aSopenharmony_ci 118cabdff1aSopenharmony_ci declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64], 119cabdff1aSopenharmony_ci int i, int len); 120cabdff1aSopenharmony_ci 121cabdff1aSopenharmony_ci randomize((INTFLOAT *)out0, 91 * 32 * 2); 122cabdff1aSopenharmony_ci randomize((INTFLOAT *)in, 2 * 38 * 64); 123cabdff1aSopenharmony_ci memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT)); 124cabdff1aSopenharmony_ci 125cabdff1aSopenharmony_ci /* len is hardcoded to 32 as that's the only value used in 126cabdff1aSopenharmony_ci libavcodec. asm functions are likely to be optimized 127cabdff1aSopenharmony_ci hardcoding this value in their loops and could fail with 128cabdff1aSopenharmony_ci anything else. 129cabdff1aSopenharmony_ci i is hardcoded to the two values currently used by the 130cabdff1aSopenharmony_ci aac decoder because the arm neon implementation is 131cabdff1aSopenharmony_ci micro-optimized for them and will fail for almost every 132cabdff1aSopenharmony_ci other value. */ 133cabdff1aSopenharmony_ci call_ref(out0, in, 3, 32); 134cabdff1aSopenharmony_ci call_new(out1, in, 3, 32); 135cabdff1aSopenharmony_ci 136cabdff1aSopenharmony_ci /* the function just moves data around, so memcmp is enough */ 137cabdff1aSopenharmony_ci if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) 138cabdff1aSopenharmony_ci fail(); 139cabdff1aSopenharmony_ci 140cabdff1aSopenharmony_ci call_ref(out0, in, 5, 32); 141cabdff1aSopenharmony_ci call_new(out1, in, 5, 32); 142cabdff1aSopenharmony_ci 143cabdff1aSopenharmony_ci if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT))) 144cabdff1aSopenharmony_ci fail(); 145cabdff1aSopenharmony_ci 146cabdff1aSopenharmony_ci bench_new(out1, in, 3, 32); 147cabdff1aSopenharmony_ci} 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_cistatic void test_hybrid_synthesis_deint(void) 150cabdff1aSopenharmony_ci{ 151cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]); 152cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]); 153cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]); 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2], 156cabdff1aSopenharmony_ci int i, int len); 157cabdff1aSopenharmony_ci 158cabdff1aSopenharmony_ci randomize((INTFLOAT *)in, 91 * 32 * 2); 159cabdff1aSopenharmony_ci randomize((INTFLOAT *)out0, 2 * 38 * 64); 160cabdff1aSopenharmony_ci memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT)); 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci /* len is hardcoded to 32 as that's the only value used in 163cabdff1aSopenharmony_ci libavcodec. asm functions are likely to be optimized 164cabdff1aSopenharmony_ci hardcoding this value in their loops and could fail with 165cabdff1aSopenharmony_ci anything else. 166cabdff1aSopenharmony_ci i is hardcoded to the two values currently used by the 167cabdff1aSopenharmony_ci aac decoder because the arm neon implementation is 168cabdff1aSopenharmony_ci micro-optimized for them and will fail for almost every 169cabdff1aSopenharmony_ci other value. */ 170cabdff1aSopenharmony_ci call_ref(out0, in, 3, 32); 171cabdff1aSopenharmony_ci call_new(out1, in, 3, 32); 172cabdff1aSopenharmony_ci 173cabdff1aSopenharmony_ci /* the function just moves data around, so memcmp is enough */ 174cabdff1aSopenharmony_ci if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) 175cabdff1aSopenharmony_ci fail(); 176cabdff1aSopenharmony_ci 177cabdff1aSopenharmony_ci call_ref(out0, in, 5, 32); 178cabdff1aSopenharmony_ci call_new(out1, in, 5, 32); 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_ci if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT))) 181cabdff1aSopenharmony_ci fail(); 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci bench_new(out1, in, 3, 32); 184cabdff1aSopenharmony_ci} 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_cistatic void test_stereo_interpolate(PSDSPContext *psdsp) 187cabdff1aSopenharmony_ci{ 188cabdff1aSopenharmony_ci int i; 189cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]); 190cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]); 191cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]); 192cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]); 193cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]); 194cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]); 195cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]); 196cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]); 197cabdff1aSopenharmony_ci 198cabdff1aSopenharmony_ci declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2], 199cabdff1aSopenharmony_ci INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len); 200cabdff1aSopenharmony_ci 201cabdff1aSopenharmony_ci randomize((INTFLOAT *)l, BUF_SIZE * 2); 202cabdff1aSopenharmony_ci randomize((INTFLOAT *)r, BUF_SIZE * 2); 203cabdff1aSopenharmony_ci 204cabdff1aSopenharmony_ci for (i = 0; i < 2; i++) { 205cabdff1aSopenharmony_ci if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) { 206cabdff1aSopenharmony_ci memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 207cabdff1aSopenharmony_ci memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 208cabdff1aSopenharmony_ci memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 209cabdff1aSopenharmony_ci memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci randomize((INTFLOAT *)h, 2 * 4); 212cabdff1aSopenharmony_ci randomize((INTFLOAT *)h_step, 2 * 4); 213cabdff1aSopenharmony_ci // Clear the least significant 14 bits of h_step, to avoid 214cabdff1aSopenharmony_ci // divergence when accumulating h_step BUF_SIZE times into 215cabdff1aSopenharmony_ci // a float variable which may or may not have extra intermediate 216cabdff1aSopenharmony_ci // precision. Therefore clear roughly log2(BUF_SIZE) less 217cabdff1aSopenharmony_ci // significant bits, to get the same result regardless of any 218cabdff1aSopenharmony_ci // extra precision in the accumulator. 219cabdff1aSopenharmony_ci clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); 220cabdff1aSopenharmony_ci 221cabdff1aSopenharmony_ci call_ref(l0, r0, h, h_step, BUF_SIZE); 222cabdff1aSopenharmony_ci call_new(l1, r1, h, h_step, BUF_SIZE); 223cabdff1aSopenharmony_ci if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) || 224cabdff1aSopenharmony_ci !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) 225cabdff1aSopenharmony_ci fail(); 226cabdff1aSopenharmony_ci 227cabdff1aSopenharmony_ci memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 228cabdff1aSopenharmony_ci memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 229cabdff1aSopenharmony_ci bench_new(l1, r1, h, h_step, BUF_SIZE); 230cabdff1aSopenharmony_ci } 231cabdff1aSopenharmony_ci } 232cabdff1aSopenharmony_ci} 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_civoid checkasm_check_aacpsdsp(void) 235cabdff1aSopenharmony_ci{ 236cabdff1aSopenharmony_ci PSDSPContext psdsp; 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci ff_psdsp_init(&psdsp); 239cabdff1aSopenharmony_ci 240cabdff1aSopenharmony_ci if (check_func(psdsp.add_squares, "ps_add_squares")) 241cabdff1aSopenharmony_ci test_add_squares(); 242cabdff1aSopenharmony_ci report("add_squares"); 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_ci if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single")) 245cabdff1aSopenharmony_ci test_mul_pair_single(); 246cabdff1aSopenharmony_ci report("mul_pair_single"); 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_ci if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis")) 249cabdff1aSopenharmony_ci test_hybrid_analysis(); 250cabdff1aSopenharmony_ci report("hybrid_analysis"); 251cabdff1aSopenharmony_ci 252cabdff1aSopenharmony_ci if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave")) 253cabdff1aSopenharmony_ci test_hybrid_analysis_ileave(); 254cabdff1aSopenharmony_ci report("hybrid_analysis_ileave"); 255cabdff1aSopenharmony_ci 256cabdff1aSopenharmony_ci if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint")) 257cabdff1aSopenharmony_ci test_hybrid_synthesis_deint(); 258cabdff1aSopenharmony_ci report("hybrid_synthesis_deint"); 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci test_stereo_interpolate(&psdsp); 261cabdff1aSopenharmony_ci report("stereo_interpolate"); 262cabdff1aSopenharmony_ci} 263