/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "libavcodec/aacpsdsp.h"
#include "libavutil/intfloat.h"
#include "libavutil/mem_internal.h"

#include "checkasm.h"

#define N 32
#define STRIDE 128
#define BUF_SIZE (N * STRIDE)

#define randomize(buf, len) do {                                \
    int i;                                                      \
    for (i = 0; i < len; i++) {                                 \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
        (buf)[i] = f;                                           \
    }                                                           \
} while (0)

#define EPS 0.005

static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    int i;
    for (i = 0; i < len; i++) {
        union av_intfloat32 u = { .f = buf[i] };
        u.i &= (0xffffffff << bits);
        buf[i] = u.f;
    }
}

static void test_add_squares(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);

    declare_func(void, INTFLOAT *dst,
                 const INTFLOAT (*src)[2], int n);

    randomize((INTFLOAT *)src, BUF_SIZE * 2);
    randomize(dst0, BUF_SIZE);
    memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
    call_ref(dst0, src, BUF_SIZE);
    call_new(dst1, src, BUF_SIZE);
    if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
        fail();
    bench_new(dst1, src, BUF_SIZE);
}

static void test_mul_pair_single(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);

    declare_func(void, INTFLOAT (*dst)[2],
                 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);

    randomize((INTFLOAT *)src0, BUF_SIZE * 2);
    randomize(src1, BUF_SIZE);
    call_ref(dst0, src0, src1, BUF_SIZE);
    call_new(dst1, src0, src1, BUF_SIZE);
    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
        fail();
    bench_new(dst1, src0, src1, BUF_SIZE);
}

static void test_hybrid_analysis(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);

    declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
                 const INTFLOAT (*filter)[8][2],
                 ptrdiff_t stride, int n);

    randomize((INTFLOAT *)in, 13 * 2);
    randomize((INTFLOAT *)filter, N * 8 * 2);

    randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
    memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));

    call_ref(dst0, in, filter, STRIDE, N);
    call_new(dst1, in, filter, STRIDE, N);

    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
        fail();
    bench_new(dst1, in, filter, STRIDE, N);
}

static void test_hybrid_analysis_ileave(void)
{
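    /* ps_hybrid_analysis_ileave only reorders samples from the planar
     * in[2][38][64] layout into the interleaved out[91][32][2] layout
     * used by the PS decoder, so its output is compared bit-exactly
     * with memcmp below. */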
    LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
    LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
    LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);

    declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
                 int i, int len);

    randomize((INTFLOAT *)out0, 91 * 32 * 2);
    randomize((INTFLOAT *)in, 2 * 38 * 64);
    memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));

    /* len is hardcoded to 32, as that is the only value used in
       libavcodec. The asm functions are likely optimized with this
       value hardcoded in their loops and could fail with anything
       else.
       i is hardcoded to the two values currently used by the AAC
       decoder, because the ARM NEON implementation is micro-optimized
       for them and will fail for almost every other value. */
    call_ref(out0, in, 3, 32);
    call_new(out1, in, 3, 32);

    /* The function just moves data around, so memcmp is enough. */
    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
        fail();

    call_ref(out0, in, 5, 32);
    call_new(out1, in, 5, 32);

    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
        fail();

    bench_new(out1, in, 3, 32);
}

static void test_hybrid_synthesis_deint(void)
{
    LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
    LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
    LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);

    declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
                 int i, int len);

    randomize((INTFLOAT *)in, 91 * 32 * 2);
    randomize((INTFLOAT *)out0, 2 * 38 * 64);
    memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));

    /* len is hardcoded to 32, as that is the only value used in
       libavcodec. The asm functions are likely optimized with this
       value hardcoded in their loops and could fail with anything
       else.
       i is hardcoded to the two values currently used by the AAC
       decoder, because the ARM NEON implementation is micro-optimized
       for them and will fail for almost every other value. */
    call_ref(out0, in, 3, 32);
    call_new(out1, in, 3, 32);

    /* The function just moves data around, so memcmp is enough. */
    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
        fail();

    call_ref(out0, in, 5, 32);
    call_new(out1, in, 5, 32);

    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
        fail();

    bench_new(out1, in, 3, 32);
}

static void test_stereo_interpolate(PSDSPContext *psdsp)
{
    int i;
    LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);

    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);

    randomize((INTFLOAT *)l, BUF_SIZE * 2);
    randomize((INTFLOAT *)r, BUF_SIZE * 2);

    for (i = 0; i < 2; i++) {
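        /* i == 0 checks ps_stereo_interpolate, i == 1 the _ipdopd
         * variant; both are driven through the same buffers. */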
"_ipdopd" : "")) { 206 memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 207 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 208 memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 209 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 210 211 randomize((INTFLOAT *)h, 2 * 4); 212 randomize((INTFLOAT *)h_step, 2 * 4); 213 // Clear the least significant 14 bits of h_step, to avoid 214 // divergence when accumulating h_step BUF_SIZE times into 215 // a float variable which may or may not have extra intermediate 216 // precision. Therefore clear roughly log2(BUF_SIZE) less 217 // significant bits, to get the same result regardless of any 218 // extra precision in the accumulator. 219 clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); 220 221 call_ref(l0, r0, h, h_step, BUF_SIZE); 222 call_new(l1, r1, h, h_step, BUF_SIZE); 223 if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) || 224 !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) 225 fail(); 226 227 memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT)); 228 memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT)); 229 bench_new(l1, r1, h, h_step, BUF_SIZE); 230 } 231 } 232} 233 234void checkasm_check_aacpsdsp(void) 235{ 236 PSDSPContext psdsp; 237 238 ff_psdsp_init(&psdsp); 239 240 if (check_func(psdsp.add_squares, "ps_add_squares")) 241 test_add_squares(); 242 report("add_squares"); 243 244 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single")) 245 test_mul_pair_single(); 246 report("mul_pair_single"); 247 248 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis")) 249 test_hybrid_analysis(); 250 report("hybrid_analysis"); 251 252 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave")) 253 test_hybrid_analysis_ileave(); 254 report("hybrid_analysis_ileave"); 255 256 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint")) 257 test_hybrid_synthesis_deint(); 258 report("hybrid_synthesis_deint"); 259 260 test_stereo_interpolate(&psdsp); 261 report("stereo_interpolate"); 262} 263