1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#include "libavcodec/aacpsdsp.h"
20#include "libavutil/intfloat.h"
21#include "libavutil/mem_internal.h"
22
23#include "checkasm.h"
24
/* Value passed as the n argument of the hybrid analysis test and used as the
 * outermost dimension of its filter table. */
#define N 32
/* Value passed as the stride argument of the hybrid analysis test. */
#define STRIDE 128
/* Number of elements (or of [2] pairs) in the generic test buffers. */
#define BUF_SIZE (N * STRIDE)

/*
 * Fill buf[0] .. buf[len - 1] with (INTFLOAT)rnd() / UINT_MAX.
 *
 * len is evaluated exactly once and is fully parenthesized, so any integer
 * expression (including a conditional expression) may be passed. The
 * temporaries use macro-specific names so that identifiers such as i or f
 * appearing in the buf/len arguments refer to the caller's variables rather
 * than to the macro's own locals.
 */
#define randomize(buf, len) do {                                        \
    const int randomize_len_ = (len);                                   \
    int randomize_idx_;                                                 \
    for (randomize_idx_ = 0; randomize_idx_ < randomize_len_;           \
         randomize_idx_++) {                                            \
        const INTFLOAT randomize_val_ = (INTFLOAT)rnd() / UINT_MAX;     \
        (buf)[randomize_idx_] = randomize_val_;                         \
    }                                                                   \
} while (0)

/* Absolute tolerance handed to float_near_abs_eps_array() by the tests. */
#define EPS 0.005
38
39static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
40{
41    int i;
42    for (i = 0; i < len; i++) {
43        union av_intfloat32 u = { .f = buf[i] };
44        u.i &= (0xffffffff << bits);
45        buf[i] = u.f;
46    }
47}
48
49static void test_add_squares(void)
50{
51    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
52    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
53    LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54
55    declare_func(void, INTFLOAT *dst,
56                 const INTFLOAT (*src)[2], int n);
57
58    randomize((INTFLOAT *)src, BUF_SIZE * 2);
59    randomize(dst0, BUF_SIZE);
60    memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61    call_ref(dst0, src, BUF_SIZE);
62    call_new(dst1, src, BUF_SIZE);
63    if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64        fail();
65    bench_new(dst1, src, BUF_SIZE);
66}
67
68static void test_mul_pair_single(void)
69{
70    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
72    LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
73    LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74
75    declare_func(void, INTFLOAT (*dst)[2],
76                       INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77
78    randomize((INTFLOAT *)src0, BUF_SIZE * 2);
79    randomize(src1, BUF_SIZE);
80    call_ref(dst0, src0, src1, BUF_SIZE);
81    call_new(dst1, src0, src1, BUF_SIZE);
82    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83        fail();
84    bench_new(dst1, src0, src1, BUF_SIZE);
85}
86
87static void test_hybrid_analysis(void)
88{
89    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91    LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92    LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93
94    declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95                 const INTFLOAT (*filter)[8][2],
96                 ptrdiff_t stride, int n);
97
98    randomize((INTFLOAT *)in, 13 * 2);
99    randomize((INTFLOAT *)filter, N * 8 * 2);
100
101    randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102    memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103
104    call_ref(dst0, in, filter, STRIDE, N);
105    call_new(dst1, in, filter, STRIDE, N);
106
107    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108        fail();
109    bench_new(dst1, in, filter, STRIDE, N);
110}
111
112static void test_hybrid_analysis_ileave(void)
113{
114    LOCAL_ALIGNED_16(INTFLOAT, in,   [2], [38][64]);
115    LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116    LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117
118    declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119                       int i, int len);
120
121    randomize((INTFLOAT *)out0, 91 * 32 * 2);
122    randomize((INTFLOAT *)in,    2 * 38 * 64);
123    memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124
125    /* len is hardcoded to 32 as that's the only value used in
126       libavcodec. asm functions are likely to be optimized
127       hardcoding this value in their loops and could fail with
128       anything else.
129       i is hardcoded to the two values currently used by the
130       aac decoder because the arm neon implementation is
131       micro-optimized for them and will fail for almost every
132       other value. */
133    call_ref(out0, in, 3, 32);
134    call_new(out1, in, 3, 32);
135
136    /* the function just moves data around, so memcmp is enough */
137    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138        fail();
139
140    call_ref(out0, in, 5, 32);
141    call_new(out1, in, 5, 32);
142
143    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144        fail();
145
146    bench_new(out1, in, 3, 32);
147}
148
149static void test_hybrid_synthesis_deint(void)
150{
151    LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152    LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153    LOCAL_ALIGNED_16(INTFLOAT, in,  [91], [32][2]);
154
155    declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156                       int i, int len);
157
158    randomize((INTFLOAT *)in,  91 * 32 * 2);
159    randomize((INTFLOAT *)out0, 2 * 38 * 64);
160    memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161
162    /* len is hardcoded to 32 as that's the only value used in
163       libavcodec. asm functions are likely to be optimized
164       hardcoding this value in their loops and could fail with
165       anything else.
166       i is hardcoded to the two values currently used by the
167       aac decoder because the arm neon implementation is
168       micro-optimized for them and will fail for almost every
169       other value. */
170    call_ref(out0, in, 3, 32);
171    call_new(out1, in, 3, 32);
172
173    /* the function just moves data around, so memcmp is enough */
174    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175        fail();
176
177    call_ref(out0, in, 5, 32);
178    call_new(out1, in, 5, 32);
179
180    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181        fail();
182
183    bench_new(out1, in, 3, 32);
184}
185
186static void test_stereo_interpolate(PSDSPContext *psdsp)
187{
188    int i;
189    LOCAL_ALIGNED_16(INTFLOAT, l,  [BUF_SIZE], [2]);
190    LOCAL_ALIGNED_16(INTFLOAT, r,  [BUF_SIZE], [2]);
191    LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
192    LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
193    LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
194    LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
195    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
196    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197
198    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
199                       INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200
201    randomize((INTFLOAT *)l, BUF_SIZE * 2);
202    randomize((INTFLOAT *)r, BUF_SIZE * 2);
203
204    for (i = 0; i < 2; i++) {
205        if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
206            memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207            memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
208            memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209            memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210
211            randomize((INTFLOAT *)h, 2 * 4);
212            randomize((INTFLOAT *)h_step, 2 * 4);
213            // Clear the least significant 14 bits of h_step, to avoid
214            // divergence when accumulating h_step BUF_SIZE times into
215            // a float variable which may or may not have extra intermediate
216            // precision. Therefore clear roughly log2(BUF_SIZE) less
217            // significant bits, to get the same result regardless of any
218            // extra precision in the accumulator.
219            clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220
221            call_ref(l0, r0, h, h_step, BUF_SIZE);
222            call_new(l1, r1, h, h_step, BUF_SIZE);
223            if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
224                !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
225                fail();
226
227            memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
228            memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
229            bench_new(l1, r1, h, h_step, BUF_SIZE);
230        }
231    }
232}
233
234void checkasm_check_aacpsdsp(void)
235{
236    PSDSPContext psdsp;
237
238    ff_psdsp_init(&psdsp);
239
240    if (check_func(psdsp.add_squares, "ps_add_squares"))
241        test_add_squares();
242    report("add_squares");
243
244    if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
245        test_mul_pair_single();
246    report("mul_pair_single");
247
248    if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
249        test_hybrid_analysis();
250    report("hybrid_analysis");
251
252    if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
253        test_hybrid_analysis_ileave();
254    report("hybrid_analysis_ileave");
255
256    if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
257        test_hybrid_synthesis_deint();
258    report("hybrid_synthesis_deint");
259
260    test_stereo_interpolate(&psdsp);
261    report("stereo_interpolate");
262}
263