1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * This file is part of FFmpeg.
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or modify
5cabdff1aSopenharmony_ci * it under the terms of the GNU General Public License as published by
6cabdff1aSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or
7cabdff1aSopenharmony_ci * (at your option) any later version.
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12cabdff1aSopenharmony_ci * GNU General Public License for more details.
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * You should have received a copy of the GNU General Public License along
15cabdff1aSopenharmony_ci * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16cabdff1aSopenharmony_ci * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17cabdff1aSopenharmony_ci */
18cabdff1aSopenharmony_ci
19cabdff1aSopenharmony_ci#include "libavcodec/aacpsdsp.h"
20cabdff1aSopenharmony_ci#include "libavutil/intfloat.h"
21cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci#include "checkasm.h"
24cabdff1aSopenharmony_ci
/*
 * Buffer geometry shared by the tests in this file. N and STRIDE are handed
 * to hybrid_analysis as its `n` and `stride` arguments; BUF_SIZE, their
 * product, is the element count used for most scratch buffers.
 */
#define N        32
#define STRIDE   128
#define BUF_SIZE ((N) * (STRIDE))
28cabdff1aSopenharmony_ci
/*
 * Fill the first `len` elements of `buf` with pseudo-random values: each
 * element is rnd() converted to INTFLOAT and divided by UINT_MAX.
 *
 * `len` is parenthesized so that any expression can be passed, and the
 * macro-local identifiers carry a randomize_ prefix so that they cannot
 * capture names appearing in the `buf` or `len` arguments (callers may have
 * their own `i` in scope). `len` is re-evaluated on every iteration.
 */
#define randomize(buf, len) do {                                        \
    int randomize_i;                                                    \
    for (randomize_i = 0; randomize_i < (len); randomize_i++) {         \
        const INTFLOAT randomize_f = (INTFLOAT)rnd() / UINT_MAX;        \
        (buf)[randomize_i] = randomize_f;                               \
    }                                                                   \
} while (0)
36cabdff1aSopenharmony_ci
/* Tolerance passed to float_near_abs_eps_array() when the output of the
 * reference function is compared with that of the function under test. */
#define EPS (0.005)
38cabdff1aSopenharmony_ci
39cabdff1aSopenharmony_cistatic void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
40cabdff1aSopenharmony_ci{
41cabdff1aSopenharmony_ci    int i;
42cabdff1aSopenharmony_ci    for (i = 0; i < len; i++) {
43cabdff1aSopenharmony_ci        union av_intfloat32 u = { .f = buf[i] };
44cabdff1aSopenharmony_ci        u.i &= (0xffffffff << bits);
45cabdff1aSopenharmony_ci        buf[i] = u.f;
46cabdff1aSopenharmony_ci    }
47cabdff1aSopenharmony_ci}
48cabdff1aSopenharmony_ci
49cabdff1aSopenharmony_cistatic void test_add_squares(void)
50cabdff1aSopenharmony_ci{
51cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
52cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
53cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT *dst,
56cabdff1aSopenharmony_ci                 const INTFLOAT (*src)[2], int n);
57cabdff1aSopenharmony_ci
58cabdff1aSopenharmony_ci    randomize((INTFLOAT *)src, BUF_SIZE * 2);
59cabdff1aSopenharmony_ci    randomize(dst0, BUF_SIZE);
60cabdff1aSopenharmony_ci    memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61cabdff1aSopenharmony_ci    call_ref(dst0, src, BUF_SIZE);
62cabdff1aSopenharmony_ci    call_new(dst1, src, BUF_SIZE);
63cabdff1aSopenharmony_ci    if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64cabdff1aSopenharmony_ci        fail();
65cabdff1aSopenharmony_ci    bench_new(dst1, src, BUF_SIZE);
66cabdff1aSopenharmony_ci}
67cabdff1aSopenharmony_ci
68cabdff1aSopenharmony_cistatic void test_mul_pair_single(void)
69cabdff1aSopenharmony_ci{
70cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
72cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
73cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74cabdff1aSopenharmony_ci
75cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT (*dst)[2],
76cabdff1aSopenharmony_ci                       INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_ci    randomize((INTFLOAT *)src0, BUF_SIZE * 2);
79cabdff1aSopenharmony_ci    randomize(src1, BUF_SIZE);
80cabdff1aSopenharmony_ci    call_ref(dst0, src0, src1, BUF_SIZE);
81cabdff1aSopenharmony_ci    call_new(dst1, src0, src1, BUF_SIZE);
82cabdff1aSopenharmony_ci    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83cabdff1aSopenharmony_ci        fail();
84cabdff1aSopenharmony_ci    bench_new(dst1, src0, src1, BUF_SIZE);
85cabdff1aSopenharmony_ci}
86cabdff1aSopenharmony_ci
87cabdff1aSopenharmony_cistatic void test_hybrid_analysis(void)
88cabdff1aSopenharmony_ci{
89cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93cabdff1aSopenharmony_ci
94cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95cabdff1aSopenharmony_ci                 const INTFLOAT (*filter)[8][2],
96cabdff1aSopenharmony_ci                 ptrdiff_t stride, int n);
97cabdff1aSopenharmony_ci
98cabdff1aSopenharmony_ci    randomize((INTFLOAT *)in, 13 * 2);
99cabdff1aSopenharmony_ci    randomize((INTFLOAT *)filter, N * 8 * 2);
100cabdff1aSopenharmony_ci
101cabdff1aSopenharmony_ci    randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102cabdff1aSopenharmony_ci    memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103cabdff1aSopenharmony_ci
104cabdff1aSopenharmony_ci    call_ref(dst0, in, filter, STRIDE, N);
105cabdff1aSopenharmony_ci    call_new(dst1, in, filter, STRIDE, N);
106cabdff1aSopenharmony_ci
107cabdff1aSopenharmony_ci    if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108cabdff1aSopenharmony_ci        fail();
109cabdff1aSopenharmony_ci    bench_new(dst1, in, filter, STRIDE, N);
110cabdff1aSopenharmony_ci}
111cabdff1aSopenharmony_ci
112cabdff1aSopenharmony_cistatic void test_hybrid_analysis_ileave(void)
113cabdff1aSopenharmony_ci{
114cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, in,   [2], [38][64]);
115cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117cabdff1aSopenharmony_ci
118cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119cabdff1aSopenharmony_ci                       int i, int len);
120cabdff1aSopenharmony_ci
121cabdff1aSopenharmony_ci    randomize((INTFLOAT *)out0, 91 * 32 * 2);
122cabdff1aSopenharmony_ci    randomize((INTFLOAT *)in,    2 * 38 * 64);
123cabdff1aSopenharmony_ci    memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124cabdff1aSopenharmony_ci
125cabdff1aSopenharmony_ci    /* len is hardcoded to 32 as that's the only value used in
126cabdff1aSopenharmony_ci       libavcodec. asm functions are likely to be optimized
127cabdff1aSopenharmony_ci       hardcoding this value in their loops and could fail with
128cabdff1aSopenharmony_ci       anything else.
129cabdff1aSopenharmony_ci       i is hardcoded to the two values currently used by the
130cabdff1aSopenharmony_ci       aac decoder because the arm neon implementation is
131cabdff1aSopenharmony_ci       micro-optimized for them and will fail for almost every
132cabdff1aSopenharmony_ci       other value. */
133cabdff1aSopenharmony_ci    call_ref(out0, in, 3, 32);
134cabdff1aSopenharmony_ci    call_new(out1, in, 3, 32);
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_ci    /* the function just moves data around, so memcmp is enough */
137cabdff1aSopenharmony_ci    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138cabdff1aSopenharmony_ci        fail();
139cabdff1aSopenharmony_ci
140cabdff1aSopenharmony_ci    call_ref(out0, in, 5, 32);
141cabdff1aSopenharmony_ci    call_new(out1, in, 5, 32);
142cabdff1aSopenharmony_ci
143cabdff1aSopenharmony_ci    if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144cabdff1aSopenharmony_ci        fail();
145cabdff1aSopenharmony_ci
146cabdff1aSopenharmony_ci    bench_new(out1, in, 3, 32);
147cabdff1aSopenharmony_ci}
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_cistatic void test_hybrid_synthesis_deint(void)
150cabdff1aSopenharmony_ci{
151cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, in,  [91], [32][2]);
154cabdff1aSopenharmony_ci
155cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156cabdff1aSopenharmony_ci                       int i, int len);
157cabdff1aSopenharmony_ci
158cabdff1aSopenharmony_ci    randomize((INTFLOAT *)in,  91 * 32 * 2);
159cabdff1aSopenharmony_ci    randomize((INTFLOAT *)out0, 2 * 38 * 64);
160cabdff1aSopenharmony_ci    memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161cabdff1aSopenharmony_ci
162cabdff1aSopenharmony_ci    /* len is hardcoded to 32 as that's the only value used in
163cabdff1aSopenharmony_ci       libavcodec. asm functions are likely to be optimized
164cabdff1aSopenharmony_ci       hardcoding this value in their loops and could fail with
165cabdff1aSopenharmony_ci       anything else.
166cabdff1aSopenharmony_ci       i is hardcoded to the two values currently used by the
167cabdff1aSopenharmony_ci       aac decoder because the arm neon implementation is
168cabdff1aSopenharmony_ci       micro-optimized for them and will fail for almost every
169cabdff1aSopenharmony_ci       other value. */
170cabdff1aSopenharmony_ci    call_ref(out0, in, 3, 32);
171cabdff1aSopenharmony_ci    call_new(out1, in, 3, 32);
172cabdff1aSopenharmony_ci
173cabdff1aSopenharmony_ci    /* the function just moves data around, so memcmp is enough */
174cabdff1aSopenharmony_ci    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175cabdff1aSopenharmony_ci        fail();
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci    call_ref(out0, in, 5, 32);
178cabdff1aSopenharmony_ci    call_new(out1, in, 5, 32);
179cabdff1aSopenharmony_ci
180cabdff1aSopenharmony_ci    if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181cabdff1aSopenharmony_ci        fail();
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ci    bench_new(out1, in, 3, 32);
184cabdff1aSopenharmony_ci}
185cabdff1aSopenharmony_ci
186cabdff1aSopenharmony_cistatic void test_stereo_interpolate(PSDSPContext *psdsp)
187cabdff1aSopenharmony_ci{
188cabdff1aSopenharmony_ci    int i;
189cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, l,  [BUF_SIZE], [2]);
190cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, r,  [BUF_SIZE], [2]);
191cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
192cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
193cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
194cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
195cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
196cabdff1aSopenharmony_ci    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_ci    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
199cabdff1aSopenharmony_ci                       INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200cabdff1aSopenharmony_ci
201cabdff1aSopenharmony_ci    randomize((INTFLOAT *)l, BUF_SIZE * 2);
202cabdff1aSopenharmony_ci    randomize((INTFLOAT *)r, BUF_SIZE * 2);
203cabdff1aSopenharmony_ci
204cabdff1aSopenharmony_ci    for (i = 0; i < 2; i++) {
205cabdff1aSopenharmony_ci        if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
206cabdff1aSopenharmony_ci            memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207cabdff1aSopenharmony_ci            memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
208cabdff1aSopenharmony_ci            memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209cabdff1aSopenharmony_ci            memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210cabdff1aSopenharmony_ci
211cabdff1aSopenharmony_ci            randomize((INTFLOAT *)h, 2 * 4);
212cabdff1aSopenharmony_ci            randomize((INTFLOAT *)h_step, 2 * 4);
213cabdff1aSopenharmony_ci            // Clear the least significant 14 bits of h_step, to avoid
214cabdff1aSopenharmony_ci            // divergence when accumulating h_step BUF_SIZE times into
215cabdff1aSopenharmony_ci            // a float variable which may or may not have extra intermediate
216cabdff1aSopenharmony_ci            // precision. Therefore clear roughly log2(BUF_SIZE) less
217cabdff1aSopenharmony_ci            // significant bits, to get the same result regardless of any
218cabdff1aSopenharmony_ci            // extra precision in the accumulator.
219cabdff1aSopenharmony_ci            clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220cabdff1aSopenharmony_ci
221cabdff1aSopenharmony_ci            call_ref(l0, r0, h, h_step, BUF_SIZE);
222cabdff1aSopenharmony_ci            call_new(l1, r1, h, h_step, BUF_SIZE);
223cabdff1aSopenharmony_ci            if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
224cabdff1aSopenharmony_ci                !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
225cabdff1aSopenharmony_ci                fail();
226cabdff1aSopenharmony_ci
227cabdff1aSopenharmony_ci            memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
228cabdff1aSopenharmony_ci            memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
229cabdff1aSopenharmony_ci            bench_new(l1, r1, h, h_step, BUF_SIZE);
230cabdff1aSopenharmony_ci        }
231cabdff1aSopenharmony_ci    }
232cabdff1aSopenharmony_ci}
233cabdff1aSopenharmony_ci
234cabdff1aSopenharmony_civoid checkasm_check_aacpsdsp(void)
235cabdff1aSopenharmony_ci{
236cabdff1aSopenharmony_ci    PSDSPContext psdsp;
237cabdff1aSopenharmony_ci
238cabdff1aSopenharmony_ci    ff_psdsp_init(&psdsp);
239cabdff1aSopenharmony_ci
240cabdff1aSopenharmony_ci    if (check_func(psdsp.add_squares, "ps_add_squares"))
241cabdff1aSopenharmony_ci        test_add_squares();
242cabdff1aSopenharmony_ci    report("add_squares");
243cabdff1aSopenharmony_ci
244cabdff1aSopenharmony_ci    if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
245cabdff1aSopenharmony_ci        test_mul_pair_single();
246cabdff1aSopenharmony_ci    report("mul_pair_single");
247cabdff1aSopenharmony_ci
248cabdff1aSopenharmony_ci    if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
249cabdff1aSopenharmony_ci        test_hybrid_analysis();
250cabdff1aSopenharmony_ci    report("hybrid_analysis");
251cabdff1aSopenharmony_ci
252cabdff1aSopenharmony_ci    if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
253cabdff1aSopenharmony_ci        test_hybrid_analysis_ileave();
254cabdff1aSopenharmony_ci    report("hybrid_analysis_ileave");
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci    if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
257cabdff1aSopenharmony_ci        test_hybrid_synthesis_deint();
258cabdff1aSopenharmony_ci    report("hybrid_synthesis_deint");
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci    test_stereo_interpolate(&psdsp);
261cabdff1aSopenharmony_ci    report("stereo_interpolate");
262cabdff1aSopenharmony_ci}
263