1/*
2 * Copyright (c) 2015 James Almer
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
#include <stdio.h>

#include "checkasm.h"
#include "libavutil/common.h"
#include "libavutil/fixed_dsp.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
27
28#define BUF_SIZE 256
29
/* Fill the first BUF_SIZE entries of src0, src1 and src2 with values taken
 * from rnd() and passed through sign_extend(..., 24). The three buffers must
 * be visible under exactly these names where the macro is expanded. The
 * per-element order (src0, src1, src2) determines which random draw lands
 * in which buffer. */
#define randomize_buffers()                       \
    do {                                          \
        int idx;                                  \
        for (idx = 0; idx < BUF_SIZE; ++idx) {    \
            src0[idx] = sign_extend(rnd(), 24);   \
            src1[idx] = sign_extend(rnd(), 24);   \
            src2[idx] = sign_extend(rnd(), 24);   \
        }                                         \
    } while (0)
39
40static void check_vector_fmul(const int *src0, const int *src1)
41{
42    LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
43    LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
44
45    declare_func(void, int *dst, const int *src0, const int *src1, int len);
46
47    call_ref(ref, src0, src1, BUF_SIZE);
48    call_new(new, src0, src1, BUF_SIZE);
49    if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
50        fail();
51    bench_new(new, src0, src1, BUF_SIZE);
52}
53
54static void check_vector_fmul_add(const int *src0, const int *src1, const int *src2)
55{
56    LOCAL_ALIGNED_32(int, ref, [BUF_SIZE]);
57    LOCAL_ALIGNED_32(int, new, [BUF_SIZE]);
58
59    declare_func(void, int *dst, const int *src0, const int *src1, const int *src2, int len);
60
61    call_ref(ref, src0, src1, src2, BUF_SIZE);
62    call_new(new, src0, src1, src2, BUF_SIZE);
63    if (memcmp(ref, new, BUF_SIZE * sizeof(int)))
64        fail();
65    bench_new(new, src0, src1, src2, BUF_SIZE);
66}
67
68static void check_vector_fmul_window(const int32_t *src0, const int32_t *src1, const int32_t *win)
69{
70    LOCAL_ALIGNED_32(int32_t, ref, [BUF_SIZE]);
71    LOCAL_ALIGNED_32(int32_t, new, [BUF_SIZE]);
72
73    declare_func(void, int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
74
75    call_ref(ref, src0, src1, win, BUF_SIZE / 2);
76    call_new(new, src0, src1, win, BUF_SIZE / 2);
77    if (memcmp(ref, new, BUF_SIZE * sizeof(int32_t)))
78        fail();
79    bench_new(new, src0, src1, win, BUF_SIZE / 2);
80}
81
82static void check_vector_fmul_window_scaled(const int32_t *src0, const int32_t *src1, const int32_t *win)
83{
84    LOCAL_ALIGNED_16(int16_t, ref, [BUF_SIZE]);
85    LOCAL_ALIGNED_16(int16_t, new, [BUF_SIZE]);
86
87    declare_func(void, int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits);
88
89    call_ref(ref, src0, src1, win, BUF_SIZE / 2, 2);
90    call_new(new, src0, src1, win, BUF_SIZE / 2, 2);
91    if (memcmp(ref, new, BUF_SIZE * sizeof(int16_t)))
92        fail();
93    bench_new(new, src0, src1, win, BUF_SIZE / 2, 2);
94}
95
96static void check_butterflies(const int *src0, const int *src1)
97{
98    LOCAL_ALIGNED_16(int, ref0, [BUF_SIZE]);
99    LOCAL_ALIGNED_16(int, ref1, [BUF_SIZE]);
100    LOCAL_ALIGNED_16(int, new0, [BUF_SIZE]);
101    LOCAL_ALIGNED_16(int, new1, [BUF_SIZE]);
102
103    declare_func(void, int *av_restrict src0, int *av_restrict src1, int len);
104
105    memcpy(ref0, src0, BUF_SIZE * sizeof(*src0));
106    memcpy(ref1, src1, BUF_SIZE * sizeof(*src1));
107    memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
108    memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
109
110    call_ref(ref0, ref1, BUF_SIZE);
111    call_new(new0, new1, BUF_SIZE);
112    if (memcmp(ref0, new0, BUF_SIZE * sizeof(*ref0)) ||
113        memcmp(ref1, new1, BUF_SIZE * sizeof(*ref1)))
114        fail();
115    memcpy(new0, src0, BUF_SIZE * sizeof(*src0));
116    memcpy(new1, src1, BUF_SIZE * sizeof(*src1));
117    bench_new(new0, new1, BUF_SIZE);
118}
119
120static void check_scalarproduct_fixed(const int *src0, const int *src1)
121{
122    int ref, new;
123
124    declare_func(int, const int *src0, const int *src1, int len);
125
126    ref = call_ref(src0, src1, BUF_SIZE);
127    new = call_new(src0, src1, BUF_SIZE);
128    if (ref != new)
129        fail();
130    bench_new(src0, src1, BUF_SIZE);
131}
132
133void checkasm_check_fixed_dsp(void)
134{
135    LOCAL_ALIGNED_32(int32_t, src0, [BUF_SIZE]);
136    LOCAL_ALIGNED_32(int32_t, src1, [BUF_SIZE]);
137    LOCAL_ALIGNED_32(int32_t, src2, [BUF_SIZE]);
138    AVFixedDSPContext *fdsp = avpriv_alloc_fixed_dsp(1);
139
140    randomize_buffers();
141    if (check_func(fdsp->vector_fmul, "vector_fmul_fixed"))
142        check_vector_fmul(src0, src1);
143    if (check_func(fdsp->vector_fmul_add, "vector_fmul_add_fixed"))
144        check_vector_fmul_add(src0, src1, src2);
145    if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse_fixed"))
146        check_vector_fmul(src0, src1);
147    if (check_func(fdsp->vector_fmul_window, "vector_fmul_window_fixed"))
148        check_vector_fmul_window(src0, src1, src2);
149    if (check_func(fdsp->vector_fmul_window_scaled, "vector_fmul_window_scaled_fixed"))
150        check_vector_fmul_window_scaled(src0, src1, src2);
151    report("vector_fmul");
152    if (check_func(fdsp->butterflies_fixed, "butterflies_fixed"))
153        check_butterflies(src0, src1);
154    report("butterflies_fixed");
155    if (check_func(fdsp->scalarproduct_fixed, "scalarproduct_fixed"))
156        check_scalarproduct_fixed(src0, src1);
157    report("scalarproduct_fixed");
158
159    av_freep(&fdsp);
160}
161