xref: /third_party/ffmpeg/libavutil/float_dsp.h (revision cabdff1a)
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#ifndef AVUTIL_FLOAT_DSP_H
20#define AVUTIL_FLOAT_DSP_H
21
22#include "config.h"
23
/**
 * Table of function pointers for vector operations on float and double
 * arrays. Every member is a function pointer; each one documents its own
 * alignment and length constraints.
 */
24typedef struct AVFloatDSPContext {
25    /**
26     * Calculate the entry-wise product of two vectors of floats and store the result in
27     * a vector of floats.
28     *
29     * @param dst  output vector
30     *             constraints: 32-byte aligned
31     * @param src0 first input vector
32     *             constraints: 32-byte aligned
33     * @param src1 second input vector
34     *             constraints: 32-byte aligned
35     * @param len  number of elements in the input
36     *             constraints: multiple of 16
37     */
38    void (*vector_fmul)(float *dst, const float *src0, const float *src1,
39                        int len);
40
41    /**
42     * Multiply a vector of floats by a scalar float and add to
43     * destination vector.  Source and destination vectors must
44     * overlap exactly or not at all.
45     *
46     * @param dst result vector
47     *            constraints: 32-byte aligned
48     * @param src input vector
49     *            constraints: 32-byte aligned
50     * @param mul scalar value
51     * @param len length of vector
52     *            constraints: multiple of 16
53     */
54    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
55                               int len);
56
57    /**
58     * Multiply a vector of doubles by a scalar double and add to
59     * destination vector.  Source and destination vectors must
60     * overlap exactly or not at all.
61     *
62     * @param dst result vector
63     *            constraints: 32-byte aligned
64     * @param src input vector
65     *            constraints: 32-byte aligned
66     * @param mul scalar value
67     * @param len length of vector
68     *            constraints: multiple of 16
69     */
70    void (*vector_dmac_scalar)(double *dst, const double *src, double mul,
71                               int len);
72
73    /**
74     * Multiply a vector of floats by a scalar float.  Source and
75     * destination vectors must overlap exactly or not at all.
76     *
77     * @param dst result vector
78     *            constraints: 16-byte aligned
79     * @param src input vector
80     *            constraints: 16-byte aligned
81     * @param mul scalar value
82     * @param len length of vector
83     *            constraints: multiple of 4
84     */
85    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
86                               int len);
87
88    /**
89     * Multiply a vector of doubles by a scalar double.  Source and
90     * destination vectors must overlap exactly or not at all.
91     *
92     * @param dst result vector
93     *            constraints: 32-byte aligned
94     * @param src input vector
95     *            constraints: 32-byte aligned
96     * @param mul scalar value
97     * @param len length of vector
98     *            constraints: multiple of 8
99     */
100    void (*vector_dmul_scalar)(double *dst, const double *src, double mul,
101                               int len);
102
103    /**
104     * Overlap/add with window function.
105     * Used primarily by MDCT-based audio codecs.
106     * Source and destination vectors must overlap exactly or not at all.
107     *
108     * @param dst  result vector
109     *             constraints: 16-byte aligned
110     * @param src0 first source vector
111     *             constraints: 16-byte aligned
112     * @param src1 second source vector
113     *             constraints: 16-byte aligned
114     * @param win  half-window vector
115     *             constraints: 16-byte aligned
116     * @param len  length of vector
117     *             constraints: multiple of 4
118     */
119    void (*vector_fmul_window)(float *dst, const float *src0,
120                               const float *src1, const float *win, int len);
121
122    /**
123     * Calculate the entry-wise product of two vectors of floats, add a third vector of
124     * floats and store the result in a vector of floats.
125     *
126     * @param dst  output vector
127     *             constraints: 32-byte aligned
128     * @param src0 first input vector
129     *             constraints: 32-byte aligned
130     * @param src1 second input vector
131     *             constraints: 32-byte aligned
132     * @param src2 third input vector
133     *             constraints: 32-byte aligned
134     * @param len  number of elements in the input
135     *             constraints: multiple of 16
136     */
137    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1,
138                            const float *src2, int len);
139
140    /**
141     * Calculate the entry-wise product of two vectors of floats, and store the result
142     * in a vector of floats. The second vector of floats is iterated over
143     * in reverse order.
144     *
145     * @param dst  output vector
146     *             constraints: 32-byte aligned
147     * @param src0 first input vector
148     *             constraints: 32-byte aligned
149     * @param src1 second input vector
150     *             constraints: 32-byte aligned
151     * @param len  number of elements in the input
152     *             constraints: multiple of 16
153     */
154    void (*vector_fmul_reverse)(float *dst, const float *src0,
155                                const float *src1, int len);
156
157    /**
158     * Calculate the sum and difference of two vectors of floats, in place.
159     *
160     * @param v1  first input vector, sum output, 16-byte aligned
161     * @param v2  second input vector, difference output, 16-byte aligned
162     * @param len length of vectors, multiple of 4
163     */
164    void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len);
165
166    /**
167     * Calculate the scalar product of two vectors of floats.
168     *
169     * @param v1  first vector, 16-byte aligned
170     * @param v2  second vector, 16-byte aligned
171     * @param len length of vectors, multiple of 4
172     *
173     * @return sum of elementwise products
174     */
175    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
176
177    /**
178     * Calculate the entry-wise product of two vectors of doubles and store the result in
179     * a vector of doubles.
180     *
181     * @param dst  output vector
182     *             constraints: 32-byte aligned
183     * @param src0 first input vector
184     *             constraints: 32-byte aligned
185     * @param src1 second input vector
186     *             constraints: 32-byte aligned
187     * @param len  number of elements in the input
188     *             constraints: multiple of 16
189     */
190    void (*vector_dmul)(double *dst, const double *src0, const double *src1,
191                        int len);
192} AVFloatDSPContext;
193
194/**
195 * Return the scalar product of two vectors of floats.
196 *
197 * @param v1  first input vector
198 * @param v2  second input vector
199 * @param len number of elements
200 *
201 * @return sum of elementwise products
202 */
203float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
204
/*
 * Per-architecture initializers for an AVFloatDSPContext. Each one takes the
 * context to operate on; the PPC variant additionally takes a `strict` flag.
 * NOTE(review): presumably these install architecture-optimized routines into
 * fdsp, and `strict` presumably has the same meaning as the parameter of
 * avpriv_float_dsp_alloc() -- the implementations are not visible from this
 * header, confirm against them.
 */
205void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
206void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
207void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
208void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
209void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
210
211/**
212 * Allocate a float DSP context.
213 *
214 * @param strict  setting to non-zero avoids using functions which may not be IEEE-754 compliant
 *
 * @return pointer to the allocated context
 *         NOTE(review): the value returned on failure and how the context
 *         is released are not specified in this header -- confirm against
 *         the implementation.
215 */
216AVFloatDSPContext *avpriv_float_dsp_alloc(int strict);
217
218#endif /* AVUTIL_FLOAT_DSP_H */
219