xref: /third_party/ffmpeg/libavcodec/dcadsp.c (revision cabdff1a)
1/*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/mem_internal.h"
22
23#include "dcadsp.h"
24#include "dcamath.h"
25
26static void decode_hf_c(int32_t **dst,
27                        const int32_t *vq_index,
28                        const int8_t hf_vq[1024][32],
29                        int32_t scale_factors[32][2],
30                        ptrdiff_t sb_start, ptrdiff_t sb_end,
31                        ptrdiff_t ofs, ptrdiff_t len)
32{
33    int i, j;
34
35    for (i = sb_start; i < sb_end; i++) {
36        const int8_t *coeff = hf_vq[vq_index[i]];
37        int32_t scale = scale_factors[i][0];
38        for (j = 0; j < len; j++)
39            dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
40    }
41}
42
43static void decode_joint_c(int32_t **dst, int32_t **src,
44                           const int32_t *scale_factors,
45                           ptrdiff_t sb_start, ptrdiff_t sb_end,
46                           ptrdiff_t ofs, ptrdiff_t len)
47{
48    int i, j;
49
50    for (i = sb_start; i < sb_end; i++) {
51        int32_t scale = scale_factors[i];
52        for (j = 0; j < len; j++)
53            dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
54    }
55}
56
57static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
58                            const float *filter_coeff, ptrdiff_t npcmblocks,
59                            int dec_select)
60{
61    // Select decimation factor
62    int factor = 64 << dec_select;
63    int ncoeffs = 8 >> dec_select;
64    int nlfesamples = npcmblocks >> (dec_select + 1);
65    int i, j, k;
66
67    for (i = 0; i < nlfesamples; i++) {
68        // One decimated sample generates 64 or 128 interpolated ones
69        for (j = 0; j < factor / 2; j++) {
70            float a = 0;
71            float b = 0;
72
73            for (k = 0; k < ncoeffs; k++) {
74                a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
75                b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
76            }
77
78            pcm_samples[             j] = a;
79            pcm_samples[factor / 2 + j] = b;
80        }
81
82        lfe_samples++;
83        pcm_samples += factor;
84    }
85}
86
87static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
88                             const float *filter_coeff, ptrdiff_t npcmblocks)
89{
90    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
91}
92
93static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
94                             const float *filter_coeff, ptrdiff_t npcmblocks)
95{
96    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
97}
98
99static void lfe_x96_float_c(float *dst, const float *src,
100                            float *hist, ptrdiff_t len)
101{
102    float prev = *hist;
103    int i;
104
105    for (i = 0; i < len; i++) {
106        float a = 0.25f * src[i] + 0.75f * prev;
107        float b = 0.75f * src[i] + 0.25f * prev;
108        prev = src[i];
109        *dst++ = a;
110        *dst++ = b;
111    }
112
113    *hist = prev;
114}
115
116static void sub_qmf32_float_c(SynthFilterContext *synth,
117                              FFTContext *imdct,
118                              float *pcm_samples,
119                              int32_t **subband_samples_lo,
120                              int32_t **subband_samples_hi,
121                              float *hist1, int *offset, float *hist2,
122                              const float *filter_coeff, ptrdiff_t npcmblocks,
123                              float scale)
124{
125    LOCAL_ALIGNED_32(float, input, [32]);
126    int i, j;
127
128    for (j = 0; j < npcmblocks; j++) {
129        // Load in one sample from each subband
130        for (i = 0; i < 32; i++) {
131            if ((i - 1) & 2)
132                input[i] = -subband_samples_lo[i][j];
133            else
134                input[i] =  subband_samples_lo[i][j];
135        }
136
137        // One subband sample generates 32 interpolated ones
138        synth->synth_filter_float(imdct, hist1, offset,
139                                  hist2, filter_coeff,
140                                  pcm_samples, input, scale);
141        pcm_samples += 32;
142    }
143}
144
145static void sub_qmf64_float_c(SynthFilterContext *synth,
146                              FFTContext *imdct,
147                              float *pcm_samples,
148                              int32_t **subband_samples_lo,
149                              int32_t **subband_samples_hi,
150                              float *hist1, int *offset, float *hist2,
151                              const float *filter_coeff, ptrdiff_t npcmblocks,
152                              float scale)
153{
154    LOCAL_ALIGNED_32(float, input, [64]);
155    int i, j;
156
157    if (!subband_samples_hi)
158        memset(&input[32], 0, sizeof(input[0]) * 32);
159
160    for (j = 0; j < npcmblocks; j++) {
161        // Load in one sample from each subband
162        if (subband_samples_hi) {
163            // Full 64 subbands, first 32 are residual coded
164            for (i =  0; i < 32; i++) {
165                if ((i - 1) & 2)
166                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
167                else
168                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
169            }
170            for (i = 32; i < 64; i++) {
171                if ((i - 1) & 2)
172                    input[i] = -subband_samples_hi[i][j];
173                else
174                    input[i] =  subband_samples_hi[i][j];
175            }
176        } else {
177            // Only first 32 subbands
178            for (i =  0; i < 32; i++) {
179                if ((i - 1) & 2)
180                    input[i] = -subband_samples_lo[i][j];
181                else
182                    input[i] =  subband_samples_lo[i][j];
183            }
184        }
185
186        // One subband sample generates 64 interpolated ones
187        synth->synth_filter_float_64(imdct, hist1, offset,
188                                     hist2, filter_coeff,
189                                     pcm_samples, input, scale);
190        pcm_samples += 64;
191    }
192}
193
194static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
195                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
196{
197    // Select decimation factor
198    int nlfesamples = npcmblocks >> 1;
199    int i, j, k;
200
201    for (i = 0; i < nlfesamples; i++) {
202        // One decimated sample generates 64 interpolated ones
203        for (j = 0; j < 32; j++) {
204            int64_t a = 0;
205            int64_t b = 0;
206
207            for (k = 0; k < 8; k++) {
208                a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
209                b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
210            }
211
212            pcm_samples[     j] = clip23(norm23(a));
213            pcm_samples[32 + j] = clip23(norm23(b));
214        }
215
216        lfe_samples++;
217        pcm_samples += 64;
218    }
219}
220
221static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
222                            int32_t *hist, ptrdiff_t len)
223{
224    int32_t prev = *hist;
225    int i;
226
227    for (i = 0; i < len; i++) {
228        int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
229        int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
230        prev = src[i];
231        *dst++ = clip23(norm23(a));
232        *dst++ = clip23(norm23(b));
233    }
234
235    *hist = prev;
236}
237
238static void sub_qmf32_fixed_c(SynthFilterContext *synth,
239                              DCADCTContext *imdct,
240                              int32_t *pcm_samples,
241                              int32_t **subband_samples_lo,
242                              int32_t **subband_samples_hi,
243                              int32_t *hist1, int *offset, int32_t *hist2,
244                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
245{
246    LOCAL_ALIGNED_32(int32_t, input, [32]);
247    int i, j;
248
249    for (j = 0; j < npcmblocks; j++) {
250        // Load in one sample from each subband
251        for (i = 0; i < 32; i++)
252            input[i] = subband_samples_lo[i][j];
253
254        // One subband sample generates 32 interpolated ones
255        synth->synth_filter_fixed(imdct, hist1, offset,
256                                  hist2, filter_coeff,
257                                  pcm_samples, input);
258        pcm_samples += 32;
259    }
260}
261
262static void sub_qmf64_fixed_c(SynthFilterContext *synth,
263                              DCADCTContext *imdct,
264                              int32_t *pcm_samples,
265                              int32_t **subband_samples_lo,
266                              int32_t **subband_samples_hi,
267                              int32_t *hist1, int *offset, int32_t *hist2,
268                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
269{
270    LOCAL_ALIGNED_32(int32_t, input, [64]);
271    int i, j;
272
273    if (!subband_samples_hi)
274        memset(&input[32], 0, sizeof(input[0]) * 32);
275
276    for (j = 0; j < npcmblocks; j++) {
277        // Load in one sample from each subband
278        if (subband_samples_hi) {
279            // Full 64 subbands, first 32 are residual coded
280            for (i =  0; i < 32; i++)
281                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
282            for (i = 32; i < 64; i++)
283                input[i] = subband_samples_hi[i][j];
284        } else {
285            // Only first 32 subbands
286            for (i =  0; i < 32; i++)
287                input[i] = subband_samples_lo[i][j];
288        }
289
290        // One subband sample generates 64 interpolated ones
291        synth->synth_filter_fixed_64(imdct, hist1, offset,
292                                     hist2, filter_coeff,
293                                     pcm_samples, input);
294        pcm_samples += 64;
295    }
296}
297
298static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
299{
300    int i;
301
302    for (i = 0; i < len; i++)
303        dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
304}
305
306static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
307                           const int32_t *src, ptrdiff_t len)
308{
309    int i;
310
311    for (i = 0; i < len; i++) {
312        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
313        dst1[i] -= cs;
314        dst2[i] -= cs;
315    }
316}
317
318static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
319{
320    int i;
321
322    for (i = 0; i < len; i++)
323        dst[i] -= (unsigned)mul15(src[i], coeff);
324}
325
326static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
327{
328    int i;
329
330    for (i = 0; i < len; i++)
331        dst[i] += (unsigned)mul15(src[i], coeff);
332}
333
334static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
335{
336    int i;
337
338    for (i = 0; i < len; i++)
339        dst[i] = mul15(dst[i], scale);
340}
341
342static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
343{
344    int i;
345
346    for (i = 0; i < len; i++)
347        dst[i] = mul16(dst[i], scale_inv);
348}
349
350static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
351{
352    int i;
353
354    for (i = 0; i < len; i++)
355        dst[i] -= mul22(src[i], coeff);
356}
357
358static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
359{
360    int i;
361
362    for (i = 0; i < len; i++)
363        dst[i] -= mul23(src[i], coeff);
364}
365
366static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
367                                  const int32_t *coeff, ptrdiff_t len)
368{
369    int i;
370
371    filter0(src0, src1, coeff[0], len);
372    filter0(src1, src0, coeff[1], len);
373    filter0(src0, src1, coeff[2], len);
374    filter0(src1, src0, coeff[3], len);
375
376    for (i = 0; i < 8; i++, src0--) {
377        filter1(src0, src1, coeff[i +  4], len);
378        filter1(src1, src0, coeff[i + 12], len);
379        filter1(src0, src1, coeff[i +  4], len);
380    }
381
382    for (i = 0; i < len; i++) {
383        *dst++ = *src1++;
384        *dst++ = *++src0;
385    }
386}
387
388static void lbr_bank_c(float output[32][4], float **input,
389                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
390{
391    float SW0 = coeff[0];
392    float SW1 = coeff[1];
393    float SW2 = coeff[2];
394    float SW3 = coeff[3];
395
396    float C1  = coeff[4];
397    float C2  = coeff[5];
398    float C3  = coeff[6];
399    float C4  = coeff[7];
400
401    float AL1 = coeff[8];
402    float AL2 = coeff[9];
403
404    int i;
405
406    // Short window and 8 point forward MDCT
407    for (i = 0; i < len; i++) {
408        float *src = input[i] + ofs;
409
410        float a = src[-4] * SW0 - src[-1] * SW3;
411        float b = src[-3] * SW1 - src[-2] * SW2;
412        float c = src[ 2] * SW1 + src[ 1] * SW2;
413        float d = src[ 3] * SW0 + src[ 0] * SW3;
414
415        output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
416        output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
417        output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
418        output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
419    }
420
421    // Aliasing cancellation for high frequencies
422    for (i = 12; i < len - 1; i++) {
423        float a = output[i  ][3] * AL1;
424        float b = output[i+1][0] * AL1;
425        output[i  ][3] += b - a;
426        output[i+1][0] -= b + a;
427        a = output[i  ][2] * AL2;
428        b = output[i+1][1] * AL2;
429        output[i  ][2] += b - a;
430        output[i+1][1] -= b + a;
431    }
432}
433
434static void lfe_iir_c(float *output, const float *input,
435                      const float iir[5][4], float hist[5][2],
436                      ptrdiff_t factor)
437{
438    float res, tmp;
439    int i, j, k;
440
441    for (i = 0; i < 64; i++) {
442        res = *input++;
443
444        for (j = 0; j < factor; j++) {
445            for (k = 0; k < 5; k++) {
446                tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
447                res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
448
449                hist[k][0] = hist[k][1];
450                hist[k][1] = tmp;
451            }
452
453            *output++ = res;
454            res = 0;
455        }
456    }
457}
458
459av_cold void ff_dcadsp_init(DCADSPContext *s)
460{
461    s->decode_hf     = decode_hf_c;
462    s->decode_joint  = decode_joint_c;
463
464    s->lfe_fir_float[0] = lfe_fir0_float_c;
465    s->lfe_fir_float[1] = lfe_fir1_float_c;
466    s->lfe_x96_float    = lfe_x96_float_c;
467    s->sub_qmf_float[0] = sub_qmf32_float_c;
468    s->sub_qmf_float[1] = sub_qmf64_float_c;
469
470    s->lfe_fir_fixed    = lfe_fir_fixed_c;
471    s->lfe_x96_fixed    = lfe_x96_fixed_c;
472    s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
473    s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
474
475    s->decor   = decor_c;
476
477    s->dmix_sub_xch   = dmix_sub_xch_c;
478    s->dmix_sub       = dmix_sub_c;
479    s->dmix_add       = dmix_add_c;
480    s->dmix_scale     = dmix_scale_c;
481    s->dmix_scale_inv = dmix_scale_inv_c;
482
483    s->assemble_freq_bands = assemble_freq_bands_c;
484
485    s->lbr_bank = lbr_bank_c;
486    s->lfe_iir = lfe_iir_c;
487
488#if ARCH_X86
489    ff_dcadsp_init_x86(s);
490#endif
491}
492