1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include <stdint.h>
20#include "libavutil/common.h"
21#include "mathops.h"
22
/*
 * Helper macros selected by SAMPLE_SIZE.
 *
 * Earlier definitions are dropped first so that the ones below always apply
 * (presumably because this file is included once per supported SAMPLE_SIZE;
 * confirm against the including file).
 */
#undef CLIP
#undef MUL
#undef sum_type
#undef FSUF
#undef FUNC

/* Build a per-size identifier from n, "_" and SAMPLE_SIZE via AV_JOIN(). */
#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)

#if SAMPLE_SIZE != 32
/* Accumulate in int32_t with a plain multiplication; CLIP is the identity. */
#   define sum_type  int32_t
#   define MUL(a, b) ((a) * (b))
#   define CLIP(x) (x)
#else
/* Accumulate in int64_t. MUL64() and av_clipl_int32() come from the project
 * headers (presumably a widening multiply and a clamp to the int32 range). */
#   define sum_type  int64_t
#   define MUL(a, b) MUL64(a, b)
#   define CLIP(x) av_clipl_int32(x)
#endif
40
41#define LPC1(x) {           \
42    int c = coefs[(x)-1];   \
43    p0   += MUL(c, s);      \
44    s     = smp[i-(x)+1];   \
45    p1   += MUL(c, s);      \
46}
47
48static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
49                                  const int32_t *smp, int len, int order,
50                                  const int32_t *coefs, int shift, int big)
51{
52    int i;
53    for (i = order; i < len; i += 2) {
54        int s  = smp[i-order];
55        sum_type p0 = 0, p1 = 0;
56        if (big) {
57            switch (order) {
58            case 32: LPC1(32)
59            case 31: LPC1(31)
60            case 30: LPC1(30)
61            case 29: LPC1(29)
62            case 28: LPC1(28)
63            case 27: LPC1(27)
64            case 26: LPC1(26)
65            case 25: LPC1(25)
66            case 24: LPC1(24)
67            case 23: LPC1(23)
68            case 22: LPC1(22)
69            case 21: LPC1(21)
70            case 20: LPC1(20)
71            case 19: LPC1(19)
72            case 18: LPC1(18)
73            case 17: LPC1(17)
74            case 16: LPC1(16)
75            case 15: LPC1(15)
76            case 14: LPC1(14)
77            case 13: LPC1(13)
78            case 12: LPC1(12)
79            case 11: LPC1(11)
80            case 10: LPC1(10)
81            case  9: LPC1( 9)
82                     LPC1( 8)
83                     LPC1( 7)
84                     LPC1( 6)
85                     LPC1( 5)
86                     LPC1( 4)
87                     LPC1( 3)
88                     LPC1( 2)
89                     LPC1( 1)
90            }
91        } else {
92            switch (order) {
93            case  8: LPC1( 8)
94            case  7: LPC1( 7)
95            case  6: LPC1( 6)
96            case  5: LPC1( 5)
97            case  4: LPC1( 4)
98            case  3: LPC1( 3)
99            case  2: LPC1( 2)
100            case  1: LPC1( 1)
101            }
102        }
103        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
104        res[i+1] = smp[i+1] - CLIP(p1 >> shift);
105    }
106}
107
108static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
109                                    int order, const int32_t *coefs, int shift)
110{
111    int i;
112    for (i = 0; i < order; i++)
113        res[i] = smp[i];
114#if CONFIG_SMALL
115    for (i = order; i < len; i += 2) {
116        int j;
117        int s  = smp[i];
118        sum_type p0 = 0, p1 = 0;
119        for (j = 0; j < order; j++) {
120            int c = coefs[j];
121            p1   += MUL(c, s);
122            s     = smp[i-j-1];
123            p0   += MUL(c, s);
124        }
125        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
126        res[i+1] = smp[i+1] - CLIP(p1 >> shift);
127    }
128#else
129    switch (order) {
130    case  1: FUNC(lpc_encode_unrolled)(res, smp, len,     1, coefs, shift, 0); break;
131    case  2: FUNC(lpc_encode_unrolled)(res, smp, len,     2, coefs, shift, 0); break;
132    case  3: FUNC(lpc_encode_unrolled)(res, smp, len,     3, coefs, shift, 0); break;
133    case  4: FUNC(lpc_encode_unrolled)(res, smp, len,     4, coefs, shift, 0); break;
134    case  5: FUNC(lpc_encode_unrolled)(res, smp, len,     5, coefs, shift, 0); break;
135    case  6: FUNC(lpc_encode_unrolled)(res, smp, len,     6, coefs, shift, 0); break;
136    case  7: FUNC(lpc_encode_unrolled)(res, smp, len,     7, coefs, shift, 0); break;
137    case  8: FUNC(lpc_encode_unrolled)(res, smp, len,     8, coefs, shift, 0); break;
138    default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
139    }
140#endif
141}
142
143/* Comment for clarity/de-obfuscation.
144 *
145 * for (int i = order; i < len; i++) {
146 *     int32_t p = 0;
147 *     for (int j = 0; j < order; j++) {
148 *         int c = coefs[j];
149 *         int s = smp[(i-1)-j];
150 *         p    += c*s;
151 *     }
152 *     res[i] = smp[i] - (p >> shift);
153 * }
154 *
155 * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
156 * not being equal to 32 (at the present time that means for 16-bit audio). The
157 * code above does 2 samples per iteration.  Commit bfdd5bc (made all the way
158 * back in 2007) says that way is faster.
159 */
160