xref: /third_party/ffmpeg/libavcodec/dcadct.c (revision cabdff1a)
1/*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include <stdlib.h>
22
23#include "dcadct.h"
24#include "dcamath.h"
25
26static void sum_a(const int *input, int *output, int len)
27{
28    int i;
29
30    for (i = 0; i < len; i++)
31        output[i] = input[2 * i] + input[2 * i + 1];
32}
33
34static void sum_b(const int *input, int *output, int len)
35{
36    int i;
37
38    output[0] = input[0];
39    for (i = 1; i < len; i++)
40        output[i] = input[2 * i] + input[2 * i - 1];
41}
42
43static void sum_c(const int *input, int *output, int len)
44{
45    int i;
46
47    for (i = 0; i < len; i++)
48        output[i] = input[2 * i];
49}
50
51static void sum_d(const int *input, int *output, int len)
52{
53    int i;
54
55    output[0] = input[1];
56    for (i = 1; i < len; i++)
57        output[i] = input[2 * i - 1] + input[2 * i + 1];
58}
59
60static void dct_a(const int *input, int *output)
61{
62    static const int cos_mod[8][8] = {
63         { 8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  2435084,   822227 },
64         { 8027397,  5321677,   822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
65         { 7398092,   822227, -6484482, -8027397, -2435084,  5321677,  8348215,  3954362 },
66         { 6484482, -3954362, -8027397,   822227,  8348215,  2435084, -7398092, -5321677 },
67         { 5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  3954362,  6484482 },
68         { 3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   822227, -7398092 },
69         { 2435084, -6484482,  8348215, -7398092,  3954362,   822227, -5321677,  8027397 },
70         {  822227, -2435084,  3954362, -5321677,  6484482, -7398092,  8027397, -8348215 }
71    };
72
73    int i, j;
74
75    for (i = 0; i < 8; i++) {
76        int64_t res = 0;
77        for (j = 0; j < 8; j++)
78            res += (int64_t)cos_mod[i][j] * input[j];
79        output[i] = norm23(res);
80    }
81}
82
83static void dct_b(const int *input, int *output)
84{
85    static const int cos_mod[8][7] = {
86        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  1636536 },
87        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
88        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  6974873 },
89        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, -8227423 },
90        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  8227423 },
91        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, -6974873 },
92        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  4660461 },
93        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, -1636536 }
94    };
95
96    int i, j;
97
98    for (i = 0; i < 8; i++) {
99        int64_t res = input[0] * (INT64_C(1) << 23);
100        for (j = 0; j < 7; j++)
101            res += (int64_t)cos_mod[i][j] * input[1 + j];
102        output[i] = norm23(res);
103    }
104}
105
106static void mod_a(const int *input, int *output)
107{
108    static const int cos_mod[16] = {
109          4199362,   4240198,   4323885,   4454708,
110          4639772,   4890013,   5221943,   5660703,
111         -6245623,  -7040975,  -8158494,  -9809974,
112        -12450076, -17261920, -28585092, -85479984
113    };
114
115    int i, k;
116
117    for (i = 0; i < 8; i++)
118        output[i] = mul23(cos_mod[i], input[i] + input[8 + i]);
119
120    for (i = 8, k = 7; i < 16; i++, k--)
121        output[i] = mul23(cos_mod[i], input[k] - input[8 + k]);
122}
123
124static void mod_b(int *input, int *output)
125{
126    static const int cos_mod[8] = {
127        4214598,  4383036,  4755871,  5425934,
128        6611520,  8897610, 14448934, 42791536
129    };
130
131    int i, k;
132
133    for (i = 0; i < 8; i++)
134        input[8 + i] = mul23(cos_mod[i], input[8 + i]);
135
136    for (i = 0; i < 8; i++)
137        output[i] = input[i] + input[8 + i];
138
139    for (i = 8, k = 7; i < 16; i++, k--)
140        output[i] = input[k] - input[8 + k];
141}
142
143static void mod_c(const int *input, int *output)
144{
145    static const int cos_mod[32] = {
146         1048892,  1051425,   1056522,   1064244,
147         1074689,  1087987,   1104313,   1123884,
148         1146975,  1173922,   1205139,   1241133,
149         1282529,  1330095,   1384791,   1447815,
150        -1520688, -1605358,  -1704360,  -1821051,
151        -1959964, -2127368,  -2332183,  -2587535,
152        -2913561, -3342802,  -3931480,  -4785806,
153        -6133390, -8566050, -14253820, -42727120
154    };
155
156    int i, k;
157
158    for (i = 0; i < 16; i++)
159        output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
160
161    for (i = 16, k = 15; i < 32; i++, k--)
162        output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
163}
164
165static void clp_v(int *input, int len)
166{
167    int i;
168
169    for (i = 0; i < len; i++)
170        input[i] = clip23(input[i]);
171}
172
173static void imdct_half_32(int32_t *output, const int32_t *input)
174{
175    int buf_a[32], buf_b[32];
176    int i, k, mag, shift, round;
177
178    mag = 0;
179    for (i = 0; i < 32; i++)
180        mag += abs(input[i]);
181
182    shift = mag > 0x400000 ? 2 : 0;
183    round = shift > 0 ? 1 << (shift - 1) : 0;
184
185    for (i = 0; i < 32; i++)
186        buf_a[i] = (input[i] + round) >> shift;
187
188    sum_a(buf_a, buf_b +  0, 16);
189    sum_b(buf_a, buf_b + 16, 16);
190    clp_v(buf_b, 32);
191
192    sum_a(buf_b +  0, buf_a +  0, 8);
193    sum_b(buf_b +  0, buf_a +  8, 8);
194    sum_c(buf_b + 16, buf_a + 16, 8);
195    sum_d(buf_b + 16, buf_a + 24, 8);
196    clp_v(buf_a, 32);
197
198    dct_a(buf_a +  0, buf_b +  0);
199    dct_b(buf_a +  8, buf_b +  8);
200    dct_b(buf_a + 16, buf_b + 16);
201    dct_b(buf_a + 24, buf_b + 24);
202    clp_v(buf_b, 32);
203
204    mod_a(buf_b +  0, buf_a +  0);
205    mod_b(buf_b + 16, buf_a + 16);
206    clp_v(buf_a, 32);
207
208    mod_c(buf_a, buf_b);
209
210    for (i = 0; i < 32; i++)
211        buf_b[i] = clip23(buf_b[i] * (1 << shift));
212
213    for (i = 0, k = 31; i < 16; i++, k--) {
214        output[     i] = clip23(buf_b[i] - buf_b[k]);
215        output[16 + i] = clip23(buf_b[i] + buf_b[k]);
216    }
217}
218
219static void mod64_a(const int *input, int *output)
220{
221    static const int cos_mod[32] = {
222          4195568,   4205700,   4226086,    4256977,
223          4298755,   4351949,   4417251,    4495537,
224          4587901,   4695690,   4820557,    4964534,
225          5130115,   5320382,   5539164,    5791261,
226         -6082752,  -6421430,  -6817439,   -7284203,
227         -7839855,  -8509474,  -9328732,  -10350140,
228        -11654242, -13371208, -15725922,  -19143224,
229        -24533560, -34264200, -57015280, -170908480
230    };
231
232    int i, k;
233
234    for (i = 0; i < 16; i++)
235        output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
236
237    for (i = 16, k = 15; i < 32; i++, k--)
238        output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
239}
240
241static void mod64_b(int *input, int *output)
242{
243    static const int cos_mod[16] = {
244         4199362,  4240198,  4323885,  4454708,
245         4639772,  4890013,  5221943,  5660703,
246         6245623,  7040975,  8158494,  9809974,
247        12450076, 17261920, 28585092, 85479984
248    };
249
250    int i, k;
251
252    for (i = 0; i < 16; i++)
253        input[16 + i] = mul23(cos_mod[i], input[16 + i]);
254
255    for (i = 0; i < 16; i++)
256        output[i] = input[i] + input[16 + i];
257
258    for (i = 16, k = 15; i < 32; i++, k--)
259        output[i] = input[k] - input[16 + k];
260}
261
262static void mod64_c(const int *input, int *output)
263{
264    static const int cos_mod[64] = {
265          741511,    741958,    742853,    744199,
266          746001,    748262,    750992,    754197,
267          757888,    762077,    766777,    772003,
268          777772,    784105,    791021,    798546,
269          806707,    815532,    825054,    835311,
270          846342,    858193,    870912,    884554,
271          899181,    914860,    931667,    949686,
272          969011,    989747,   1012012,   1035941,
273        -1061684,  -1089412,  -1119320,  -1151629,
274        -1186595,  -1224511,  -1265719,  -1310613,
275        -1359657,  -1413400,  -1472490,  -1537703,
276        -1609974,  -1690442,  -1780506,  -1881904,
277        -1996824,  -2128058,  -2279225,  -2455101,
278        -2662128,  -2909200,  -3208956,  -3579983,
279        -4050785,  -4667404,  -5509372,  -6726913,
280        -8641940, -12091426, -20144284, -60420720
281    };
282
283    int i, k;
284
285    for (i = 0; i < 32; i++)
286        output[i] = mul23(cos_mod[i], input[i] + input[32 + i]);
287
288    for (i = 32, k = 31; i < 64; i++, k--)
289        output[i] = mul23(cos_mod[i], input[k] - input[32 + k]);
290}
291
292static void imdct_half_64(int32_t *output, const int32_t *input)
293{
294    int buf_a[64], buf_b[64];
295    int i, k, mag, shift, round;
296
297    mag = 0;
298    for (i = 0; i < 64; i++)
299        mag += abs(input[i]);
300
301    shift = mag > 0x400000 ? 2 : 0;
302    round = shift > 0 ? 1 << (shift - 1) : 0;
303
304    for (i = 0; i < 64; i++)
305        buf_a[i] = (input[i] + round) >> shift;
306
307    sum_a(buf_a, buf_b +  0, 32);
308    sum_b(buf_a, buf_b + 32, 32);
309    clp_v(buf_b, 64);
310
311    sum_a(buf_b +  0, buf_a +  0, 16);
312    sum_b(buf_b +  0, buf_a + 16, 16);
313    sum_c(buf_b + 32, buf_a + 32, 16);
314    sum_d(buf_b + 32, buf_a + 48, 16);
315    clp_v(buf_a, 64);
316
317    sum_a(buf_a +  0, buf_b +  0, 8);
318    sum_b(buf_a +  0, buf_b +  8, 8);
319    sum_c(buf_a + 16, buf_b + 16, 8);
320    sum_d(buf_a + 16, buf_b + 24, 8);
321    sum_c(buf_a + 32, buf_b + 32, 8);
322    sum_d(buf_a + 32, buf_b + 40, 8);
323    sum_c(buf_a + 48, buf_b + 48, 8);
324    sum_d(buf_a + 48, buf_b + 56, 8);
325    clp_v(buf_b, 64);
326
327    dct_a(buf_b +  0, buf_a +  0);
328    dct_b(buf_b +  8, buf_a +  8);
329    dct_b(buf_b + 16, buf_a + 16);
330    dct_b(buf_b + 24, buf_a + 24);
331    dct_b(buf_b + 32, buf_a + 32);
332    dct_b(buf_b + 40, buf_a + 40);
333    dct_b(buf_b + 48, buf_a + 48);
334    dct_b(buf_b + 56, buf_a + 56);
335    clp_v(buf_a, 64);
336
337    mod_a(buf_a +  0, buf_b +  0);
338    mod_b(buf_a + 16, buf_b + 16);
339    mod_b(buf_a + 32, buf_b + 32);
340    mod_b(buf_a + 48, buf_b + 48);
341    clp_v(buf_b, 64);
342
343    mod64_a(buf_b +  0, buf_a +  0);
344    mod64_b(buf_b + 32, buf_a + 32);
345    clp_v(buf_a, 64);
346
347    mod64_c(buf_a, buf_b);
348
349    for (i = 0; i < 64; i++)
350        buf_b[i] = clip23(buf_b[i] * (1 << shift));
351
352    for (i = 0, k = 63; i < 32; i++, k--) {
353        output[     i] = clip23(buf_b[i] - buf_b[k]);
354        output[32 + i] = clip23(buf_b[i] + buf_b[k]);
355    }
356}
357
358av_cold void ff_dcadct_init(DCADCTContext *c)
359{
360    c->imdct_half[0] = imdct_half_32;
361    c->imdct_half[1] = imdct_half_64;
362}
363