xref: /third_party/ffmpeg/libavcodec/vp3dsp.c (revision cabdff1a)
1/*
2 * Copyright (C) 2004 The FFmpeg project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21/**
22 * @file
23 * Standard C DSP-oriented functions cribbed from the original VP3
24 * source code.
25 */
26
27#include <string.h>
28
29#include "config.h"
30#include "libavutil/attributes.h"
31#include "libavutil/common.h"
32#include "libavutil/internal.h"
33#include "libavutil/intreadwrite.h"
34#include "libavutil/avassert.h"
35
36#include "rnd_avg.h"
37#include "vp3dsp.h"
38
/* Rounding term of the DC-only IDCT shortcut: it is shifted left by 16 and
 * added to the scaled DC coefficient before the final right shift by 20. */
#define IdctAdjustBeforeShift 8

/* Cosine table in 16.16 fixed point:
 * xC<k>S<8-k> == round(65536 * cos(k * pi / 16)). */
#define xC1S7                 64277
#define xC2S6                 60547
#define xC3S5                 54491
#define xC4S4                 46341
#define xC5S3                 36410
#define xC6S2                 25080
#define xC7S1                 12785

/* Fixed-point multiply: a is cast to SUINT before multiplying by b, the
 * product is converted to int and the low 16 (fractional) bits are shifted
 * out. NOTE(review): SUINT is defined outside this file; presumably it is an
 * unsigned type so that the multiplication cannot overflow as signed. */
#define M(a, b)               (((int) ((SUINT) (a) * (b))) >> 16)
49
50static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
51                                  int16_t *input, int type)
52{
53    int16_t *ip = input;
54
55    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
56    int Ed, Gd, Add, Bdd, Fd, Hd;
57
58    int i;
59
60    /* Inverse DCT on the rows now */
61    for (i = 0; i < 8; i++) {
62        /* Check for non-zero values */
63        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
64            ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
65            A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
66            B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
67            C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
68            D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
69
70            Ad = M(xC4S4, (A - C));
71            Bd = M(xC4S4, (B - D));
72
73            Cd = A + C;
74            Dd = B + D;
75
76            E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
77            F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
78
79            G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
80            H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
81
82            Ed = E - G;
83            Gd = E + G;
84
85            Add = F + Ad;
86            Bdd = Bd - H;
87
88            Fd = F - Ad;
89            Hd = Bd + H;
90
91            /*  Final sequence of operations over-write original inputs. */
92            ip[0 * 8] = Gd + Cd;
93            ip[7 * 8] = Gd - Cd;
94
95            ip[1 * 8] = Add + Hd;
96            ip[2 * 8] = Add - Hd;
97
98            ip[3 * 8] = Ed + Dd;
99            ip[4 * 8] = Ed - Dd;
100
101            ip[5 * 8] = Fd + Bdd;
102            ip[6 * 8] = Fd - Bdd;
103        }
104
105        ip += 1;            /* next row */
106    }
107
108    ip = input;
109
110    for (i = 0; i < 8; i++) {
111        /* Check for non-zero values (bitwise or faster than ||) */
112        if (ip[1] | ip[2] | ip[3] |
113            ip[4] | ip[5] | ip[6] | ip[7]) {
114            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
115            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
116            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
117            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
118
119            Ad = M(xC4S4, (A - C));
120            Bd = M(xC4S4, (B - D));
121
122            Cd = A + C;
123            Dd = B + D;
124
125            E = M(xC4S4, (ip[0] + ip[4])) + 8;
126            F = M(xC4S4, (ip[0] - ip[4])) + 8;
127
128            if (type == 1) { // HACK
129                E += 16 * 128;
130                F += 16 * 128;
131            }
132
133            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
134            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
135
136            Ed = E - G;
137            Gd = E + G;
138
139            Add = F + Ad;
140            Bdd = Bd - H;
141
142            Fd = F - Ad;
143            Hd = Bd + H;
144
145            /* Final sequence of operations over-write original inputs. */
146            if (type == 1) {
147                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
148                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
149
150                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
151                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
152
153                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
154                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
155
156                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
157                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
158            } else {
159                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
160                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
161
162                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
163                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
164
165                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
166                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
167
168                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
169                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
170            }
171        } else {
172            if (type == 1) {
173                dst[0*stride] =
174                dst[1*stride] =
175                dst[2*stride] =
176                dst[3*stride] =
177                dst[4*stride] =
178                dst[5*stride] =
179                dst[6*stride] =
180                dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
181            } else {
182                if (ip[0]) {
183                    int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
184                    dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
185                    dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
186                    dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
187                    dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
188                    dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
189                    dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
190                    dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
191                    dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
192                }
193            }
194        }
195
196        ip += 8;            /* next column */
197        dst++;
198    }
199}
200
201static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride,
202                                    int16_t *input, int type)
203{
204    int16_t *ip = input;
205
206    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
207    int Ed, Gd, Add, Bdd, Fd, Hd;
208
209    int i;
210
211    /* Inverse DCT on the rows now */
212    for (i = 0; i < 4; i++) {
213        /* Check for non-zero values */
214        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) {
215            A =  M(xC1S7, ip[1 * 8]);
216            B =  M(xC7S1, ip[1 * 8]);
217            C =  M(xC3S5, ip[3 * 8]);
218            D = -M(xC5S3, ip[3 * 8]);
219
220            Ad = M(xC4S4, (A - C));
221            Bd = M(xC4S4, (B - D));
222
223            Cd = A + C;
224            Dd = B + D;
225
226            E = M(xC4S4, ip[0 * 8]);
227            F = E;
228
229            G = M(xC2S6, ip[2 * 8]);
230            H = M(xC6S2, ip[2 * 8]);
231
232            Ed = E - G;
233            Gd = E + G;
234
235            Add = F + Ad;
236            Bdd = Bd - H;
237
238            Fd = F - Ad;
239            Hd = Bd + H;
240
241            /* Final sequence of operations over-write original inputs */
242            ip[0 * 8] = Gd + Cd;
243            ip[7 * 8] = Gd - Cd;
244
245            ip[1 * 8] = Add + Hd;
246            ip[2 * 8] = Add - Hd;
247
248            ip[3 * 8] = Ed + Dd;
249            ip[4 * 8] = Ed - Dd;
250
251            ip[5 * 8] = Fd + Bdd;
252            ip[6 * 8] = Fd - Bdd;
253
254        }
255
256        ip += 1;
257    }
258
259    ip = input;
260
261    for (i = 0; i < 8; i++) {
262        /* Check for non-zero values (bitwise or faster than ||) */
263        if (ip[0] | ip[1] | ip[2] | ip[3]) {
264            A =  M(xC1S7, ip[1]);
265            B =  M(xC7S1, ip[1]);
266            C =  M(xC3S5, ip[3]);
267            D = -M(xC5S3, ip[3]);
268
269            Ad = M(xC4S4, (A - C));
270            Bd = M(xC4S4, (B - D));
271
272            Cd = A + C;
273            Dd = B + D;
274
275            E = M(xC4S4, ip[0]);
276            if (type == 1)
277                E += 16 * 128;
278            F = E;
279
280            G = M(xC2S6, ip[2]);
281            H = M(xC6S2, ip[2]);
282
283            Ed = E - G;
284            Gd = E + G;
285
286            Add = F + Ad;
287            Bdd = Bd - H;
288
289            Fd = F - Ad;
290            Hd = Bd + H;
291
292            Gd += 8;
293            Add += 8;
294            Ed += 8;
295            Fd += 8;
296
297            /* Final sequence of operations over-write original inputs. */
298            if (type == 1) {
299                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
300                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
301
302                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
303                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
304
305                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
306                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
307
308                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
309                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
310            } else {
311                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
312                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
313
314                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
315                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
316
317                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
318                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
319
320                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
321                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
322            }
323        } else {
324            if (type == 1) {
325                dst[0*stride] =
326                dst[1*stride] =
327                dst[2*stride] =
328                dst[3*stride] =
329                dst[4*stride] =
330                dst[5*stride] =
331                dst[6*stride] =
332                dst[7*stride] = 128;
333            }
334        }
335
336        ip += 8;
337        dst++;
338    }
339}
340
/**
 * Run the reduced IDCT (idct10) in store mode and clear the block.
 *
 * @param dest    top-left pixel of the 8x8 destination area
 * @param stride  distance in bytes between vertically adjacent pixels
 * @param block   64 coefficients; all of them are zero on return
 */
void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    /* type 1: the reconstructed pixels overwrite dest */
    idct10(dest, stride, block, 1);

    memset(block, 0, 64 * sizeof(block[0]));
}
346
/**
 * Run the reduced IDCT (idct10) in add mode and clear the block.
 *
 * @param dest    top-left pixel of the 8x8 destination area
 * @param stride  distance in bytes between vertically adjacent pixels
 * @param block   64 coefficients; all of them are zero on return
 */
void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    /* type 2: the result is added to the pixels already in dest */
    idct10(dest, stride, block, 2);

    memset(block, 0, 64 * sizeof(block[0]));
}
352
/**
 * Run the full IDCT in store mode and clear the block.
 *
 * @param dest    top-left pixel of the 8x8 destination area (annotated
 *                "align 8" by the original author)
 * @param stride  distance in bytes between vertically adjacent pixels
 * @param block   64 coefficients (annotated "align 16"); all of them are
 *                zero on return
 */
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    /* type 1: the reconstructed pixels overwrite dest */
    idct(dest, stride, block, 1);

    memset(block, 0, 64 * sizeof(block[0]));
}
359
/**
 * Run the full IDCT in add mode and clear the block.
 *
 * @param dest    top-left pixel of the 8x8 destination area (annotated
 *                "align 8" by the original author)
 * @param stride  distance in bytes between vertically adjacent pixels
 * @param block   64 coefficients (annotated "align 16"); all of them are
 *                zero on return
 */
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    /* type 2: the result is added to the pixels already in dest */
    idct(dest, stride, block, 2);

    memset(block, 0, 64 * sizeof(block[0]));
}
366
/**
 * DC-only reconstruction: add one constant, derived from block[0], to every
 * pixel of an 8x8 area, clipping each sum to 0..255.
 *
 * @param dest    top-left pixel of the 8x8 area (annotated "align 8")
 * @param stride  distance in bytes between vertically adjacent pixels
 * @param block   coefficient block (annotated "align 16"); only block[0] is
 *                read, and only block[0] is reset to zero
 */
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                              int16_t *block /* align 16 */)
{
    /* scaled DC term: +15 is the rounding offset for the >> 5 */
    const int dc = (block[0] + 15) >> 5;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
        dest += stride;
    }

    block[0] = 0;
}
385
386static av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
387                                                 int *bounding_values, int count)
388{
389    unsigned char *end;
390    int filter_value;
391    const ptrdiff_t nstride = -stride;
392
393    for (end = first_pixel + count; first_pixel < end; first_pixel++) {
394        filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
395                       (first_pixel[0] - first_pixel[nstride]) * 3;
396        filter_value = bounding_values[(filter_value + 4) >> 3];
397
398        first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
399        first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
400    }
401}
402
403static av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
404                                                 int *bounding_values, int count)
405{
406    unsigned char *end;
407    int filter_value;
408
409    for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) {
410        filter_value = (first_pixel[-2] - first_pixel[1]) +
411                       (first_pixel[ 0] - first_pixel[-1]) * 3;
412        filter_value = bounding_values[(filter_value + 4) >> 3];
413
414        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
415        first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
416    }
417}
418
/*
 * LOOP_FILTER(pfx, sfx, axis, len) defines the function
 *     <pfx>_<axis>_loop_filter_<len><sfx>(first_pixel, stride, bounding_values)
 * which forwards to vp3_<axis>_loop_filter_c() with len passed as the count
 * argument. Storage class (e.g. static) is written in front of the macro use.
 */
#define LOOP_FILTER(pfx, sfx, axis, len)                                       \
void pfx##_##axis##_loop_filter_##len##sfx(uint8_t *first_pixel,               \
                                           ptrdiff_t stride,                   \
                                           int *bounding_values)               \
{                                                                              \
    vp3_##axis##_loop_filter_c(first_pixel, stride, bounding_values, len);     \
}

/* file-local filters over 8 positions: vp3_v_loop_filter_8_c, vp3_h_loop_filter_8_c */
static LOOP_FILTER(vp3,_c, v, 8)
static LOOP_FILTER(vp3,_c, h, 8)
/* exported filters over 12 positions: ff_vp3dsp_v_loop_filter_12, ff_vp3dsp_h_loop_filter_12 */
LOOP_FILTER(ff_vp3dsp, , v, 12)
LOOP_FILTER(ff_vp3dsp, , h, 12)
430
/**
 * Combine two 8-pixel-wide sources with no_rnd_avg32() and store the result.
 *
 * Each row is handled as two 4-byte words: the words are read from src1 and
 * src2 with AV_RN32, combined with no_rnd_avg32() and written to dst with
 * AV_WN32A.
 * NOTE(review): AV_WN32A is presumably an aligned store, which would require
 * dst and stride to keep every row 4-byte aligned; confirm.
 *
 * @param dst     destination, 8 bytes written per row
 * @param src1    first source, 8 bytes read per row
 * @param src2    second source, 8 bytes read per row
 * @param stride  distance in bytes between rows, shared by all three buffers
 * @param h       number of rows
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int row, half;

    for (row = 0; row < h; row++) {
        const ptrdiff_t line = row * stride;

        /* bytes 0..3 first, then bytes 4..7 */
        for (half = 0; half < 8; half += 4) {
            const uint32_t a = AV_RN32(&src1[line + half]);
            const uint32_t b = AV_RN32(&src2[line + half]);

            AV_WN32A(&dst[line + half], no_rnd_avg32(a, b));
        }
    }
}
447
448av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
449{
450    c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
451
452    c->idct_put      = vp3_idct_put_c;
453    c->idct_add      = vp3_idct_add_c;
454    c->idct_dc_add   = vp3_idct_dc_add_c;
455    c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c;
456    c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c;
457
458#if ARCH_ARM
459    ff_vp3dsp_init_arm(c, flags);
460#elif ARCH_PPC
461    ff_vp3dsp_init_ppc(c, flags);
462#elif ARCH_X86
463    ff_vp3dsp_init_x86(c, flags);
464#elif ARCH_MIPS
465    ff_vp3dsp_init_mips(c, flags);
466#endif
467}
468
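/*
 * This function initializes the loop filter boundary limits. Callers are
 * expected to invoke it when the frame's quality index is different from
 * the previous frame's; the function itself performs no such check.
 *
 * bounding_values_array must hold at least 258 ints: elements 0..255 are
 * cleared and then filled with the lookup table, and elements 256 and 257
 * each receive 2 * filter_limit replicated into all four bytes of a 32-bit
 * word.
 *
 * filter_limit may not be larger than 127.
 */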
void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit)
{
    /* The table is addressed with signed indices relative to element 127. */
    int *const centre = bounding_values_array + 127;
    int idx;
    int ramp;

    /* The unsigned comparison also rejects negative limits. */
    av_assert0(filter_limit < 128U);

    /* Everything not written below stays zero. */
    memset(bounding_values_array, 0, 256 * sizeof(int));

    /* Identity part: entries -(filter_limit - 1) .. filter_limit - 1. */
    for (idx = 0; idx < filter_limit; idx++) {
        centre[-idx] = -idx;
        centre[ idx] =  idx;
    }

    /* Falling part: the magnitude drops by one per step until it reaches
     * zero or index 127 has been written. */
    idx  = filter_limit;
    ramp = filter_limit;
    while (idx < 128 && ramp) {
        centre[ idx] =  ramp;
        centre[-idx] = -ramp;
        idx++;
        ramp--;
    }

    /* The ramp was cut off at 127: continue it into the single extra
     * positive entry. */
    if (ramp)
        centre[128] = ramp;

    /* Two trailing words, each holding 2 * filter_limit in all four bytes. */
    centre[130] = filter_limit * 0x02020202U;
    centre[129] = centre[130];
}
499