/*
 * Copyright (C) 2004 The FFmpeg project
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20cabdff1aSopenharmony_ci
/**
 * @file
 * Standard C DSP-oriented functions cribbed from the original VP3
 * source code.
 */
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include <string.h>
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ci#include "config.h"
30cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
31cabdff1aSopenharmony_ci#include "libavutil/common.h"
32cabdff1aSopenharmony_ci#include "libavutil/internal.h"
33cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
34cabdff1aSopenharmony_ci#include "libavutil/avassert.h"
35cabdff1aSopenharmony_ci
36cabdff1aSopenharmony_ci#include "rnd_avg.h"
37cabdff1aSopenharmony_ci#include "vp3dsp.h"
38cabdff1aSopenharmony_ci
/* Rounding term (half of the final divide-by-16) used, scaled by 1 << 16,
 * in the DC-only paths of idct(). */
#define IdctAdjustBeforeShift 8

/* 16.16 fixed-point trigonometric constants:
 * xCnSm = round(65536 * cos(n * pi / 16)) = round(65536 * sin(m * pi / 16)). */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/* Fixed-point multiply: a * b is evaluated in SUINT (declared in a libavutil
 * header; presumably an unsigned type so the product cannot hit signed
 * overflow -- confirm), converted back to int and shifted right by 16. */
#define M(a, b) ((int)((SUINT)(a) * (b)) >> 16)
49cabdff1aSopenharmony_ci
50cabdff1aSopenharmony_cistatic av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
51cabdff1aSopenharmony_ci                                  int16_t *input, int type)
52cabdff1aSopenharmony_ci{
53cabdff1aSopenharmony_ci    int16_t *ip = input;
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_ci    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
56cabdff1aSopenharmony_ci    int Ed, Gd, Add, Bdd, Fd, Hd;
57cabdff1aSopenharmony_ci
58cabdff1aSopenharmony_ci    int i;
59cabdff1aSopenharmony_ci
60cabdff1aSopenharmony_ci    /* Inverse DCT on the rows now */
61cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
62cabdff1aSopenharmony_ci        /* Check for non-zero values */
63cabdff1aSopenharmony_ci        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
64cabdff1aSopenharmony_ci            ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
65cabdff1aSopenharmony_ci            A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
66cabdff1aSopenharmony_ci            B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
67cabdff1aSopenharmony_ci            C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
68cabdff1aSopenharmony_ci            D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
69cabdff1aSopenharmony_ci
70cabdff1aSopenharmony_ci            Ad = M(xC4S4, (A - C));
71cabdff1aSopenharmony_ci            Bd = M(xC4S4, (B - D));
72cabdff1aSopenharmony_ci
73cabdff1aSopenharmony_ci            Cd = A + C;
74cabdff1aSopenharmony_ci            Dd = B + D;
75cabdff1aSopenharmony_ci
76cabdff1aSopenharmony_ci            E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
77cabdff1aSopenharmony_ci            F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
78cabdff1aSopenharmony_ci
79cabdff1aSopenharmony_ci            G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
80cabdff1aSopenharmony_ci            H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
81cabdff1aSopenharmony_ci
82cabdff1aSopenharmony_ci            Ed = E - G;
83cabdff1aSopenharmony_ci            Gd = E + G;
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_ci            Add = F + Ad;
86cabdff1aSopenharmony_ci            Bdd = Bd - H;
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ci            Fd = F - Ad;
89cabdff1aSopenharmony_ci            Hd = Bd + H;
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci            /*  Final sequence of operations over-write original inputs. */
92cabdff1aSopenharmony_ci            ip[0 * 8] = Gd + Cd;
93cabdff1aSopenharmony_ci            ip[7 * 8] = Gd - Cd;
94cabdff1aSopenharmony_ci
95cabdff1aSopenharmony_ci            ip[1 * 8] = Add + Hd;
96cabdff1aSopenharmony_ci            ip[2 * 8] = Add - Hd;
97cabdff1aSopenharmony_ci
98cabdff1aSopenharmony_ci            ip[3 * 8] = Ed + Dd;
99cabdff1aSopenharmony_ci            ip[4 * 8] = Ed - Dd;
100cabdff1aSopenharmony_ci
101cabdff1aSopenharmony_ci            ip[5 * 8] = Fd + Bdd;
102cabdff1aSopenharmony_ci            ip[6 * 8] = Fd - Bdd;
103cabdff1aSopenharmony_ci        }
104cabdff1aSopenharmony_ci
105cabdff1aSopenharmony_ci        ip += 1;            /* next row */
106cabdff1aSopenharmony_ci    }
107cabdff1aSopenharmony_ci
108cabdff1aSopenharmony_ci    ip = input;
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
111cabdff1aSopenharmony_ci        /* Check for non-zero values (bitwise or faster than ||) */
112cabdff1aSopenharmony_ci        if (ip[1] | ip[2] | ip[3] |
113cabdff1aSopenharmony_ci            ip[4] | ip[5] | ip[6] | ip[7]) {
114cabdff1aSopenharmony_ci            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
115cabdff1aSopenharmony_ci            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
116cabdff1aSopenharmony_ci            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
117cabdff1aSopenharmony_ci            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
118cabdff1aSopenharmony_ci
119cabdff1aSopenharmony_ci            Ad = M(xC4S4, (A - C));
120cabdff1aSopenharmony_ci            Bd = M(xC4S4, (B - D));
121cabdff1aSopenharmony_ci
122cabdff1aSopenharmony_ci            Cd = A + C;
123cabdff1aSopenharmony_ci            Dd = B + D;
124cabdff1aSopenharmony_ci
125cabdff1aSopenharmony_ci            E = M(xC4S4, (ip[0] + ip[4])) + 8;
126cabdff1aSopenharmony_ci            F = M(xC4S4, (ip[0] - ip[4])) + 8;
127cabdff1aSopenharmony_ci
128cabdff1aSopenharmony_ci            if (type == 1) { // HACK
129cabdff1aSopenharmony_ci                E += 16 * 128;
130cabdff1aSopenharmony_ci                F += 16 * 128;
131cabdff1aSopenharmony_ci            }
132cabdff1aSopenharmony_ci
133cabdff1aSopenharmony_ci            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
134cabdff1aSopenharmony_ci            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_ci            Ed = E - G;
137cabdff1aSopenharmony_ci            Gd = E + G;
138cabdff1aSopenharmony_ci
139cabdff1aSopenharmony_ci            Add = F + Ad;
140cabdff1aSopenharmony_ci            Bdd = Bd - H;
141cabdff1aSopenharmony_ci
142cabdff1aSopenharmony_ci            Fd = F - Ad;
143cabdff1aSopenharmony_ci            Hd = Bd + H;
144cabdff1aSopenharmony_ci
145cabdff1aSopenharmony_ci            /* Final sequence of operations over-write original inputs. */
146cabdff1aSopenharmony_ci            if (type == 1) {
147cabdff1aSopenharmony_ci                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
148cabdff1aSopenharmony_ci                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
149cabdff1aSopenharmony_ci
150cabdff1aSopenharmony_ci                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
151cabdff1aSopenharmony_ci                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
152cabdff1aSopenharmony_ci
153cabdff1aSopenharmony_ci                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
154cabdff1aSopenharmony_ci                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
155cabdff1aSopenharmony_ci
156cabdff1aSopenharmony_ci                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
157cabdff1aSopenharmony_ci                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
158cabdff1aSopenharmony_ci            } else {
159cabdff1aSopenharmony_ci                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
160cabdff1aSopenharmony_ci                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
161cabdff1aSopenharmony_ci
162cabdff1aSopenharmony_ci                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
163cabdff1aSopenharmony_ci                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_ci                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
166cabdff1aSopenharmony_ci                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
167cabdff1aSopenharmony_ci
168cabdff1aSopenharmony_ci                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
169cabdff1aSopenharmony_ci                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
170cabdff1aSopenharmony_ci            }
171cabdff1aSopenharmony_ci        } else {
172cabdff1aSopenharmony_ci            if (type == 1) {
173cabdff1aSopenharmony_ci                dst[0*stride] =
174cabdff1aSopenharmony_ci                dst[1*stride] =
175cabdff1aSopenharmony_ci                dst[2*stride] =
176cabdff1aSopenharmony_ci                dst[3*stride] =
177cabdff1aSopenharmony_ci                dst[4*stride] =
178cabdff1aSopenharmony_ci                dst[5*stride] =
179cabdff1aSopenharmony_ci                dst[6*stride] =
180cabdff1aSopenharmony_ci                dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
181cabdff1aSopenharmony_ci            } else {
182cabdff1aSopenharmony_ci                if (ip[0]) {
183cabdff1aSopenharmony_ci                    int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
184cabdff1aSopenharmony_ci                    dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
185cabdff1aSopenharmony_ci                    dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
186cabdff1aSopenharmony_ci                    dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
187cabdff1aSopenharmony_ci                    dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
188cabdff1aSopenharmony_ci                    dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
189cabdff1aSopenharmony_ci                    dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
190cabdff1aSopenharmony_ci                    dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
191cabdff1aSopenharmony_ci                    dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
192cabdff1aSopenharmony_ci                }
193cabdff1aSopenharmony_ci            }
194cabdff1aSopenharmony_ci        }
195cabdff1aSopenharmony_ci
196cabdff1aSopenharmony_ci        ip += 8;            /* next column */
197cabdff1aSopenharmony_ci        dst++;
198cabdff1aSopenharmony_ci    }
199cabdff1aSopenharmony_ci}
200cabdff1aSopenharmony_ci
201cabdff1aSopenharmony_cistatic av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride,
202cabdff1aSopenharmony_ci                                    int16_t *input, int type)
203cabdff1aSopenharmony_ci{
204cabdff1aSopenharmony_ci    int16_t *ip = input;
205cabdff1aSopenharmony_ci
206cabdff1aSopenharmony_ci    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
207cabdff1aSopenharmony_ci    int Ed, Gd, Add, Bdd, Fd, Hd;
208cabdff1aSopenharmony_ci
209cabdff1aSopenharmony_ci    int i;
210cabdff1aSopenharmony_ci
211cabdff1aSopenharmony_ci    /* Inverse DCT on the rows now */
212cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
213cabdff1aSopenharmony_ci        /* Check for non-zero values */
214cabdff1aSopenharmony_ci        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) {
215cabdff1aSopenharmony_ci            A =  M(xC1S7, ip[1 * 8]);
216cabdff1aSopenharmony_ci            B =  M(xC7S1, ip[1 * 8]);
217cabdff1aSopenharmony_ci            C =  M(xC3S5, ip[3 * 8]);
218cabdff1aSopenharmony_ci            D = -M(xC5S3, ip[3 * 8]);
219cabdff1aSopenharmony_ci
220cabdff1aSopenharmony_ci            Ad = M(xC4S4, (A - C));
221cabdff1aSopenharmony_ci            Bd = M(xC4S4, (B - D));
222cabdff1aSopenharmony_ci
223cabdff1aSopenharmony_ci            Cd = A + C;
224cabdff1aSopenharmony_ci            Dd = B + D;
225cabdff1aSopenharmony_ci
226cabdff1aSopenharmony_ci            E = M(xC4S4, ip[0 * 8]);
227cabdff1aSopenharmony_ci            F = E;
228cabdff1aSopenharmony_ci
229cabdff1aSopenharmony_ci            G = M(xC2S6, ip[2 * 8]);
230cabdff1aSopenharmony_ci            H = M(xC6S2, ip[2 * 8]);
231cabdff1aSopenharmony_ci
232cabdff1aSopenharmony_ci            Ed = E - G;
233cabdff1aSopenharmony_ci            Gd = E + G;
234cabdff1aSopenharmony_ci
235cabdff1aSopenharmony_ci            Add = F + Ad;
236cabdff1aSopenharmony_ci            Bdd = Bd - H;
237cabdff1aSopenharmony_ci
238cabdff1aSopenharmony_ci            Fd = F - Ad;
239cabdff1aSopenharmony_ci            Hd = Bd + H;
240cabdff1aSopenharmony_ci
241cabdff1aSopenharmony_ci            /* Final sequence of operations over-write original inputs */
242cabdff1aSopenharmony_ci            ip[0 * 8] = Gd + Cd;
243cabdff1aSopenharmony_ci            ip[7 * 8] = Gd - Cd;
244cabdff1aSopenharmony_ci
245cabdff1aSopenharmony_ci            ip[1 * 8] = Add + Hd;
246cabdff1aSopenharmony_ci            ip[2 * 8] = Add - Hd;
247cabdff1aSopenharmony_ci
248cabdff1aSopenharmony_ci            ip[3 * 8] = Ed + Dd;
249cabdff1aSopenharmony_ci            ip[4 * 8] = Ed - Dd;
250cabdff1aSopenharmony_ci
251cabdff1aSopenharmony_ci            ip[5 * 8] = Fd + Bdd;
252cabdff1aSopenharmony_ci            ip[6 * 8] = Fd - Bdd;
253cabdff1aSopenharmony_ci
254cabdff1aSopenharmony_ci        }
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci        ip += 1;
257cabdff1aSopenharmony_ci    }
258cabdff1aSopenharmony_ci
259cabdff1aSopenharmony_ci    ip = input;
260cabdff1aSopenharmony_ci
261cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++) {
262cabdff1aSopenharmony_ci        /* Check for non-zero values (bitwise or faster than ||) */
263cabdff1aSopenharmony_ci        if (ip[0] | ip[1] | ip[2] | ip[3]) {
264cabdff1aSopenharmony_ci            A =  M(xC1S7, ip[1]);
265cabdff1aSopenharmony_ci            B =  M(xC7S1, ip[1]);
266cabdff1aSopenharmony_ci            C =  M(xC3S5, ip[3]);
267cabdff1aSopenharmony_ci            D = -M(xC5S3, ip[3]);
268cabdff1aSopenharmony_ci
269cabdff1aSopenharmony_ci            Ad = M(xC4S4, (A - C));
270cabdff1aSopenharmony_ci            Bd = M(xC4S4, (B - D));
271cabdff1aSopenharmony_ci
272cabdff1aSopenharmony_ci            Cd = A + C;
273cabdff1aSopenharmony_ci            Dd = B + D;
274cabdff1aSopenharmony_ci
275cabdff1aSopenharmony_ci            E = M(xC4S4, ip[0]);
276cabdff1aSopenharmony_ci            if (type == 1)
277cabdff1aSopenharmony_ci                E += 16 * 128;
278cabdff1aSopenharmony_ci            F = E;
279cabdff1aSopenharmony_ci
280cabdff1aSopenharmony_ci            G = M(xC2S6, ip[2]);
281cabdff1aSopenharmony_ci            H = M(xC6S2, ip[2]);
282cabdff1aSopenharmony_ci
283cabdff1aSopenharmony_ci            Ed = E - G;
284cabdff1aSopenharmony_ci            Gd = E + G;
285cabdff1aSopenharmony_ci
286cabdff1aSopenharmony_ci            Add = F + Ad;
287cabdff1aSopenharmony_ci            Bdd = Bd - H;
288cabdff1aSopenharmony_ci
289cabdff1aSopenharmony_ci            Fd = F - Ad;
290cabdff1aSopenharmony_ci            Hd = Bd + H;
291cabdff1aSopenharmony_ci
292cabdff1aSopenharmony_ci            Gd += 8;
293cabdff1aSopenharmony_ci            Add += 8;
294cabdff1aSopenharmony_ci            Ed += 8;
295cabdff1aSopenharmony_ci            Fd += 8;
296cabdff1aSopenharmony_ci
297cabdff1aSopenharmony_ci            /* Final sequence of operations over-write original inputs. */
298cabdff1aSopenharmony_ci            if (type == 1) {
299cabdff1aSopenharmony_ci                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
300cabdff1aSopenharmony_ci                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
301cabdff1aSopenharmony_ci
302cabdff1aSopenharmony_ci                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
303cabdff1aSopenharmony_ci                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
304cabdff1aSopenharmony_ci
305cabdff1aSopenharmony_ci                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
306cabdff1aSopenharmony_ci                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_ci                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
309cabdff1aSopenharmony_ci                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
310cabdff1aSopenharmony_ci            } else {
311cabdff1aSopenharmony_ci                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
312cabdff1aSopenharmony_ci                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
313cabdff1aSopenharmony_ci
314cabdff1aSopenharmony_ci                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
315cabdff1aSopenharmony_ci                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
316cabdff1aSopenharmony_ci
317cabdff1aSopenharmony_ci                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
318cabdff1aSopenharmony_ci                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
319cabdff1aSopenharmony_ci
320cabdff1aSopenharmony_ci                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
321cabdff1aSopenharmony_ci                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
322cabdff1aSopenharmony_ci            }
323cabdff1aSopenharmony_ci        } else {
324cabdff1aSopenharmony_ci            if (type == 1) {
325cabdff1aSopenharmony_ci                dst[0*stride] =
326cabdff1aSopenharmony_ci                dst[1*stride] =
327cabdff1aSopenharmony_ci                dst[2*stride] =
328cabdff1aSopenharmony_ci                dst[3*stride] =
329cabdff1aSopenharmony_ci                dst[4*stride] =
330cabdff1aSopenharmony_ci                dst[5*stride] =
331cabdff1aSopenharmony_ci                dst[6*stride] =
332cabdff1aSopenharmony_ci                dst[7*stride] = 128;
333cabdff1aSopenharmony_ci            }
334cabdff1aSopenharmony_ci        }
335cabdff1aSopenharmony_ci
336cabdff1aSopenharmony_ci        ip += 8;
337cabdff1aSopenharmony_ci        dst++;
338cabdff1aSopenharmony_ci    }
339cabdff1aSopenharmony_ci}
340cabdff1aSopenharmony_ci
/**
 * Run the reduced IDCT in "put" mode (type 1) on block, writing the result
 * to dest, then clear all 64 coefficients of block.
 */
void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    idct10(dest, stride, block, 1);
    memset(block, 0, sizeof(*block) * 64);
}
346cabdff1aSopenharmony_ci
/**
 * Run the reduced IDCT in "add" mode (type 2) on block, adding the result
 * to the pixels already in dest, then clear all 64 coefficients of block.
 */
void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    idct10(dest, stride, block, 2);
    memset(block, 0, sizeof(*block) * 64);
}
352cabdff1aSopenharmony_ci
/**
 * Run the full IDCT in "put" mode (type 1) on block, writing the result to
 * dest, then clear all 64 coefficients of block.
 */
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    idct(dest, stride, block, 1);
    memset(block, 0, sizeof(*block) * 64);
}
359cabdff1aSopenharmony_ci
/**
 * Run the full IDCT in "add" mode (type 2) on block, adding the result to
 * the pixels already in dest, then clear all 64 coefficients of block.
 */
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                           int16_t *block /* align 16 */)
{
    idct(dest, stride, block, 2);
    memset(block, 0, sizeof(*block) * 64);
}
366cabdff1aSopenharmony_ci
/**
 * DC-only shortcut: add one constant, derived from block[0], to every pixel
 * of the 8x8 area at dest (with clipping to 0..255), then clear block[0].
 *
 * Only block[0] is read or written; the other coefficients are not touched.
 */
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                              int16_t *block /* align 16 */)
{
    const int dc = (block[0] + 15) >> 5;
    int x, y;

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            dest[x] = av_clip_uint8(dest[x] + dc);
        dest += stride;
    }
    block[0] = 0;
}
385cabdff1aSopenharmony_ci
386cabdff1aSopenharmony_cistatic av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
387cabdff1aSopenharmony_ci                                                 int *bounding_values, int count)
388cabdff1aSopenharmony_ci{
389cabdff1aSopenharmony_ci    unsigned char *end;
390cabdff1aSopenharmony_ci    int filter_value;
391cabdff1aSopenharmony_ci    const ptrdiff_t nstride = -stride;
392cabdff1aSopenharmony_ci
393cabdff1aSopenharmony_ci    for (end = first_pixel + count; first_pixel < end; first_pixel++) {
394cabdff1aSopenharmony_ci        filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
395cabdff1aSopenharmony_ci                       (first_pixel[0] - first_pixel[nstride]) * 3;
396cabdff1aSopenharmony_ci        filter_value = bounding_values[(filter_value + 4) >> 3];
397cabdff1aSopenharmony_ci
398cabdff1aSopenharmony_ci        first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
399cabdff1aSopenharmony_ci        first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
400cabdff1aSopenharmony_ci    }
401cabdff1aSopenharmony_ci}
402cabdff1aSopenharmony_ci
403cabdff1aSopenharmony_cistatic av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
404cabdff1aSopenharmony_ci                                                 int *bounding_values, int count)
405cabdff1aSopenharmony_ci{
406cabdff1aSopenharmony_ci    unsigned char *end;
407cabdff1aSopenharmony_ci    int filter_value;
408cabdff1aSopenharmony_ci
409cabdff1aSopenharmony_ci    for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) {
410cabdff1aSopenharmony_ci        filter_value = (first_pixel[-2] - first_pixel[1]) +
411cabdff1aSopenharmony_ci                       (first_pixel[ 0] - first_pixel[-1]) * 3;
412cabdff1aSopenharmony_ci        filter_value = bounding_values[(filter_value + 4) >> 3];
413cabdff1aSopenharmony_ci
414cabdff1aSopenharmony_ci        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
415cabdff1aSopenharmony_ci        first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
416cabdff1aSopenharmony_ci    }
417cabdff1aSopenharmony_ci}
418cabdff1aSopenharmony_ci
/*
 * Define <prefix>_<dim>_loop_filter_<count><suffix>(): a fixed-length entry
 * point that forwards to vp3_<dim>_loop_filter_c() with a constant count.
 * The storage class (if any) is written in front of the macro invocation.
 */
#define LOOP_FILTER(prefix, suffix, dim, count) \
void prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \
                                int *bounding_values) \
{ \
    vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \
}

/* File-local vp3_v_loop_filter_8_c() and vp3_h_loop_filter_8_c(). */
static LOOP_FILTER(vp3,_c, v, 8)
static LOOP_FILTER(vp3,_c, h, 8)
/* Exported ff_vp3dsp_v_loop_filter_12() and ff_vp3dsp_h_loop_filter_12(). */
LOOP_FILTER(ff_vp3dsp, , v, 12)
LOOP_FILTER(ff_vp3dsp, , h, 12)
430cabdff1aSopenharmony_ci
/**
 * Combine two 8-byte-wide sources into dst, h rows, four bytes at a time
 * with no_rnd_avg32() (from rnd_avg.h; presumably a per-byte average that
 * rounds down -- confirm).
 *
 * All three buffers use the same stride. Sources are read with AV_RN32 and
 * dst is written with AV_WN32A (presumably the aligned-access variant, i.e.
 * dst rows are expected to be 4-byte aligned -- confirm).
 *
 * @param dst    destination, 8 bytes written per row
 * @param src1   first source
 * @param src2   second source
 * @param stride byte step between consecutive rows of all three buffers
 * @param h      number of rows
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int i, x;

    for (i = 0; i < h; i++) {
        const ptrdiff_t row = i * stride;

        /* Two 32-bit words cover the 8-byte row. */
        for (x = 0; x < 8; x += 4) {
            uint32_t a = AV_RN32(&src1[row + x]);
            uint32_t b = AV_RN32(&src2[row + x]);

            AV_WN32A(&dst[row + x], no_rnd_avg32(a, b));
        }
    }
}
447cabdff1aSopenharmony_ci
448cabdff1aSopenharmony_ciav_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
449cabdff1aSopenharmony_ci{
450cabdff1aSopenharmony_ci    c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
451cabdff1aSopenharmony_ci
452cabdff1aSopenharmony_ci    c->idct_put      = vp3_idct_put_c;
453cabdff1aSopenharmony_ci    c->idct_add      = vp3_idct_add_c;
454cabdff1aSopenharmony_ci    c->idct_dc_add   = vp3_idct_dc_add_c;
455cabdff1aSopenharmony_ci    c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c;
456cabdff1aSopenharmony_ci    c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c;
457cabdff1aSopenharmony_ci
458cabdff1aSopenharmony_ci#if ARCH_ARM
459cabdff1aSopenharmony_ci    ff_vp3dsp_init_arm(c, flags);
460cabdff1aSopenharmony_ci#elif ARCH_PPC
461cabdff1aSopenharmony_ci    ff_vp3dsp_init_ppc(c, flags);
462cabdff1aSopenharmony_ci#elif ARCH_X86
463cabdff1aSopenharmony_ci    ff_vp3dsp_init_x86(c, flags);
464cabdff1aSopenharmony_ci#elif ARCH_MIPS
465cabdff1aSopenharmony_ci    ff_vp3dsp_init_mips(c, flags);
466cabdff1aSopenharmony_ci#endif
467cabdff1aSopenharmony_ci}
468cabdff1aSopenharmony_ci
/*
 * Build the loop filter lookup table for the given filter limit.
 *
 * With bounding_values = bounding_values_array + 127, the table maps a
 * filter input x in [-127, 128] to
 *      x                                  if |x| <  filter_limit
 *      sign(x) * (2 * filter_limit - |x|) if filter_limit <= |x| < 2 * filter_limit
 *      0                                  otherwise
 *
 * bounding_values[129] and bounding_values[130] are additionally set to
 * 2 * filter_limit replicated into each of the four bytes (presumably for
 * SIMD implementations -- confirm).
 *
 * bounding_values_array must therefore hold at least 258 ints: the first
 * 256 are cleared here and indices 256 and 257 are written as well.
 *
 * filter_limit must be in the range 0..127; this is asserted.
 */
void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit)
{
    int *bounding_values = bounding_values_array + 127;
    int x;
    int value;

    /* Unsigned comparison: also rejects negative limits. */
    av_assert0(filter_limit < 128U);

    /* Start from an all-zero table. */
    memset(bounding_values_array, 0, 256 * sizeof(*bounding_values_array));

    /* Identity part: |x| < filter_limit. */
    for (x = 0; x < filter_limit; x++) {
        bounding_values[-x] = -x;
        bounding_values[x] = x;
    }

    /* Falling ramp from filter_limit back down to zero. */
    for (x = value = filter_limit; x < 128 && value; x++, value--) {
        bounding_values[ x] =  value;
        bounding_values[-x] = -value;
    }

    /* The ramp did not reach zero before x == 128 (filter_limit > 64):
     * entry 128 exists only on the positive side. */
    if (value)
        bounding_values[128] = value;

    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202U;
}
499