/*
 * VC-1 and WMV3 decoder - DSP functions
 * Copyright (c) 2006 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
/**
 * @file
 * VC-1 and WMV3 decoder - DSP functions
 */
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "config_components.h"
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ci#include "libavutil/avassert.h"
30cabdff1aSopenharmony_ci#include "libavutil/common.h"
31cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
32cabdff1aSopenharmony_ci#include "h264chroma.h"
33cabdff1aSopenharmony_ci#include "qpeldsp.h"
34cabdff1aSopenharmony_ci#include "rnd_avg.h"
35cabdff1aSopenharmony_ci#include "vc1dsp.h"
36cabdff1aSopenharmony_ci#include "startcode.h"
37cabdff1aSopenharmony_ci#include "vc1_common.h"
38cabdff1aSopenharmony_ci
/* Apply overlap transform to horizontal edge.
 *
 * For each of 8 consecutive columns the four samples at src - 2 * stride,
 * src - stride, src and src + stride are smoothed. The rounding term
 * alternates 1, 0, 1, 0, ... from one column to the next. Only the two
 * samples next to the edge are clipped explicitly. */
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *const px    = src + col;
        const int      rnd   = !(col & 1);
        const int      p1    = px[-2 * stride];
        const int      p0    = px[-stride];
        const int      q0    = px[0];
        const int      q1    = px[stride];
        const int      outer = (p1 - q1 + 3 + rnd) >> 3;
        const int      inner = (p1 - q1 + p0 - q0 + 4 - rnd) >> 3;

        px[-2 * stride] = p1 - outer;
        px[-stride]     = av_clip_uint8(p0 - inner);
        px[0]           = av_clip_uint8(q0 + inner);
        px[stride]      = q1 + outer;
    }
}
62cabdff1aSopenharmony_ci
/* Apply overlap transform to vertical edge.
 *
 * For each of 8 consecutive rows (stride apart) the four samples
 * src[-2] .. src[1] are smoothed. The rounding term alternates
 * 1, 0, 1, 0, ... from one row to the next. Only the two samples next to
 * the edge are clipped explicitly. */
static void vc1_h_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const int rnd   = !(row & 1);
        const int p1    = src[-2];
        const int p0    = src[-1];
        const int q0    = src[0];
        const int q1    = src[1];
        const int outer = (p1 - q1 + 3 + rnd) >> 3;
        const int inner = (p1 - q1 + p0 - q0 + 4 - rnd) >> 3;

        src[-2] = p1 - outer;
        src[-1] = av_clip_uint8(p0 - inner);
        src[0]  = av_clip_uint8(q0 + inner);
        src[1]  = q1 + outer;
    }
}
86cabdff1aSopenharmony_ci
/* Overlap smoothing on 16-bit coefficients across the boundary between two
 * vertically adjacent 8x8 blocks (row pitch 8).
 *
 * For each of the 8 columns, rows 6 and 7 of top and rows 0 and 1 of bottom
 * are filtered in place. The two rounding constants start at 4 and 3 and
 * swap after every column. */
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int16_t *const top_row6 = top + 48;
    int16_t *const top_row7 = top + 56;
    int16_t *const bot_row0 = bottom;
    int16_t *const bot_row1 = bottom + 8;
    int col;

    for (col = 0; col < 8; col++) {
        /* rnd_a applies to top row 6 / bottom row 0, rnd_b to the others */
        const int rnd_a = (col & 1) ? 3 : 4;
        const int rnd_b = 7 - rnd_a;
        const int p1    = top_row6[col];
        const int p0    = top_row7[col];
        const int q0    = bot_row0[col];
        const int q1    = bot_row1[col];
        const int outer = p1 - q1;
        const int inner = p1 - q1 + p0 - q0;

        top_row6[col] = ((p1 * 8) - outer + rnd_a) >> 3;
        top_row7[col] = ((p0 * 8) - inner + rnd_b) >> 3;
        bot_row0[col] = ((q0 * 8) + inner + rnd_a) >> 3;
        bot_row1[col] = ((q1 * 8) + outer + rnd_b) >> 3;
    }
}
112cabdff1aSopenharmony_ci
/* Overlap smoothing on 16-bit coefficients across the boundary between two
 * horizontally adjacent blocks.
 *
 * For each of 8 rows, columns 6 and 7 of left and columns 0 and 1 of right
 * are filtered in place; left and right advance by their own stride.
 *
 * flags bit 1 selects the initial rounding pair (set: 3/4, clear: 4/3);
 * flags bit 0 makes the two rounding constants swap after every row. */
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
{
    const int alternate = flags & 1;
    int rnd_a = (flags & 2) ? 3 : 4; /* used for left[6] and right[0] */
    int rnd_b = 7 - rnd_a;           /* used for left[7] and right[1] */
    int row;

    for (row = 0; row < 8; row++, right += right_stride, left += left_stride) {
        const int p1    = left[6];
        const int p0    = left[7];
        const int q0    = right[0];
        const int q1    = right[1];
        const int outer = p1 - q1;
        const int inner = p1 - q1 + p0 - q0;

        left[6]  = ((p1 * 8) - outer + rnd_a) >> 3;
        left[7]  = ((p0 * 8) - inner + rnd_b) >> 3;
        right[0] = ((q0 * 8) + inner + rnd_a) >> 3;
        right[1] = ((q1 * 8) + outer + rnd_b) >> 3;

        if (alternate) {
            const int swap = rnd_a;

            rnd_a = rnd_b;
            rnd_b = swap;
        }
    }
}
141cabdff1aSopenharmony_ci
142cabdff1aSopenharmony_ci/**
143cabdff1aSopenharmony_ci * VC-1 in-loop deblocking filter for one line
144cabdff1aSopenharmony_ci * @param src source block type
145cabdff1aSopenharmony_ci * @param stride block stride
146cabdff1aSopenharmony_ci * @param pq block quantizer
147cabdff1aSopenharmony_ci * @return whether other 3 pairs should be filtered or not
148cabdff1aSopenharmony_ci * @see 8.6
149cabdff1aSopenharmony_ci */
150cabdff1aSopenharmony_cistatic av_always_inline int vc1_filter_line(uint8_t *src, ptrdiff_t stride, int pq)
151cabdff1aSopenharmony_ci{
152cabdff1aSopenharmony_ci    int a0 = (2 * (src[-2 * stride] - src[1 * stride]) -
153cabdff1aSopenharmony_ci              5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
154cabdff1aSopenharmony_ci    int a0_sign = a0 >> 31;        /* Store sign */
155cabdff1aSopenharmony_ci
156cabdff1aSopenharmony_ci    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
157cabdff1aSopenharmony_ci    if (a0 < pq) {
158cabdff1aSopenharmony_ci        int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
159cabdff1aSopenharmony_ci                        5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
160cabdff1aSopenharmony_ci        int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
161cabdff1aSopenharmony_ci                        5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
162cabdff1aSopenharmony_ci        if (a1 < a0 || a2 < a0) {
163cabdff1aSopenharmony_ci            int clip      = src[-1 * stride] - src[0 * stride];
164cabdff1aSopenharmony_ci            int clip_sign = clip >> 31;
165cabdff1aSopenharmony_ci
166cabdff1aSopenharmony_ci            clip = ((clip ^ clip_sign) - clip_sign) >> 1;
167cabdff1aSopenharmony_ci            if (clip) {
168cabdff1aSopenharmony_ci                int a3     = FFMIN(a1, a2);
169cabdff1aSopenharmony_ci                int d      = 5 * (a3 - a0);
170cabdff1aSopenharmony_ci                int d_sign = (d >> 31);
171cabdff1aSopenharmony_ci
172cabdff1aSopenharmony_ci                d       = ((d ^ d_sign) - d_sign) >> 3;
173cabdff1aSopenharmony_ci                d_sign ^= a0_sign;
174cabdff1aSopenharmony_ci
175cabdff1aSopenharmony_ci                if (d_sign ^ clip_sign)
176cabdff1aSopenharmony_ci                    d = 0;
177cabdff1aSopenharmony_ci                else {
178cabdff1aSopenharmony_ci                    d = FFMIN(d, clip);
179cabdff1aSopenharmony_ci                    d = (d ^ d_sign) - d_sign; /* Restore sign */
180cabdff1aSopenharmony_ci                    src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
181cabdff1aSopenharmony_ci                    src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
182cabdff1aSopenharmony_ci                }
183cabdff1aSopenharmony_ci                return 1;
184cabdff1aSopenharmony_ci            }
185cabdff1aSopenharmony_ci        }
186cabdff1aSopenharmony_ci    }
187cabdff1aSopenharmony_ci    return 0;
188cabdff1aSopenharmony_ci}
189cabdff1aSopenharmony_ci
/**
 * VC-1 in-loop deblocking filter
 *
 * Lines are processed in groups of four. The third line of each group is
 * filtered first; the remaining three are filtered only if it reports that
 * filtering took place.
 *
 * @param src    pointer to the first sample past the edge on the first line
 * @param step   distance between consecutive lines along the edge
 * @param stride distance between consecutive samples of one line
 *               (across the edge)
 * @param len    number of lines to filter; processed four at a time
 *               (callers in this file use 4, 8 and 16)
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, ptrdiff_t step, ptrdiff_t stride,
                                   int len, int pq)
{
    int i;

    /* step is ptrdiff_t because the horizontal filters pass the picture
     * stride here; an int parameter would narrow it implicitly. */
    for (i = 0; i < len; i += 4) {
        if (vc1_filter_line(src + 2 * step, stride, pq)) {
            vc1_filter_line(src + 0 * step, stride, pq);
            vc1_filter_line(src + 1 * step, stride, pq);
            vc1_filter_line(src + 3 * step, stride, pq);
        }
        src += step * 4;
    }
}
215cabdff1aSopenharmony_ci
/* Deblock 4 lines: consecutive lines are 1 element apart, the samples of
 * each line are stride elements apart. */
static void vc1_v_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 4;

    vc1_loop_filter(src, 1, stride, num_lines, pq);
}
220cabdff1aSopenharmony_ci
/* Deblock 4 lines: consecutive lines are stride elements apart, the samples
 * of each line are contiguous. */
static void vc1_h_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 4;

    vc1_loop_filter(src, stride, 1, num_lines, pq);
}
225cabdff1aSopenharmony_ci
/* Deblock 8 lines: consecutive lines are 1 element apart, the samples of
 * each line are stride elements apart. */
static void vc1_v_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 8;

    vc1_loop_filter(src, 1, stride, num_lines, pq);
}
230cabdff1aSopenharmony_ci
/* Deblock 8 lines: consecutive lines are stride elements apart, the samples
 * of each line are contiguous. */
static void vc1_h_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 8;

    vc1_loop_filter(src, stride, 1, num_lines, pq);
}
235cabdff1aSopenharmony_ci
/* Deblock 16 lines: consecutive lines are 1 element apart, the samples of
 * each line are stride elements apart. */
static void vc1_v_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 16;

    vc1_loop_filter(src, 1, stride, num_lines, pq);
}
240cabdff1aSopenharmony_ci
/* Deblock 16 lines: consecutive lines are stride elements apart, the samples
 * of each line are contiguous. */
static void vc1_h_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int num_lines = 16;

    vc1_loop_filter(src, stride, 1, num_lines, pq);
}
245cabdff1aSopenharmony_ci
/* Do inverse transform on 8x8 block when only the DC coefficient is used:
 * block[0] is scaled in two rounding steps and the result is added, with
 * clipping to 0..255, to every sample of the 8x8 area at dest. */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int dc_first = (3 * block[0] + 1) >> 1;
    const int dc       = (3 * dc_first + 16) >> 5;
    int row, col;

    for (row = 0; row < 8; row++, dest += stride) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
    }
}
267cabdff1aSopenharmony_ci
/* In-place 8x8 inverse transform of block.
 *
 * Pass 1 runs the 8-point transform over elements 8 apart (block[n],
 * block[n + 8], ...) with rounding 4 and shift 3, storing the 8 results
 * contiguously in a temporary. Pass 2 does the same over the temporary with
 * rounding 64 and shift 7, writing back to block with pitch 8; its last four
 * outputs get an additional +1 before the shift. */
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int16_t temp[64];
    int n, k;

    for (n = 0; n < 8; n++) {
        const int16_t *const in  = block + n;
        int16_t       *const out = temp + 8 * n;
        int even[4], odd[4];
        const int sum   = 12 * (in[ 0] + in[32]) + 4;
        const int diff  = 12 * (in[ 0] - in[32]) + 4;
        const int rot_a = 16 * in[16] +  6 * in[48];
        const int rot_b =  6 * in[16] - 16 * in[48];

        even[0] = sum  + rot_a;
        even[1] = diff + rot_b;
        even[2] = diff - rot_b;
        even[3] = sum  - rot_a;

        odd[0] = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        odd[1] = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        odd[2] =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        odd[3] =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        /* outputs k and 7 - k share the same even/odd terms */
        for (k = 0; k < 4; k++) {
            out[k]     = (even[k] + odd[k]) >> 3;
            out[7 - k] = (even[k] - odd[k]) >> 3;
        }
    }

    for (n = 0; n < 8; n++) {
        const int16_t *const in  = temp + n;
        int16_t       *const out = block + n;
        int even[4], odd[4];
        const int sum   = 12 * (in[ 0] + in[32]) + 64;
        const int diff  = 12 * (in[ 0] - in[32]) + 64;
        const int rot_a = 16 * in[16] +  6 * in[48];
        const int rot_b =  6 * in[16] - 16 * in[48];

        even[0] = sum  + rot_a;
        even[1] = diff + rot_b;
        even[2] = diff - rot_b;
        even[3] = sum  - rot_a;

        odd[0] = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        odd[1] = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        odd[2] =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        odd[3] =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        for (k = 0; k < 4; k++) {
            out[8 * k]       = (even[k] + odd[k])     >> 7;
            out[8 * (7 - k)] = (even[k] - odd[k] + 1) >> 7;
        }
    }
}
336cabdff1aSopenharmony_ci
/* Do inverse transform on 8x4 part of block when only the DC coefficient is
 * used: block[0] is scaled in two rounding steps and the result is added,
 * with clipping to 0..255, to 4 rows of 8 samples at dest. */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int dc_first = (3 * block[0] + 1) >> 1;
    const int dc       = (17 * dc_first + 64) >> 7;
    int row, col;

    for (row = 0; row < 4; row++, dest += stride) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
    }
}
358cabdff1aSopenharmony_ci
/* Inverse transform of the first 4 rows (8 coefficients each, pitch 8) of
 * block, added to dest.
 *
 * Pass 1 applies the 8-point transform to each row in place (rounding 4,
 * shift 3). Pass 2 applies the 4-point transform down each of the 8 columns
 * (rounding 64, shift 7) and adds the result to dest with clipping. */
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col, k;

    for (row = 0; row < 4; row++) {
        int16_t *const line = block + 8 * row;
        int even[4], odd[4];
        const int sum   = 12 * (line[0] + line[4]) + 4;
        const int diff  = 12 * (line[0] - line[4]) + 4;
        const int rot_a = 16 * line[2] +  6 * line[6];
        const int rot_b =  6 * line[2] - 16 * line[6];

        even[0] = sum  + rot_a;
        even[1] = diff + rot_b;
        even[2] = diff - rot_b;
        even[3] = sum  - rot_a;

        odd[0] = 16 * line[1] + 15 * line[3] +  9 * line[5] +  4 * line[7];
        odd[1] = 15 * line[1] -  4 * line[3] - 16 * line[5] -  9 * line[7];
        odd[2] =  9 * line[1] - 16 * line[3] +  4 * line[5] + 15 * line[7];
        odd[3] =  4 * line[1] -  9 * line[3] + 15 * line[5] - 16 * line[7];

        /* every input of the row has been read; safe to overwrite it */
        for (k = 0; k < 4; k++) {
            line[k]     = (even[k] + odd[k]) >> 3;
            line[7 - k] = (even[k] - odd[k]) >> 3;
        }
    }

    for (col = 0; col < 8; col++) {
        const int16_t *const in  = block + col;
        uint8_t       *const out = dest + col;
        const int sum   = 17 * (in[ 0] + in[16]) + 64;
        const int diff  = 17 * (in[ 0] - in[16]) + 64;
        const int rot_a = 22 * in[ 8] + 10 * in[24];
        const int rot_b = 22 * in[24] - 10 * in[ 8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum  + rot_a) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((diff - rot_b) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((diff + rot_b) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum  - rot_a) >> 7));
    }
}
413cabdff1aSopenharmony_ci
/* Do inverse transform on 4x8 parts of block when only the DC coefficient
 * is used: block[0] is scaled in two rounding steps and the result is added,
 * with clipping to 0..255, to 8 rows of 4 samples at dest. */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int dc_first = (17 * block[0] + 4) >> 3;
    const int dc       = (12 * dc_first + 64) >> 7;
    int row, col;

    for (row = 0; row < 8; row++, dest += stride) {
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
    }
}
431cabdff1aSopenharmony_ci
/* Inverse transform of the first 4 coefficients of each of the 8 rows
 * (pitch 8) of block, added to dest.
 *
 * Pass 1 applies the 4-point transform to each row in place (rounding 4,
 * shift 3). Pass 2 applies the 8-point transform down each of the 4 columns
 * (rounding 64, shift 7, with an extra +1 on the last four outputs) and adds
 * the result to dest with clipping. */
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col, k;

    for (row = 0; row < 8; row++) {
        int16_t *const line = block + 8 * row;
        const int sum   = 17 * (line[0] + line[2]) + 4;
        const int diff  = 17 * (line[0] - line[2]) + 4;
        const int rot_a = 22 * line[1] + 10 * line[3];
        const int rot_b = 22 * line[3] - 10 * line[1];

        line[0] = (sum  + rot_a) >> 3;
        line[1] = (diff - rot_b) >> 3;
        line[2] = (diff + rot_b) >> 3;
        line[3] = (sum  - rot_a) >> 3;
    }

    for (col = 0; col < 4; col++) {
        const int16_t *const in  = block + col;
        uint8_t       *const out = dest + col;
        int even[4], odd[4];
        const int sum   = 12 * (in[ 0] + in[32]) + 64;
        const int diff  = 12 * (in[ 0] - in[32]) + 64;
        const int rot_a = 16 * in[16] +  6 * in[48];
        const int rot_b =  6 * in[16] - 16 * in[48];

        even[0] = sum  + rot_a;
        even[1] = diff + rot_b;
        even[2] = diff - rot_b;
        even[3] = sum  - rot_a;

        odd[0] = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        odd[1] = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        odd[2] =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        odd[3] =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        /* rows k and 7 - k share the same even/odd terms */
        for (k = 0; k < 4; k++) {
            out[k * stride]       = av_clip_uint8(out[k * stride] +
                                                  ((even[k] + odd[k]) >> 7));
            out[(7 - k) * stride] = av_clip_uint8(out[(7 - k) * stride] +
                                                  ((even[k] - odd[k] + 1) >> 7));
        }
    }
}
486cabdff1aSopenharmony_ci
/* Do inverse transform on 4x4 part of block when only the DC coefficient is
 * used: block[0] is scaled in two rounding steps and the result is added,
 * with clipping to 0..255, to 4 rows of 4 samples at dest. */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    const int dc_first = (17 * block[0] + 4) >> 3;
    const int dc       = (17 * dc_first + 64) >> 7;
    int row, col;

    for (row = 0; row < 4; row++, dest += stride) {
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
    }
}
504cabdff1aSopenharmony_ci
/* Inverse transform of the 4x4 coefficients at the start of block (pitch 8),
 * added to dest.
 *
 * Pass 1 applies the 4-point transform to each of the 4 rows in place
 * (rounding 4, shift 3). Pass 2 applies it down each of the 4 columns
 * (rounding 64, shift 7) and adds the result to dest with clipping. */
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        int16_t *const line = block + 8 * row;
        const int sum   = 17 * (line[0] + line[2]) + 4;
        const int diff  = 17 * (line[0] - line[2]) + 4;
        const int rot_a = 22 * line[1] + 10 * line[3];
        const int rot_b = 22 * line[3] - 10 * line[1];

        line[0] = (sum  + rot_a) >> 3;
        line[1] = (diff - rot_b) >> 3;
        line[2] = (diff + rot_b) >> 3;
        line[3] = (sum  - rot_a) >> 3;
    }

    for (col = 0; col < 4; col++) {
        const int16_t *const in  = block + col;
        uint8_t       *const out = dest + col;
        const int sum   = 17 * (in[0] + in[16]) + 64;
        const int diff  = 17 * (in[0] - in[16]) + 64;
        const int rot_a = 22 * in[8]  + 10 * in[24];
        const int rot_b = 22 * in[24] - 10 * in[8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum  + rot_a) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((diff - rot_b) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((diff + rot_b) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum  - rot_a) >> 7));
    }
}
544cabdff1aSopenharmony_ci
545cabdff1aSopenharmony_ci/* motion compensation functions */
546cabdff1aSopenharmony_ci
547cabdff1aSopenharmony_ci/* Filter in case of 2 filters */
548cabdff1aSopenharmony_ci#define VC1_MSPEL_FILTER_16B(DIR, TYPE)                                       \
549cabdff1aSopenharmony_cistatic av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \
550cabdff1aSopenharmony_ci                                                                int stride,   \
551cabdff1aSopenharmony_ci                                                                int mode)     \
552cabdff1aSopenharmony_ci{                                                                             \
553cabdff1aSopenharmony_ci    switch(mode) {                                                            \
554cabdff1aSopenharmony_ci    case 0: /* no shift - should not occur */                                 \
555cabdff1aSopenharmony_ci        return 0;                                                             \
556cabdff1aSopenharmony_ci    case 1: /* 1/4 shift */                                                   \
557cabdff1aSopenharmony_ci        return -4 * src[-stride] + 53 * src[0] +                              \
558cabdff1aSopenharmony_ci               18 * src[stride]  -  3 * src[stride * 2];                      \
559cabdff1aSopenharmony_ci    case 2: /* 1/2 shift */                                                   \
560cabdff1aSopenharmony_ci        return -1 * src[-stride] +  9 * src[0] +                              \
561cabdff1aSopenharmony_ci                9 * src[stride]  -  1 * src[stride * 2];                      \
562cabdff1aSopenharmony_ci    case 3: /* 3/4 shift */                                                   \
563cabdff1aSopenharmony_ci        return -3 * src[-stride] + 18 * src[0] +                              \
564cabdff1aSopenharmony_ci               53 * src[stride]  -  4 * src[stride * 2];                      \
565cabdff1aSopenharmony_ci    }                                                                         \
566cabdff1aSopenharmony_ci    return 0; /* should not occur */                                          \
567cabdff1aSopenharmony_ci}
568cabdff1aSopenharmony_ci
569cabdff1aSopenharmony_ciVC1_MSPEL_FILTER_16B(ver, uint8_t)
570cabdff1aSopenharmony_ciVC1_MSPEL_FILTER_16B(hor, int16_t)
571cabdff1aSopenharmony_ci
572cabdff1aSopenharmony_ci/* Filter used to interpolate fractional pel values */
573cabdff1aSopenharmony_cistatic av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
574cabdff1aSopenharmony_ci                                             int mode, int r)
575cabdff1aSopenharmony_ci{
576cabdff1aSopenharmony_ci    switch (mode) {
577cabdff1aSopenharmony_ci    case 0: // no shift
578cabdff1aSopenharmony_ci        return src[0];
579cabdff1aSopenharmony_ci    case 1: // 1/4 shift
580cabdff1aSopenharmony_ci        return (-4 * src[-stride] + 53 * src[0] +
581cabdff1aSopenharmony_ci                18 * src[stride]  -  3 * src[stride * 2] + 32 - r) >> 6;
582cabdff1aSopenharmony_ci    case 2: // 1/2 shift
583cabdff1aSopenharmony_ci        return (-1 * src[-stride] +  9 * src[0] +
584cabdff1aSopenharmony_ci                 9 * src[stride]  -  1 * src[stride * 2] + 8 - r) >> 4;
585cabdff1aSopenharmony_ci    case 3: // 3/4 shift
586cabdff1aSopenharmony_ci        return (-3 * src[-stride] + 18 * src[0] +
587cabdff1aSopenharmony_ci                53 * src[stride]  -  4 * src[stride * 2] + 32 - r) >> 6;
588cabdff1aSopenharmony_ci    }
589cabdff1aSopenharmony_ci    return 0; // should not occur
590cabdff1aSopenharmony_ci}
591cabdff1aSopenharmony_ci
/**
 * Motion compensation with bicubic interpolation.
 *
 * Each expansion of VC1_MSPEL_MC defines four functions whose names start
 * with OPNAME:
 *  - vc1_mspel_mc():    filters an 8x8 block
 *  - vc1_mspel_mc_16(): filters a 16x16 block
 *  - pixels8x8_c() and pixels16x16_c(): process a block one 32-bit word at
 *    a time without any filtering
 *
 * OP(dst, value) stores one filtered sample and OP4(dst32, value32) stores
 * one 32-bit word of unfiltered samples.
 *
 * hmode / vmode select the horizontal / vertical filter (0 = none) and rnd
 * only enters the rounding constants. src and dst share the same stride.
 */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                    const uint8_t *src,       \
                                                    ptrdiff_t stride,         \
                                                    int hmode,                \
                                                    int vmode,                \
                                                    int rnd)                  \
{                                                                             \
    int i, j;                                                                 \
                                                                              \
    if (vmode) { /* Vertical filter to apply */                               \
        int r;                                                                \
                                                                              \
        if (hmode) { /* Horizontal filter too: vertical pass goes to tmp */   \
            static const int shift_value[] = { 0, 5, 1, 5 };                  \
            int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;       \
            /* 8 rows of 11 columns: 8 outputs + 3 extra horizontal taps */   \
            int16_t tmp[11 * 8], *tptr = tmp;                                 \
                                                                              \
            r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
            /* the horizontal filter also reads the column left of dst[0] */  \
            src -= 1;                                                         \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 11; i++)                                      \
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
                src  += stride;                                               \
                tptr += 11;                                                   \
            }                                                                 \
                                                                              \
            r    = 64 - rnd;                                                  \
            /* skip the extra left column so that tptr[i] matches dst[i] */   \
            tptr = tmp + 1;                                                   \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 8; i++)                                       \
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
                dst  += stride;                                               \
                tptr += 11;                                                   \
            }                                                                 \
                                                                              \
            return;                                                           \
        } else { /* No horizontal filter, output 8 lines to dst */            \
            r = 1 - rnd;                                                      \
                                                                              \
            for (j = 0; j < 8; j++) {                                         \
                for (i = 0; i < 8; i++)                                       \
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));  \
                src += stride;                                                \
                dst += stride;                                                \
            }                                                                 \
            return;                                                           \
        }                                                                     \
    }                                                                         \
                                                                              \
    /* Horizontal mode with no vertical mode */                               \
    for (j = 0; j < 8; j++) {                                                 \
        for (i = 0; i < 8; i++)                                               \
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));             \
        dst += stride;                                                        \
        src += stride;                                                        \
    }                                                                         \
}\
/* 16x16 variant of the function above: same structure, 19-column tmp */      \
static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd)               \
{                                                                             \
    int i, j;                                                                 \
                                                                              \
    if (vmode) { /* Vertical filter to apply */                               \
        int r;                                                                \
                                                                              \
        if (hmode) { /* Horizontal filter too: vertical pass goes to tmp */   \
            static const int shift_value[] = { 0, 5, 1, 5 };                  \
            int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;       \
            /* 16 rows of 19 columns: 16 outputs + 3 extra horizontal taps */ \
            int16_t tmp[19 * 16], *tptr = tmp;                                \
                                                                              \
            r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
            /* the horizontal filter also reads the column left of dst[0] */  \
            src -= 1;                                                         \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 19; i++)                                      \
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
                src  += stride;                                               \
                tptr += 19;                                                   \
            }                                                                 \
                                                                              \
            r    = 64 - rnd;                                                  \
            /* skip the extra left column so that tptr[i] matches dst[i] */   \
            tptr = tmp + 1;                                                   \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 16; i++)                                      \
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
                dst  += stride;                                               \
                tptr += 19;                                                   \
            }                                                                 \
                                                                              \
            return;                                                           \
        } else { /* No horizontal filter, output 16 lines to dst */           \
            r = 1 - rnd;                                                      \
                                                                              \
            for (j = 0; j < 16; j++) {                                        \
                for (i = 0; i < 16; i++)                                      \
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));  \
                src += stride;                                                \
                dst += stride;                                                \
            }                                                                 \
            return;                                                           \
        }                                                                     \
    }                                                                         \
                                                                              \
    /* Horizontal mode with no vertical mode */                               \
    for (j = 0; j < 16; j++) {                                                \
        for (i = 0; i < 16; i++)                                              \
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));             \
        dst += stride;                                                        \
        src += stride;                                                        \
    }                                                                         \
}\
/* Unfiltered 8x8 block: two 32-bit words per row; rnd is not used. */        \
/* block is accessed through uint32_t *, pixels through AV_RN32().  */        \
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
    int i;\
    for(i=0; i<8; i++){\
        OP4(*(uint32_t*)(block  ), AV_RN32(pixels  ));\
        OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* Unfiltered 16x16 block: four 32-bit words per row; rnd is not used. */     \
static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
    int i;\
    for(i=0; i<16; i++){\
        OP4(*(uint32_t*)(block   ), AV_RN32(pixels   ));\
        OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\
        OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\
        OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}
730cabdff1aSopenharmony_ci
/*
 * Store operators plugged into VC1_MSPEL_MC.
 * op_put / op_avg work on one sample: b is passed through av_clip_uint8()
 * and then either stored or averaged (with +1 before the shift) with the
 * value already in a.
 * op4_put / op4_avg work on one 32-bit word: plain store, or rnd_avg32()
 * of the old and new word.
 */
#define op_put(a, b) (a) = av_clip_uint8(b)
#define op_avg(a, b) (a) = ((a) + av_clip_uint8(b) + 1) >> 1
#define op4_avg(a, b) (a) = rnd_avg32(a, b)
#define op4_put(a, b) (a) = (b)

/* Instantiate the put_ and avg_ function families. */
VC1_MSPEL_MC(op_put, op4_put, put_)
VC1_MSPEL_MC(op_avg, op4_avg, avg_)
738cabdff1aSopenharmony_ci
/*
 * Pixel functions - the real entry points to vc1_mspel_mc.
 *
 * PUT_VC1_MSPEL(HM, VM) emits four thin wrappers for one fractional
 * position: put and avg, each for the 8x8 and the 16x16 block.  Every
 * wrapper forwards to the matching always-inline worker with HM as the
 * horizontal mode and VM as the vertical mode, both literal constants.
 */
#define PUT_VC1_MSPEL(HM, VM)                                                 \
static void put_vc1_mspel_mc ## HM ## VM ## _c(uint8_t *dst,                  \
                                               const uint8_t *src,            \
                                               ptrdiff_t stride, int rnd)     \
{                                                                             \
    put_vc1_mspel_mc(dst, src, stride, HM, VM, rnd);                          \
}                                                                             \
static void put_vc1_mspel_mc ## HM ## VM ## _16_c(uint8_t *dst,               \
                                                  const uint8_t *src,         \
                                                  ptrdiff_t stride, int rnd)  \
{                                                                             \
    put_vc1_mspel_mc_16(dst, src, stride, HM, VM, rnd);                       \
}                                                                             \
static void avg_vc1_mspel_mc ## HM ## VM ## _c(uint8_t *dst,                  \
                                               const uint8_t *src,            \
                                               ptrdiff_t stride, int rnd)     \
{                                                                             \
    avg_vc1_mspel_mc(dst, src, stride, HM, VM, rnd);                          \
}                                                                             \
static void avg_vc1_mspel_mc ## HM ## VM ## _16_c(uint8_t *dst,               \
                                                  const uint8_t *src,         \
                                                  ptrdiff_t stride, int rnd)  \
{                                                                             \
    avg_vc1_mspel_mc_16(dst, src, stride, HM, VM, rnd);                       \
}

/* vertical mode 0 (position 0,0 is served by the plain pixel copies) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* vertical mode 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* vertical mode 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* vertical mode 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
785cabdff1aSopenharmony_ci
/*
 * Interpolated chroma sample for column a of the current row.
 *
 * Expands to a weighted sum of the 2x2 neighbourhood src[a], src[a + 1],
 * src[stride + a] and src[stride + a + 1], biased by 32 - 4 = 28 and
 * shifted right by 6.  The expansion site must provide src, stride and the
 * four weights A, B, C and D.
 */
#define chroma_mc(a) \
    ((A * src[a] + B * src[a + 1] + \
      C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
/**
 * Write an 8-pixel-wide chroma block interpolated from a 2x2 neighbourhood.
 *
 * Each output is (sum of the four weighted neighbours + 28) >> 6, where
 * the weights are products of (8 - x) or x with (8 - y) or y.
 *
 * @param dst     destination, advanced by stride after every row
 * @param src     source, advanced by stride after every row; the row below
 *                and one extra column to the right are read as well
 * @param stride  line size shared by dst and src
 * @param h       number of rows to produce
 * @param x       horizontal fraction, asserted to be in 0..7
 * @param y       vertical fraction, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right sample */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr =       x * (8 - y);
    const int w_bl = (8 - x) *       y;
    const int w_br =       x *       y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (col = 0; col < 8; col++)
            dst[col] = (w_tl * top[col] + w_tr * top[col + 1] +
                        w_bl * bot[col] + w_br * bot[col + 1] + 32 - 4) >> 6;

        dst += stride;
        src += stride;
    }
}
814cabdff1aSopenharmony_ci
/**
 * Write a 4-pixel-wide chroma block interpolated from a 2x2 neighbourhood.
 *
 * Each output is (sum of the four weighted neighbours + 28) >> 6, where
 * the weights are products of (8 - x) or x with (8 - y) or y.
 *
 * @param dst     destination, advanced by stride after every row
 * @param src     source, advanced by stride after every row; the row below
 *                and one extra column to the right are read as well
 * @param stride  line size shared by dst and src
 * @param h       number of rows to produce
 * @param x       horizontal fraction, asserted to be in 0..7
 * @param y       vertical fraction, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right sample */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr =       x * (8 - y);
    const int w_bl = (8 - x) *       y;
    const int w_br =       x *       y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (col = 0; col < 4; col++)
            dst[col] = (w_tl * top[col] + w_tr * top[col + 1] +
                        w_bl * bot[col] + w_br * bot[col + 1] + 32 - 4) >> 6;

        dst += stride;
        src += stride;
    }
}
835cabdff1aSopenharmony_ci
/* Average of a and b with +1 added before the shift. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)

/**
 * Average an 8-pixel-wide interpolated chroma block into dst.
 *
 * The prediction for each pixel is (sum of the four weighted neighbours
 * + 28) >> 6; it is then combined with the value already in dst by avg2().
 *
 * @param dst     destination, read and written, advanced by stride per row
 * @param src     source, advanced by stride after every row; the row below
 *                and one extra column to the right are read as well
 * @param stride  line size shared by dst and src
 * @param h       number of rows to produce
 * @param x       horizontal fraction, asserted to be in 0..7
 * @param y       vertical fraction, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right sample */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr =       x * (8 - y);
    const int w_bl = (8 - x) *       y;
    const int w_br =       x *       y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (col = 0; col < 8; col++) {
            const int pred = (w_tl * top[col] + w_tr * top[col + 1] +
                              w_bl * bot[col] + w_br * bot[col + 1] +
                              32 - 4) >> 6;

            dst[col] = avg2(dst[col], pred);
        }

        dst += stride;
        src += stride;
    }
}
862cabdff1aSopenharmony_ci
/**
 * Average a 4-pixel-wide interpolated chroma block into dst.
 *
 * The prediction for each pixel is (sum of the four weighted neighbours
 * + 28) >> 6; it is then combined with the value already in dst by avg2().
 *
 * @param dst     destination, read and written, advanced by stride per row
 * @param src     source, advanced by stride after every row; the row below
 *                and one extra column to the right are read as well
 * @param stride  line size shared by dst and src
 * @param h       number of rows to produce
 * @param x       horizontal fraction, asserted to be in 0..7
 * @param y       vertical fraction, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right sample */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr =       x * (8 - y);
    const int w_bl = (8 - x) *       y;
    const int w_br =       x *       y;
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (col = 0; col < 4; col++) {
            const int pred = (w_tl * top[col] + w_tr * top[col + 1] +
                              w_bl * bot[col] + w_br * bot[col + 1] +
                              32 - 4) >> 6;

            dst[col] = avg2(dst[col], pred);
        }

        dst += stride;
        src += stride;
    }
}
884cabdff1aSopenharmony_ci
885cabdff1aSopenharmony_ci#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
886cabdff1aSopenharmony_ci
/**
 * Resample one sprite line horizontally with linear interpolation.
 *
 * offset is a 16.16 fixed-point position into src: the upper bits pick the
 * left sample, the low 16 bits weight the blend towards its right
 * neighbour.  The right neighbour is read even when the fraction is 0.
 *
 * @param dst      output samples, count of them are written
 * @param src      input line
 * @param offset   position of the first output sample (16.16)
 * @param advance  amount added to the position after every output sample
 * @param count    number of output samples
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    for (; count; count--, offset += advance) {
        const uint8_t *pos = src + (offset >> 16);
        const int frac     = offset & 0xFFFF;
        const int left     = pos[0];
        const int right    = pos[1];

        *dst++ = left + ((right - left) * frac >> 16);
    }
}
897cabdff1aSopenharmony_ci
898cabdff1aSopenharmony_cistatic av_always_inline void sprite_v_template(uint8_t *dst,
899cabdff1aSopenharmony_ci                                               const uint8_t *src1a,
900cabdff1aSopenharmony_ci                                               const uint8_t *src1b,
901cabdff1aSopenharmony_ci                                               int offset1,
902cabdff1aSopenharmony_ci                                               int two_sprites,
903cabdff1aSopenharmony_ci                                               const uint8_t *src2a,
904cabdff1aSopenharmony_ci                                               const uint8_t *src2b,
905cabdff1aSopenharmony_ci                                               int offset2,
906cabdff1aSopenharmony_ci                                               int alpha, int scaled,
907cabdff1aSopenharmony_ci                                               int width)
908cabdff1aSopenharmony_ci{
909cabdff1aSopenharmony_ci    int a1, b1, a2, b2;
910cabdff1aSopenharmony_ci    while (width--) {
911cabdff1aSopenharmony_ci        a1 = *src1a++;
912cabdff1aSopenharmony_ci        if (scaled) {
913cabdff1aSopenharmony_ci            b1 = *src1b++;
914cabdff1aSopenharmony_ci            a1 = a1 + ((b1 - a1) * offset1 >> 16);
915cabdff1aSopenharmony_ci        }
916cabdff1aSopenharmony_ci        if (two_sprites) {
917cabdff1aSopenharmony_ci            a2 = *src2a++;
918cabdff1aSopenharmony_ci            if (scaled > 1) {
919cabdff1aSopenharmony_ci                b2 = *src2b++;
920cabdff1aSopenharmony_ci                a2 = a2 + ((b2 - a2) * offset2 >> 16);
921cabdff1aSopenharmony_ci            }
922cabdff1aSopenharmony_ci            a1 = a1 + ((a2 - a1) * alpha >> 16);
923cabdff1aSopenharmony_ci        }
924cabdff1aSopenharmony_ci        *dst++ = a1;
925cabdff1aSopenharmony_ci    }
926cabdff1aSopenharmony_ci}
927cabdff1aSopenharmony_ci
/**
 * Vertical pass for a single sprite: blend line src1a towards line src1b
 * by offset / 65536 and write width samples to dst.
 */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    enum { TWO_SPRITES = 0, SCALED = 1 };

    sprite_v_template(dst,
                      src1a, src1b, offset,
                      TWO_SPRITES,
                      NULL, NULL, 0, /* no second sprite */
                      0,             /* alpha is not used */
                      SCALED, width);
}
934cabdff1aSopenharmony_ci
/**
 * Vertical pass for two sprites, neither of them scaled: blend line src1a
 * towards line src2a by alpha / 65536 and write width samples to dst.
 */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    enum { TWO_SPRITES = 1, SCALED = 0 };

    sprite_v_template(dst,
                      src1a, NULL, 0, /* first sprite, no second line */
                      TWO_SPRITES,
                      src2a, NULL, 0, /* second sprite, no second line */
                      alpha,
                      SCALED, width);
}
941cabdff1aSopenharmony_ci
/**
 * Vertical pass for two sprites where only the first one is scaled: blend
 * src1a towards src1b by offset1 / 65536, then blend that result towards
 * line src2a by alpha / 65536 and write width samples to dst.
 */
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    enum { TWO_SPRITES = 1, SCALED = 1 };

    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      TWO_SPRITES,
                      src2a, NULL, 0, /* second sprite, no second line */
                      alpha,
                      SCALED, width);
}
952cabdff1aSopenharmony_ci
/**
 * Vertical pass for two sprites, both scaled: blend src1a towards src1b by
 * offset1 / 65536 and src2a towards src2b by offset2 / 65536, then blend
 * the first result towards the second by alpha / 65536 and write width
 * samples to dst.
 */
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha,
                                       int width)
{
    enum { TWO_SPRITES = 1, SCALED = 2 };

    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      TWO_SPRITES,
                      src2a, src2b, offset2,
                      alpha,
                      SCALED, width);
}
966cabdff1aSopenharmony_ci
967cabdff1aSopenharmony_ci#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
/*
 * Install the C implementations for fractional position (X, Y) into the
 * four mspel tables of the VC1DSPContext named dsp at the expansion site.
 * Table row 1 receives the 8x8 functions (..._c), row 0 the 16x16 ones
 * (..._16_c); the slot inside a row is X + 4 * Y.
 *
 * X and Y must be literal digits because they are also pasted into the
 * function names.  The body is wrapped in do { } while (0) so that
 * "FN_ASSIGN(x, y);" is a single statement in every context.
 */
#define FN_ASSIGN(X, Y)                                                       \
    do {                                                                      \
        dsp->put_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] =                     \
            put_vc1_mspel_mc##X##Y##_c;                                       \
        dsp->put_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] =                     \
            put_vc1_mspel_mc##X##Y##_16_c;                                    \
        dsp->avg_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] =                     \
            avg_vc1_mspel_mc##X##Y##_c;                                       \
        dsp->avg_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] =                     \
            avg_vc1_mspel_mc##X##Y##_16_c;                                    \
    } while (0)
973cabdff1aSopenharmony_ci
974cabdff1aSopenharmony_ciav_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
975cabdff1aSopenharmony_ci{
976cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x8    = vc1_inv_trans_8x8_c;
977cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_4x8    = vc1_inv_trans_4x8_c;
978cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x4    = vc1_inv_trans_8x4_c;
979cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_4x4    = vc1_inv_trans_4x4_c;
980cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
981cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
982cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
983cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
984cabdff1aSopenharmony_ci
985cabdff1aSopenharmony_ci    dsp->vc1_h_overlap        = vc1_h_overlap_c;
986cabdff1aSopenharmony_ci    dsp->vc1_v_overlap        = vc1_v_overlap_c;
987cabdff1aSopenharmony_ci    dsp->vc1_h_s_overlap      = vc1_h_s_overlap_c;
988cabdff1aSopenharmony_ci    dsp->vc1_v_s_overlap      = vc1_v_s_overlap_c;
989cabdff1aSopenharmony_ci
990cabdff1aSopenharmony_ci    dsp->vc1_v_loop_filter4   = vc1_v_loop_filter4_c;
991cabdff1aSopenharmony_ci    dsp->vc1_h_loop_filter4   = vc1_h_loop_filter4_c;
992cabdff1aSopenharmony_ci    dsp->vc1_v_loop_filter8   = vc1_v_loop_filter8_c;
993cabdff1aSopenharmony_ci    dsp->vc1_h_loop_filter8   = vc1_h_loop_filter8_c;
994cabdff1aSopenharmony_ci    dsp->vc1_v_loop_filter16  = vc1_v_loop_filter16_c;
995cabdff1aSopenharmony_ci    dsp->vc1_h_loop_filter16  = vc1_h_loop_filter16_c;
996cabdff1aSopenharmony_ci
997cabdff1aSopenharmony_ci    dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
998cabdff1aSopenharmony_ci    dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
999cabdff1aSopenharmony_ci    dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
1000cabdff1aSopenharmony_ci    dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
1001cabdff1aSopenharmony_ci    FN_ASSIGN(0, 1);
1002cabdff1aSopenharmony_ci    FN_ASSIGN(0, 2);
1003cabdff1aSopenharmony_ci    FN_ASSIGN(0, 3);
1004cabdff1aSopenharmony_ci
1005cabdff1aSopenharmony_ci    FN_ASSIGN(1, 0);
1006cabdff1aSopenharmony_ci    FN_ASSIGN(1, 1);
1007cabdff1aSopenharmony_ci    FN_ASSIGN(1, 2);
1008cabdff1aSopenharmony_ci    FN_ASSIGN(1, 3);
1009cabdff1aSopenharmony_ci
1010cabdff1aSopenharmony_ci    FN_ASSIGN(2, 0);
1011cabdff1aSopenharmony_ci    FN_ASSIGN(2, 1);
1012cabdff1aSopenharmony_ci    FN_ASSIGN(2, 2);
1013cabdff1aSopenharmony_ci    FN_ASSIGN(2, 3);
1014cabdff1aSopenharmony_ci
1015cabdff1aSopenharmony_ci    FN_ASSIGN(3, 0);
1016cabdff1aSopenharmony_ci    FN_ASSIGN(3, 1);
1017cabdff1aSopenharmony_ci    FN_ASSIGN(3, 2);
1018cabdff1aSopenharmony_ci    FN_ASSIGN(3, 3);
1019cabdff1aSopenharmony_ci
1020cabdff1aSopenharmony_ci    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
1021cabdff1aSopenharmony_ci    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
1022cabdff1aSopenharmony_ci    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c;
1023cabdff1aSopenharmony_ci    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = avg_no_rnd_vc1_chroma_mc4_c;
1024cabdff1aSopenharmony_ci
1025cabdff1aSopenharmony_ci#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
1026cabdff1aSopenharmony_ci    dsp->sprite_h                 = sprite_h_c;
1027cabdff1aSopenharmony_ci    dsp->sprite_v_single          = sprite_v_single_c;
1028cabdff1aSopenharmony_ci    dsp->sprite_v_double_noscale  = sprite_v_double_noscale_c;
1029cabdff1aSopenharmony_ci    dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
1030cabdff1aSopenharmony_ci    dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
1031cabdff1aSopenharmony_ci#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
1032cabdff1aSopenharmony_ci
1033cabdff1aSopenharmony_ci    dsp->startcode_find_candidate = ff_startcode_find_candidate_c;
1034cabdff1aSopenharmony_ci    dsp->vc1_unescape_buffer      = vc1_unescape_buffer;
1035cabdff1aSopenharmony_ci
1036cabdff1aSopenharmony_ci#if ARCH_AARCH64
1037cabdff1aSopenharmony_ci    ff_vc1dsp_init_aarch64(dsp);
1038cabdff1aSopenharmony_ci#elif ARCH_ARM
1039cabdff1aSopenharmony_ci    ff_vc1dsp_init_arm(dsp);
1040cabdff1aSopenharmony_ci#elif ARCH_PPC
1041cabdff1aSopenharmony_ci    ff_vc1dsp_init_ppc(dsp);
1042cabdff1aSopenharmony_ci#elif ARCH_X86
1043cabdff1aSopenharmony_ci    ff_vc1dsp_init_x86(dsp);
1044cabdff1aSopenharmony_ci#elif ARCH_MIPS
1045cabdff1aSopenharmony_ci    ff_vc1dsp_init_mips(dsp);
1046cabdff1aSopenharmony_ci#elif ARCH_LOONGARCH
1047cabdff1aSopenharmony_ci    ff_vc1dsp_init_loongarch(dsp);
1048cabdff1aSopenharmony_ci#endif
1049cabdff1aSopenharmony_ci}
1050