1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
3cabdff1aSopenharmony_ci * Copyright (c) 2006 Konstantin Shishkov
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * This file is part of FFmpeg.
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
11cabdff1aSopenharmony_ci *
12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15cabdff1aSopenharmony_ci * Lesser General Public License for more details.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20cabdff1aSopenharmony_ci */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "config.h"
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
25cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
26cabdff1aSopenharmony_ci#include "libavutil/ppc/cpu.h"
27cabdff1aSopenharmony_ci#include "libavutil/ppc/util_altivec.h"
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h"
30cabdff1aSopenharmony_ci
31cabdff1aSopenharmony_ci#if HAVE_ALTIVEC
32cabdff1aSopenharmony_ci
/*
 * Main steps of the 8x8 transform: one 8-point pass applied lane-wise to the
 * eight vectors s0..s7, which are overwritten with the result.
 *
 * Even inputs (s0, s2, s4, s6):
 *     t0 = 12 * (s0 + s4) + vec_rnd      t2 = 16 * s2 +  6 * s6
 *     t1 = 12 * (s0 - s4) + vec_rnd      t3 =  6 * s2 - 16 * s6
 * Odd inputs (s1, s3, s5, s7):
 *     t0 = 16 * s1 + 15 * s3 +  9 * s5 +  4 * s7
 *     t1 = 15 * s1 -  4 * s3 - 16 * s5 -  9 * s7
 *     t2 =  9 * s1 - 16 * s3 +  4 * s5 + 15 * s7
 *     t3 =  4 * s1 -  9 * s3 + 15 * s5 - 16 * s7
 * Every multiplication is built from shifts and adds.
 *
 * The expansion uses t0..t7 as scratch and vec_1..vec_4 as shift counts; all
 * of them must be declared in the scope where the macro is used. No scaling
 * shift is applied here, see SHIFT_HOR8 / SHIFT_VERT8.
 */
#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
do { \
    t0 = vec_add(s0, s4); \
    t1 = vec_sub(s0, s4); \
    t0 = vec_add(vec_sl(t0, vec_3), vec_sl(t0, vec_2)); \
    t1 = vec_add(vec_sl(t1, vec_3), vec_sl(t1, vec_2)); \
    t0 = vec_add(t0, vec_rnd); \
    t1 = vec_add(t1, vec_rnd); \
    t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
    t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
    t2 = vec_add(vec_sl(s2, vec_4), t2); \
    t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
    t4 = vec_add(t0, t2); \
    t7 = vec_sub(t0, t2); \
    t5 = vec_add(t1, t3); \
    t6 = vec_sub(t1, t3); \
\
    t0 = vec_add(vec_sl(vec_add(s1, s3), vec_4), vec_sl(s5, vec_3)); \
    t0 = vec_add(vec_add(t0, vec_sl(s7, vec_2)), vec_sub(s5, s3)); \
\
    t1 = vec_sub(vec_sl(vec_sub(s1, s5), vec_4), vec_sl(s7, vec_3)); \
    t1 = vec_sub(vec_sub(t1, vec_sl(s3, vec_2)), vec_add(s1, s7)); \
\
    t2 = vec_add(vec_sl(vec_sub(s7, s3), vec_4), vec_sl(s1, vec_3)); \
    t2 = vec_add(vec_add(t2, vec_sl(s5, vec_2)), vec_sub(s1, s7)); \
\
    t3 = vec_sub(vec_sl(vec_sub(s5, s7), vec_4), vec_sl(s3, vec_3)); \
    t3 = vec_sub(vec_add(t3, vec_sl(s1, vec_2)), vec_add(s3, s5)); \
\
    s0 = vec_add(t4, t0); \
    s7 = vec_sub(t4, t0); \
    s1 = vec_add(t5, t1); \
    s6 = vec_sub(t5, t1); \
    s2 = vec_add(t6, t2); \
    s5 = vec_sub(t6, t2); \
    s3 = vec_add(t7, t3); \
    s4 = vec_sub(t7, t3); \
} while (0)
80cabdff1aSopenharmony_ci
/*
 * Scaling after the first STEP8 pass: arithmetic right shift of each of the
 * eight vectors by 3, in place. Expects the shift count vec_3 in the
 * enclosing scope.
 */
#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7)  \
do {                                                \
    s0 = vec_sra(s0, vec_3);                        \
    s1 = vec_sra(s1, vec_3);                        \
    s2 = vec_sra(s2, vec_3);                        \
    s3 = vec_sra(s3, vec_3);                        \
    s4 = vec_sra(s4, vec_3);                        \
    s5 = vec_sra(s5, vec_3);                        \
    s6 = vec_sra(s6, vec_3);                        \
    s7 = vec_sra(s7, vec_3);                        \
} while (0)
92cabdff1aSopenharmony_ci
/*
 * Scaling after the second STEP8 pass: arithmetic right shift by 7, in place.
 * The last four vectors (s4..s7) additionally get +1 added before the shift;
 * the first four are shifted as they are. Expects vec_7 (shift count) and
 * vec_1s (signed ones) in the enclosing scope.
 */
#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
do {                                                \
    s0 = vec_sra(s0, vec_7);                        \
    s1 = vec_sra(s1, vec_7);                        \
    s2 = vec_sra(s2, vec_7);                        \
    s3 = vec_sra(s3, vec_7);                        \
    s4 = vec_sra(vec_add(s4, vec_1s), vec_7);       \
    s5 = vec_sra(vec_add(s5, vec_1s), vec_7);       \
    s6 = vec_sra(vec_add(s6, vec_1s), vec_7);       \
    s7 = vec_sra(vec_add(s7, vec_1s), vec_7);       \
} while (0)
104cabdff1aSopenharmony_ci
/*
 * Main steps of the 4x4 transform: one 4-point pass applied lane-wise to
 * s0..s3, which are overwritten with
 *     s0 = 17 * s0 + 22 * s1 + 17 * s2 + 10 * s3 + vec_rnd
 *     s1 = 17 * s0 + 10 * s1 - 17 * s2 - 22 * s3 + vec_rnd
 *     s2 = 17 * s0 - 10 * s1 - 17 * s2 + 22 * s3 + vec_rnd
 *     s3 = 17 * s0 - 22 * s1 + 17 * s2 - 10 * s3 + vec_rnd
 * (right-hand sides refer to the input values). Multiplications are built
 * from shifts and adds.
 *
 * The expansion uses t0..t3 as scratch and vec_1..vec_5 as shift counts; all
 * of them must be declared in the scope where the macro is used.
 */
#define STEP4(s0, s1, s2, s3, vec_rnd) \
do { \
    t0 = vec_add(vec_add(vec_sl(s0, vec_4), s0), vec_rnd); \
    t2 = vec_add(vec_sl(s2, vec_4), s2); \
    t1 = vec_sub(t0, t2); \
    t0 = vec_add(t0, t2); \
    t3 = vec_sub(s3, s1); \
    t3 = vec_add(vec_sl(t3, vec_3), vec_sl(t3, vec_1)); \
    t2 = vec_add(t3, vec_sl(s1, vec_5)); \
    t3 = vec_add(t3, vec_add(vec_sl(s3, vec_3), vec_sl(s3, vec_2))); \
    s0 = vec_add(t0, t2); \
    s3 = vec_sub(t0, t2); \
    s2 = vec_add(t1, t3); \
    s1 = vec_sub(t1, t3); \
} while (0)
123cabdff1aSopenharmony_ci
/*
 * Scaling after the first pass, four-vector variant: arithmetic right shift
 * of s0..s3 by 3, in place. Expects the shift count vec_3 in the enclosing
 * scope.
 *
 * Wrapped in do { } while (0) like SHIFT_HOR8 so that the expansion is a
 * single statement; without the wrapper only the first assignment would be
 * governed by an unbraced if/else at the call site.
 */
#define SHIFT_HOR4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_3); \
    s1 = vec_sra(s1, vec_3); \
    s2 = vec_sra(s2, vec_3); \
    s3 = vec_sra(s3, vec_3); \
} while (0)
129cabdff1aSopenharmony_ci
/*
 * Scaling after the second pass, four-vector variant: arithmetic right shift
 * of s0..s3 by 7, in place. Expects the shift count vec_7 in the enclosing
 * scope.
 *
 * Wrapped in do { } while (0) like SHIFT_VERT8 so that the expansion is a
 * single statement; without the wrapper only the first assignment would be
 * governed by an unbraced if/else at the call site.
 */
#define SHIFT_VERT4(s0, s1, s2, s3) \
do { \
    s0 = vec_sra(s0, vec_7); \
    s1 = vec_sra(s1, vec_7); \
    s2 = vec_sra(s2, vec_7); \
    s3 = vec_sra(s3, vec_7); \
} while (0)
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_ci/** Do inverse transform on 8x8 block
137cabdff1aSopenharmony_ci*/
138cabdff1aSopenharmony_cistatic void vc1_inv_trans_8x8_altivec(int16_t block[64])
139cabdff1aSopenharmony_ci{
140cabdff1aSopenharmony_ci    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
141cabdff1aSopenharmony_ci    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
142cabdff1aSopenharmony_ci    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
143cabdff1aSopenharmony_ci    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
144cabdff1aSopenharmony_ci    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
145cabdff1aSopenharmony_ci    const vector unsigned int vec_7 = vec_splat_u32(7);
146cabdff1aSopenharmony_ci    const vector unsigned int vec_4 = vec_splat_u32(4);
147cabdff1aSopenharmony_ci    const vector  signed int vec_4s = vec_splat_s32(4);
148cabdff1aSopenharmony_ci    const vector unsigned int vec_3 = vec_splat_u32(3);
149cabdff1aSopenharmony_ci    const vector unsigned int vec_2 = vec_splat_u32(2);
150cabdff1aSopenharmony_ci    const vector  signed int vec_1s = vec_splat_s32(1);
151cabdff1aSopenharmony_ci    const vector unsigned int vec_1 = vec_splat_u32(1);
152cabdff1aSopenharmony_ci
153cabdff1aSopenharmony_ci    src0 = vec_ld(  0, block);
154cabdff1aSopenharmony_ci    src1 = vec_ld( 16, block);
155cabdff1aSopenharmony_ci    src2 = vec_ld( 32, block);
156cabdff1aSopenharmony_ci    src3 = vec_ld( 48, block);
157cabdff1aSopenharmony_ci    src4 = vec_ld( 64, block);
158cabdff1aSopenharmony_ci    src5 = vec_ld( 80, block);
159cabdff1aSopenharmony_ci    src6 = vec_ld( 96, block);
160cabdff1aSopenharmony_ci    src7 = vec_ld(112, block);
161cabdff1aSopenharmony_ci
162cabdff1aSopenharmony_ci    s0 = vec_unpackl(src0);
163cabdff1aSopenharmony_ci    s1 = vec_unpackl(src1);
164cabdff1aSopenharmony_ci    s2 = vec_unpackl(src2);
165cabdff1aSopenharmony_ci    s3 = vec_unpackl(src3);
166cabdff1aSopenharmony_ci    s4 = vec_unpackl(src4);
167cabdff1aSopenharmony_ci    s5 = vec_unpackl(src5);
168cabdff1aSopenharmony_ci    s6 = vec_unpackl(src6);
169cabdff1aSopenharmony_ci    s7 = vec_unpackl(src7);
170cabdff1aSopenharmony_ci    s8 = vec_unpackh(src0);
171cabdff1aSopenharmony_ci    s9 = vec_unpackh(src1);
172cabdff1aSopenharmony_ci    sA = vec_unpackh(src2);
173cabdff1aSopenharmony_ci    sB = vec_unpackh(src3);
174cabdff1aSopenharmony_ci    sC = vec_unpackh(src4);
175cabdff1aSopenharmony_ci    sD = vec_unpackh(src5);
176cabdff1aSopenharmony_ci    sE = vec_unpackh(src6);
177cabdff1aSopenharmony_ci    sF = vec_unpackh(src7);
178cabdff1aSopenharmony_ci    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
179cabdff1aSopenharmony_ci    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
180cabdff1aSopenharmony_ci    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
181cabdff1aSopenharmony_ci    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
182cabdff1aSopenharmony_ci    src0 = vec_pack(s8, s0);
183cabdff1aSopenharmony_ci    src1 = vec_pack(s9, s1);
184cabdff1aSopenharmony_ci    src2 = vec_pack(sA, s2);
185cabdff1aSopenharmony_ci    src3 = vec_pack(sB, s3);
186cabdff1aSopenharmony_ci    src4 = vec_pack(sC, s4);
187cabdff1aSopenharmony_ci    src5 = vec_pack(sD, s5);
188cabdff1aSopenharmony_ci    src6 = vec_pack(sE, s6);
189cabdff1aSopenharmony_ci    src7 = vec_pack(sF, s7);
190cabdff1aSopenharmony_ci    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
191cabdff1aSopenharmony_ci
192cabdff1aSopenharmony_ci    s0 = vec_unpackl(src0);
193cabdff1aSopenharmony_ci    s1 = vec_unpackl(src1);
194cabdff1aSopenharmony_ci    s2 = vec_unpackl(src2);
195cabdff1aSopenharmony_ci    s3 = vec_unpackl(src3);
196cabdff1aSopenharmony_ci    s4 = vec_unpackl(src4);
197cabdff1aSopenharmony_ci    s5 = vec_unpackl(src5);
198cabdff1aSopenharmony_ci    s6 = vec_unpackl(src6);
199cabdff1aSopenharmony_ci    s7 = vec_unpackl(src7);
200cabdff1aSopenharmony_ci    s8 = vec_unpackh(src0);
201cabdff1aSopenharmony_ci    s9 = vec_unpackh(src1);
202cabdff1aSopenharmony_ci    sA = vec_unpackh(src2);
203cabdff1aSopenharmony_ci    sB = vec_unpackh(src3);
204cabdff1aSopenharmony_ci    sC = vec_unpackh(src4);
205cabdff1aSopenharmony_ci    sD = vec_unpackh(src5);
206cabdff1aSopenharmony_ci    sE = vec_unpackh(src6);
207cabdff1aSopenharmony_ci    sF = vec_unpackh(src7);
208cabdff1aSopenharmony_ci    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);
209cabdff1aSopenharmony_ci    SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);
210cabdff1aSopenharmony_ci    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);
211cabdff1aSopenharmony_ci    SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);
212cabdff1aSopenharmony_ci    src0 = vec_pack(s8, s0);
213cabdff1aSopenharmony_ci    src1 = vec_pack(s9, s1);
214cabdff1aSopenharmony_ci    src2 = vec_pack(sA, s2);
215cabdff1aSopenharmony_ci    src3 = vec_pack(sB, s3);
216cabdff1aSopenharmony_ci    src4 = vec_pack(sC, s4);
217cabdff1aSopenharmony_ci    src5 = vec_pack(sD, s5);
218cabdff1aSopenharmony_ci    src6 = vec_pack(sE, s6);
219cabdff1aSopenharmony_ci    src7 = vec_pack(sF, s7);
220cabdff1aSopenharmony_ci
221cabdff1aSopenharmony_ci    vec_st(src0,  0, block);
222cabdff1aSopenharmony_ci    vec_st(src1, 16, block);
223cabdff1aSopenharmony_ci    vec_st(src2, 32, block);
224cabdff1aSopenharmony_ci    vec_st(src3, 48, block);
225cabdff1aSopenharmony_ci    vec_st(src4, 64, block);
226cabdff1aSopenharmony_ci    vec_st(src5, 80, block);
227cabdff1aSopenharmony_ci    vec_st(src6, 96, block);
228cabdff1aSopenharmony_ci    vec_st(src7,112, block);
229cabdff1aSopenharmony_ci}
230cabdff1aSopenharmony_ci
231cabdff1aSopenharmony_ci/** Do inverse transform on 8x4 part of block
232cabdff1aSopenharmony_ci*/
233cabdff1aSopenharmony_cistatic void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
234cabdff1aSopenharmony_ci                                      int16_t *block)
235cabdff1aSopenharmony_ci{
236cabdff1aSopenharmony_ci    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
237cabdff1aSopenharmony_ci    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
238cabdff1aSopenharmony_ci    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
239cabdff1aSopenharmony_ci    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
240cabdff1aSopenharmony_ci    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
241cabdff1aSopenharmony_ci    const vector unsigned int vec_7 = vec_splat_u32(7);
242cabdff1aSopenharmony_ci    const vector unsigned int vec_5 = vec_splat_u32(5);
243cabdff1aSopenharmony_ci    const vector unsigned int vec_4 = vec_splat_u32(4);
244cabdff1aSopenharmony_ci    const vector  signed int vec_4s = vec_splat_s32(4);
245cabdff1aSopenharmony_ci    const vector unsigned int vec_3 = vec_splat_u32(3);
246cabdff1aSopenharmony_ci    const vector unsigned int vec_2 = vec_splat_u32(2);
247cabdff1aSopenharmony_ci    const vector unsigned int vec_1 = vec_splat_u32(1);
248cabdff1aSopenharmony_ci    vector unsigned char tmp;
249cabdff1aSopenharmony_ci    vector signed short tmp2, tmp3;
250cabdff1aSopenharmony_ci    vector unsigned char perm0, perm1, p0, p1, p;
251cabdff1aSopenharmony_ci
252cabdff1aSopenharmony_ci    src0 = vec_ld(  0, block);
253cabdff1aSopenharmony_ci    src1 = vec_ld( 16, block);
254cabdff1aSopenharmony_ci    src2 = vec_ld( 32, block);
255cabdff1aSopenharmony_ci    src3 = vec_ld( 48, block);
256cabdff1aSopenharmony_ci    src4 = vec_ld( 64, block);
257cabdff1aSopenharmony_ci    src5 = vec_ld( 80, block);
258cabdff1aSopenharmony_ci    src6 = vec_ld( 96, block);
259cabdff1aSopenharmony_ci    src7 = vec_ld(112, block);
260cabdff1aSopenharmony_ci
261cabdff1aSopenharmony_ci    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
262cabdff1aSopenharmony_ci    s0 = vec_unpackl(src0);
263cabdff1aSopenharmony_ci    s1 = vec_unpackl(src1);
264cabdff1aSopenharmony_ci    s2 = vec_unpackl(src2);
265cabdff1aSopenharmony_ci    s3 = vec_unpackl(src3);
266cabdff1aSopenharmony_ci    s4 = vec_unpackl(src4);
267cabdff1aSopenharmony_ci    s5 = vec_unpackl(src5);
268cabdff1aSopenharmony_ci    s6 = vec_unpackl(src6);
269cabdff1aSopenharmony_ci    s7 = vec_unpackl(src7);
270cabdff1aSopenharmony_ci    s8 = vec_unpackh(src0);
271cabdff1aSopenharmony_ci    s9 = vec_unpackh(src1);
272cabdff1aSopenharmony_ci    sA = vec_unpackh(src2);
273cabdff1aSopenharmony_ci    sB = vec_unpackh(src3);
274cabdff1aSopenharmony_ci    sC = vec_unpackh(src4);
275cabdff1aSopenharmony_ci    sD = vec_unpackh(src5);
276cabdff1aSopenharmony_ci    sE = vec_unpackh(src6);
277cabdff1aSopenharmony_ci    sF = vec_unpackh(src7);
278cabdff1aSopenharmony_ci    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
279cabdff1aSopenharmony_ci    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
280cabdff1aSopenharmony_ci    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
281cabdff1aSopenharmony_ci    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
282cabdff1aSopenharmony_ci    src0 = vec_pack(s8, s0);
283cabdff1aSopenharmony_ci    src1 = vec_pack(s9, s1);
284cabdff1aSopenharmony_ci    src2 = vec_pack(sA, s2);
285cabdff1aSopenharmony_ci    src3 = vec_pack(sB, s3);
286cabdff1aSopenharmony_ci    src4 = vec_pack(sC, s4);
287cabdff1aSopenharmony_ci    src5 = vec_pack(sD, s5);
288cabdff1aSopenharmony_ci    src6 = vec_pack(sE, s6);
289cabdff1aSopenharmony_ci    src7 = vec_pack(sF, s7);
290cabdff1aSopenharmony_ci    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
291cabdff1aSopenharmony_ci
292cabdff1aSopenharmony_ci    s0 = vec_unpackh(src0);
293cabdff1aSopenharmony_ci    s1 = vec_unpackh(src1);
294cabdff1aSopenharmony_ci    s2 = vec_unpackh(src2);
295cabdff1aSopenharmony_ci    s3 = vec_unpackh(src3);
296cabdff1aSopenharmony_ci    s8 = vec_unpackl(src0);
297cabdff1aSopenharmony_ci    s9 = vec_unpackl(src1);
298cabdff1aSopenharmony_ci    sA = vec_unpackl(src2);
299cabdff1aSopenharmony_ci    sB = vec_unpackl(src3);
300cabdff1aSopenharmony_ci    STEP4(s0, s1, s2, s3, vec_64);
301cabdff1aSopenharmony_ci    SHIFT_VERT4(s0, s1, s2, s3);
302cabdff1aSopenharmony_ci    STEP4(s8, s9, sA, sB, vec_64);
303cabdff1aSopenharmony_ci    SHIFT_VERT4(s8, s9, sA, sB);
304cabdff1aSopenharmony_ci    src0 = vec_pack(s0, s8);
305cabdff1aSopenharmony_ci    src1 = vec_pack(s1, s9);
306cabdff1aSopenharmony_ci    src2 = vec_pack(s2, sA);
307cabdff1aSopenharmony_ci    src3 = vec_pack(s3, sB);
308cabdff1aSopenharmony_ci
309cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN
310cabdff1aSopenharmony_ci    p0 = vec_lvsl (0, dest);
311cabdff1aSopenharmony_ci    p1 = vec_lvsl (stride, dest);
312cabdff1aSopenharmony_ci    p = vec_splat_u8 (-1);
313cabdff1aSopenharmony_ci    perm0 = vec_mergeh (p, p0);
314cabdff1aSopenharmony_ci    perm1 = vec_mergeh (p, p1);
315cabdff1aSopenharmony_ci#define GET_TMP2(dst, p)        \
316cabdff1aSopenharmony_ci    tmp = vec_ld (0, dest);     \
317cabdff1aSopenharmony_ci    tmp2 = (vector signed short)vec_perm (tmp, vec_splat_u8(0), p);
318cabdff1aSopenharmony_ci#else
319cabdff1aSopenharmony_ci#define GET_TMP2(dst,p)         \
320cabdff1aSopenharmony_ci    tmp = vec_vsx_ld (0, dst);  \
321cabdff1aSopenharmony_ci    tmp2 = (vector signed short)vec_mergeh (tmp, vec_splat_u8(0));
322cabdff1aSopenharmony_ci#endif
323cabdff1aSopenharmony_ci
324cabdff1aSopenharmony_ci#define ADD(dest,src,perm)                                              \
325cabdff1aSopenharmony_ci    GET_TMP2(dest, perm);                                               \
326cabdff1aSopenharmony_ci    tmp3 = vec_adds (tmp2, src);                                        \
327cabdff1aSopenharmony_ci    tmp = vec_packsu (tmp3, tmp3);                                      \
328cabdff1aSopenharmony_ci    vec_ste ((vector unsigned int)tmp, 0, (unsigned int *)dest);        \
329cabdff1aSopenharmony_ci    vec_ste ((vector unsigned int)tmp, 4, (unsigned int *)dest);
330cabdff1aSopenharmony_ci
331cabdff1aSopenharmony_ci    ADD (dest, src0, perm0)      dest += stride;
332cabdff1aSopenharmony_ci    ADD (dest, src1, perm1)      dest += stride;
333cabdff1aSopenharmony_ci    ADD (dest, src2, perm0)      dest += stride;
334cabdff1aSopenharmony_ci    ADD (dest, src3, perm1)
335cabdff1aSopenharmony_ci}
336cabdff1aSopenharmony_ci
/*
 * Store operators for the chroma template included below. PUT assigns the
 * computed value s to d unchanged; AVG assigns vec_avg(dst, s) instead.
 */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/*
 * First inclusion of the template, with the PUT operator selected and the
 * PREFIX_ name mapped to put_no_rnd_vc1_chroma_mc8_altivec.
 * NOTE(review): the template presumably defines the function under that
 * name (it is referenced by ff_vc1dsp_init_ppc and defined nowhere else in
 * this file) -- confirm against h264chroma_template.c.
 */
#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec   put_no_rnd_vc1_chroma_mc8_altivec
#include "h264chroma_template.c"
/* Drop both selectors so the template can be included again. */
#undef OP_U8_ALTIVEC
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec

/*
 * Second inclusion of the same template, now with the AVG operator and the
 * PREFIX_ name mapped to avg_no_rnd_vc1_chroma_mc8_altivec.
 */
#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec   avg_no_rnd_vc1_chroma_mc8_altivec
#include "h264chroma_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
351cabdff1aSopenharmony_ci
352cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */
353cabdff1aSopenharmony_ci
354cabdff1aSopenharmony_ciav_cold void ff_vc1dsp_init_ppc(VC1DSPContext *dsp)
355cabdff1aSopenharmony_ci{
356cabdff1aSopenharmony_ci#if HAVE_ALTIVEC
357cabdff1aSopenharmony_ci    if (!PPC_ALTIVEC(av_get_cpu_flags()))
358cabdff1aSopenharmony_ci        return;
359cabdff1aSopenharmony_ci
360cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
361cabdff1aSopenharmony_ci    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
362cabdff1aSopenharmony_ci    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
363cabdff1aSopenharmony_ci    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;
364cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */
365cabdff1aSopenharmony_ci}
366