/*
 * Loongson SIMD optimized vc1dsp
 *
 * Copyright (c) 2019 Loongson Technology Corporation Limited
 *                    gxw <guxiwei-hf@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include "vc1dsp_mips.h"
25cabdff1aSopenharmony_ci#include "constants.h"
26cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h"
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_msa(int16_t block[64])
29cabdff1aSopenharmony_ci{
30cabdff1aSopenharmony_ci    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
31cabdff1aSopenharmony_ci    v4i32 in_r0, in_r1, in_r2, in_r3, in_r4, in_r5, in_r6, in_r7;
32cabdff1aSopenharmony_ci    v4i32 in_l0, in_l1, in_l2, in_l3, in_l4, in_l5, in_l6, in_l7;
33cabdff1aSopenharmony_ci    v4i32 t_r1, t_r2, t_r3, t_r4, t_r5, t_r6, t_r7, t_r8;
34cabdff1aSopenharmony_ci    v4i32 t_l1, t_l2, t_l3, t_l4, t_l5, t_l6, t_l7, t_l8;
35cabdff1aSopenharmony_ci    v4i32 cnst_12 = {12, 12, 12, 12};
36cabdff1aSopenharmony_ci    v4i32 cnst_4 = {4, 4, 4, 4};
37cabdff1aSopenharmony_ci    v4i32 cnst_16 = {16, 16, 16, 16};
38cabdff1aSopenharmony_ci    v4i32 cnst_6 = {6, 6, 6, 6};
39cabdff1aSopenharmony_ci    v4i32 cnst_15 = {15, 15, 15, 15};
40cabdff1aSopenharmony_ci    v4i32 cnst_9 = {9, 9, 9, 9};
41cabdff1aSopenharmony_ci    v4i32 cnst_1 = {1, 1, 1, 1};
42cabdff1aSopenharmony_ci    v4i32 cnst_64 = {64, 64, 64, 64};
43cabdff1aSopenharmony_ci
44cabdff1aSopenharmony_ci    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
45cabdff1aSopenharmony_ci    UNPCK_SH_SW(in0, in_r0, in_l0);
46cabdff1aSopenharmony_ci    UNPCK_SH_SW(in1, in_r1, in_l1);
47cabdff1aSopenharmony_ci    UNPCK_SH_SW(in2, in_r2, in_l2);
48cabdff1aSopenharmony_ci    UNPCK_SH_SW(in3, in_r3, in_l3);
49cabdff1aSopenharmony_ci    UNPCK_SH_SW(in4, in_r4, in_l4);
50cabdff1aSopenharmony_ci    UNPCK_SH_SW(in5, in_r5, in_l5);
51cabdff1aSopenharmony_ci    UNPCK_SH_SW(in6, in_r6, in_l6);
52cabdff1aSopenharmony_ci    UNPCK_SH_SW(in7, in_r7, in_l7);
53cabdff1aSopenharmony_ci    // First loop
54cabdff1aSopenharmony_ci    t_r1 = cnst_12 * (in_r0 + in_r4) + cnst_4;
55cabdff1aSopenharmony_ci    t_l1 = cnst_12 * (in_l0 + in_l4) + cnst_4;
56cabdff1aSopenharmony_ci    t_r2 = cnst_12 * (in_r0 - in_r4) + cnst_4;
57cabdff1aSopenharmony_ci    t_l2 = cnst_12 * (in_l0 - in_l4) + cnst_4;
58cabdff1aSopenharmony_ci    t_r3 = cnst_16 * in_r2 + cnst_6 * in_r6;
59cabdff1aSopenharmony_ci    t_l3 = cnst_16 * in_l2 + cnst_6 * in_l6;
60cabdff1aSopenharmony_ci    t_r4 = cnst_6 * in_r2 - cnst_16 * in_r6;
61cabdff1aSopenharmony_ci    t_l4 = cnst_6 * in_l2 - cnst_16 * in_l6;
62cabdff1aSopenharmony_ci
63cabdff1aSopenharmony_ci    ADD4(t_r1, t_r3, t_l1, t_l3, t_r2, t_r4, t_l2, t_l4, t_r5, t_l5, t_r6, t_l6);
64cabdff1aSopenharmony_ci    SUB4(t_r2, t_r4, t_l2, t_l4, t_r1, t_r3, t_l1, t_l3, t_r7, t_l7, t_r8, t_l8);
65cabdff1aSopenharmony_ci    t_r1 = cnst_16 * in_r1 + cnst_15 * in_r3 + cnst_9 * in_r5 + cnst_4 * in_r7;
66cabdff1aSopenharmony_ci    t_l1 = cnst_16 * in_l1 + cnst_15 * in_l3 + cnst_9 * in_l5 + cnst_4 * in_l7;
67cabdff1aSopenharmony_ci    t_r2 = cnst_15 * in_r1 - cnst_4 * in_r3 - cnst_16 * in_r5 - cnst_9 * in_r7;
68cabdff1aSopenharmony_ci    t_l2 = cnst_15 * in_l1 - cnst_4 * in_l3 - cnst_16 * in_l5 - cnst_9 * in_l7;
69cabdff1aSopenharmony_ci    t_r3 = cnst_9 * in_r1 - cnst_16 * in_r3 + cnst_4 * in_r5 + cnst_15 * in_r7;
70cabdff1aSopenharmony_ci    t_l3 = cnst_9 * in_l1 - cnst_16 * in_l3 + cnst_4 * in_l5 + cnst_15 * in_l7;
71cabdff1aSopenharmony_ci    t_r4 = cnst_4 * in_r1 - cnst_9 * in_r3 + cnst_15 * in_r5 - cnst_16 * in_r7;
72cabdff1aSopenharmony_ci    t_l4 = cnst_4 * in_l1 - cnst_9 * in_l3 + cnst_15 * in_l5 - cnst_16 * in_l7;
73cabdff1aSopenharmony_ci
74cabdff1aSopenharmony_ci    in_r0 = (t_r5 + t_r1) >> 3;
75cabdff1aSopenharmony_ci    in_l0 = (t_l5 + t_l1) >> 3;
76cabdff1aSopenharmony_ci    in_r1 = (t_r6 + t_r2) >> 3;
77cabdff1aSopenharmony_ci    in_l1 = (t_l6 + t_l2) >> 3;
78cabdff1aSopenharmony_ci    in_r2 = (t_r7 + t_r3) >> 3;
79cabdff1aSopenharmony_ci    in_l2 = (t_l7 + t_l3) >> 3;
80cabdff1aSopenharmony_ci    in_r3 = (t_r8 + t_r4) >> 3;
81cabdff1aSopenharmony_ci    in_l3 = (t_l8 + t_l4) >> 3;
82cabdff1aSopenharmony_ci
83cabdff1aSopenharmony_ci    in_r4 = (t_r8 - t_r4) >> 3;
84cabdff1aSopenharmony_ci    in_l4 = (t_l8 - t_l4) >> 3;
85cabdff1aSopenharmony_ci    in_r5 = (t_r7 - t_r3) >> 3;
86cabdff1aSopenharmony_ci    in_l5 = (t_l7 - t_l3) >> 3;
87cabdff1aSopenharmony_ci    in_r6 = (t_r6 - t_r2) >> 3;
88cabdff1aSopenharmony_ci    in_l6 = (t_l6 - t_l2) >> 3;
89cabdff1aSopenharmony_ci    in_r7 = (t_r5 - t_r1) >> 3;
90cabdff1aSopenharmony_ci    in_l7 = (t_l5 - t_l1) >> 3;
91cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r0, in_r1, in_r2, in_r3, in_r0, in_r1, in_r2, in_r3);
92cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_l0, in_l1, in_l2, in_l3, in_l0, in_l1, in_l2, in_l3);
93cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r4, in_r5, in_r6, in_r7, in_r4, in_r5, in_r6, in_r7);
94cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_l4, in_l5, in_l6, in_l7, in_l4, in_l5, in_l6, in_l7);
95cabdff1aSopenharmony_ci    // Second loop
96cabdff1aSopenharmony_ci    t_r1 = cnst_12 * (in_r0 + in_l0) + cnst_64;
97cabdff1aSopenharmony_ci    t_l1 = cnst_12 * (in_r4 + in_l4) + cnst_64;
98cabdff1aSopenharmony_ci    t_r2 = cnst_12 * (in_r0 - in_l0) + cnst_64;
99cabdff1aSopenharmony_ci    t_l2 = cnst_12 * (in_r4 - in_l4) + cnst_64;
100cabdff1aSopenharmony_ci    t_r3 = cnst_16 * in_r2 + cnst_6 * in_l2;
101cabdff1aSopenharmony_ci    t_l3 = cnst_16 * in_r6 + cnst_6 * in_l6;
102cabdff1aSopenharmony_ci    t_r4 = cnst_6 * in_r2 - cnst_16 * in_l2;
103cabdff1aSopenharmony_ci    t_l4 = cnst_6 * in_r6 - cnst_16 * in_l6;
104cabdff1aSopenharmony_ci
105cabdff1aSopenharmony_ci    ADD4(t_r1, t_r3, t_l1, t_l3, t_r2, t_r4, t_l2, t_l4, t_r5, t_l5, t_r6, t_l6);
106cabdff1aSopenharmony_ci    SUB4(t_r2, t_r4, t_l2, t_l4, t_r1, t_r3, t_l1, t_l3, t_r7, t_l7, t_r8, t_l8);
107cabdff1aSopenharmony_ci    t_r1 = cnst_16 * in_r1 + cnst_15 * in_r3 + cnst_9 * in_l1 + cnst_4 * in_l3;
108cabdff1aSopenharmony_ci    t_l1 = cnst_16 * in_r5 + cnst_15 * in_r7 + cnst_9 * in_l5 + cnst_4 * in_l7;
109cabdff1aSopenharmony_ci    t_r2 = cnst_15 * in_r1 - cnst_4 * in_r3 - cnst_16 * in_l1 - cnst_9 * in_l3;
110cabdff1aSopenharmony_ci    t_l2 = cnst_15 * in_r5 - cnst_4 * in_r7 - cnst_16 * in_l5 - cnst_9 * in_l7;
111cabdff1aSopenharmony_ci    t_r3 = cnst_9 * in_r1 - cnst_16 * in_r3 + cnst_4 * in_l1 + cnst_15 * in_l3;
112cabdff1aSopenharmony_ci    t_l3 = cnst_9 * in_r5 - cnst_16 * in_r7 + cnst_4 * in_l5 + cnst_15 * in_l7;
113cabdff1aSopenharmony_ci    t_r4 = cnst_4 * in_r1 - cnst_9 * in_r3 + cnst_15 * in_l1 - cnst_16 * in_l3;
114cabdff1aSopenharmony_ci    t_l4 = cnst_4 * in_r5 - cnst_9 * in_r7 + cnst_15 * in_l5 - cnst_16 * in_l7;
115cabdff1aSopenharmony_ci
116cabdff1aSopenharmony_ci    in_r0 = (t_r5 + t_r1) >> 7;
117cabdff1aSopenharmony_ci    in_l0 = (t_l5 + t_l1) >> 7;
118cabdff1aSopenharmony_ci    in_r1 = (t_r6 + t_r2) >> 7;
119cabdff1aSopenharmony_ci    in_l1 = (t_l6 + t_l2) >> 7;
120cabdff1aSopenharmony_ci    in_r2 = (t_r7 + t_r3) >> 7;
121cabdff1aSopenharmony_ci    in_l2 = (t_l7 + t_l3) >> 7;
122cabdff1aSopenharmony_ci    in_r3 = (t_r8 + t_r4) >> 7;
123cabdff1aSopenharmony_ci    in_l3 = (t_l8 + t_l4) >> 7;
124cabdff1aSopenharmony_ci
125cabdff1aSopenharmony_ci    in_r4 = (t_r8 - t_r4 + cnst_1) >> 7;
126cabdff1aSopenharmony_ci    in_l4 = (t_l8 - t_l4 + cnst_1) >> 7;
127cabdff1aSopenharmony_ci    in_r5 = (t_r7 - t_r3 + cnst_1) >> 7;
128cabdff1aSopenharmony_ci    in_l5 = (t_l7 - t_l3 + cnst_1) >> 7;
129cabdff1aSopenharmony_ci    in_r6 = (t_r6 - t_r2 + cnst_1) >> 7;
130cabdff1aSopenharmony_ci    in_l6 = (t_l6 - t_l2 + cnst_1) >> 7;
131cabdff1aSopenharmony_ci    in_r7 = (t_r5 - t_r1 + cnst_1) >> 7;
132cabdff1aSopenharmony_ci    in_l7 = (t_l5 - t_l1 + cnst_1) >> 7;
133cabdff1aSopenharmony_ci    PCKEV_H4_SH(in_l0, in_r0, in_l1, in_r1, in_l2, in_r2, in_l3, in_r3,
134cabdff1aSopenharmony_ci                in0, in1, in2, in3);
135cabdff1aSopenharmony_ci    PCKEV_H4_SH(in_l4, in_r4, in_l5, in_r5, in_l6, in_r6, in_l7, in_r7,
136cabdff1aSopenharmony_ci                in4, in5, in6, in7);
137cabdff1aSopenharmony_ci    ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, block, 8);
138cabdff1aSopenharmony_ci}
139cabdff1aSopenharmony_ci
140cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x8_msa(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
141cabdff1aSopenharmony_ci{
142cabdff1aSopenharmony_ci    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
143cabdff1aSopenharmony_ci    v4i32 in_r0, in_r1, in_r2, in_r3, in_r4, in_r5, in_r6, in_r7;
144cabdff1aSopenharmony_ci    v4i32 t1, t2, t3, t4, t5, t6, t7, t8;
145cabdff1aSopenharmony_ci    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
146cabdff1aSopenharmony_ci    v16i8 zero_m = { 0 };
147cabdff1aSopenharmony_ci    v4i32 cnst_17 = {17, 17, 17, 17};
148cabdff1aSopenharmony_ci    v4i32 cnst_22 = {22, 22, 22, 22};
149cabdff1aSopenharmony_ci    v4i32 cnst_10 = {10, 10, 10, 10};
150cabdff1aSopenharmony_ci    v4i32 cnst_12 = {12, 12, 12, 12};
151cabdff1aSopenharmony_ci    v4i32 cnst_64 = {64, 64, 64, 64};
152cabdff1aSopenharmony_ci    v4i32 cnst_16 = {16, 16, 16, 16};
153cabdff1aSopenharmony_ci    v4i32 cnst_15 = {15, 15, 15, 15};
154cabdff1aSopenharmony_ci    v4i32 cnst_4 = {4, 4, 4, 4};
155cabdff1aSopenharmony_ci    v4i32 cnst_6 = {6, 6, 6, 6};
156cabdff1aSopenharmony_ci    v4i32 cnst_9 = {9, 9, 9, 9};
157cabdff1aSopenharmony_ci    v4i32 cnst_1 = {1, 1, 1, 1};
158cabdff1aSopenharmony_ci
159cabdff1aSopenharmony_ci    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
160cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in0, in_r0);
161cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in1, in_r1);
162cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in2, in_r2);
163cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in3, in_r3);
164cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in4, in_r4);
165cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in5, in_r5);
166cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in6, in_r6);
167cabdff1aSopenharmony_ci    UNPCK_R_SH_SW(in7, in_r7);
168cabdff1aSopenharmony_ci    // First loop
169cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r0, in_r1, in_r2, in_r3, in_r0, in_r1, in_r2, in_r3);
170cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r4, in_r5, in_r6, in_r7, in_r4, in_r5, in_r6, in_r7);
171cabdff1aSopenharmony_ci    t1 = cnst_17 * (in_r0 + in_r2) + cnst_4;
172cabdff1aSopenharmony_ci    t5 = cnst_17 * (in_r4 + in_r6) + cnst_4;
173cabdff1aSopenharmony_ci    t2 = cnst_17 * (in_r0 - in_r2) + cnst_4;
174cabdff1aSopenharmony_ci    t6 = cnst_17 * (in_r4 - in_r6) + cnst_4;
175cabdff1aSopenharmony_ci    t3 = cnst_22 * in_r1 + cnst_10 * in_r3;
176cabdff1aSopenharmony_ci    t7 = cnst_22 * in_r5 + cnst_10 * in_r7;
177cabdff1aSopenharmony_ci    t4 = cnst_22 * in_r3 - cnst_10 * in_r1;
178cabdff1aSopenharmony_ci    t8 = cnst_22 * in_r7 - cnst_10 * in_r5;
179cabdff1aSopenharmony_ci
180cabdff1aSopenharmony_ci    in_r0 = (t1 + t3) >> 3;
181cabdff1aSopenharmony_ci    in_r4 = (t5 + t7) >> 3;
182cabdff1aSopenharmony_ci    in_r1 = (t2 - t4) >> 3;
183cabdff1aSopenharmony_ci    in_r5 = (t6 - t8) >> 3;
184cabdff1aSopenharmony_ci    in_r2 = (t2 + t4) >> 3;
185cabdff1aSopenharmony_ci    in_r6 = (t6 + t8) >> 3;
186cabdff1aSopenharmony_ci    in_r3 = (t1 - t3) >> 3;
187cabdff1aSopenharmony_ci    in_r7 = (t5 - t7) >> 3;
188cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r0, in_r1, in_r2, in_r3, in_r0, in_r1, in_r2, in_r3);
189cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in_r4, in_r5, in_r6, in_r7, in_r4, in_r5, in_r6, in_r7);
190cabdff1aSopenharmony_ci    PCKEV_H4_SH(in_r1, in_r0, in_r3, in_r2, in_r5, in_r4, in_r7, in_r6,
191cabdff1aSopenharmony_ci                in0, in1, in2, in3);
192cabdff1aSopenharmony_ci    ST_D8(in0, in1, in2, in3, 0, 1, 0, 1, 0, 1, 0, 1, block, 8);
193cabdff1aSopenharmony_ci    // Second loop
194cabdff1aSopenharmony_ci    t1 = cnst_12 * (in_r0 + in_r4) + cnst_64;
195cabdff1aSopenharmony_ci    t2 = cnst_12 * (in_r0 - in_r4) + cnst_64;
196cabdff1aSopenharmony_ci    t3 = cnst_16 * in_r2 + cnst_6 * in_r6;
197cabdff1aSopenharmony_ci    t4 = cnst_6 * in_r2 - cnst_16 * in_r6;
198cabdff1aSopenharmony_ci    t5 = t1 + t3, t6 = t2 + t4;
199cabdff1aSopenharmony_ci    t7 = t2 - t4, t8 = t1 - t3;
200cabdff1aSopenharmony_ci    t1 = cnst_16 * in_r1 + cnst_15 * in_r3 + cnst_9 * in_r5 + cnst_4 * in_r7;
201cabdff1aSopenharmony_ci    t2 = cnst_15 * in_r1 - cnst_4 * in_r3 - cnst_16 * in_r5 - cnst_9 * in_r7;
202cabdff1aSopenharmony_ci    t3 = cnst_9 * in_r1 - cnst_16 * in_r3 + cnst_4 * in_r5 + cnst_15 * in_r7;
203cabdff1aSopenharmony_ci    t4 = cnst_4 * in_r1 - cnst_9 * in_r3 + cnst_15 * in_r5 - cnst_16 * in_r7;
204cabdff1aSopenharmony_ci    LD_SW8(dest, linesize, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
205cabdff1aSopenharmony_ci    ILVR_B8_SW(zero_m, dst0, zero_m, dst1, zero_m, dst2, zero_m, dst3,
206cabdff1aSopenharmony_ci               zero_m, dst4, zero_m, dst5, zero_m, dst6, zero_m, dst7,
207cabdff1aSopenharmony_ci               dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
208cabdff1aSopenharmony_ci    ILVR_H4_SW(zero_m, dst0, zero_m, dst1, zero_m, dst2, zero_m, dst3,
209cabdff1aSopenharmony_ci               dst0, dst1, dst2, dst3);
210cabdff1aSopenharmony_ci    ILVR_H4_SW(zero_m, dst4, zero_m, dst5, zero_m, dst6, zero_m, dst7,
211cabdff1aSopenharmony_ci               dst4, dst5, dst6, dst7);
212cabdff1aSopenharmony_ci    in_r0 = (t5 + t1) >> 7;
213cabdff1aSopenharmony_ci    in_r1 = (t6 + t2) >> 7;
214cabdff1aSopenharmony_ci    in_r2 = (t7 + t3) >> 7;
215cabdff1aSopenharmony_ci    in_r3 = (t8 + t4) >> 7;
216cabdff1aSopenharmony_ci    in_r4 = (t8 - t4 + cnst_1) >> 7;
217cabdff1aSopenharmony_ci    in_r5 = (t7 - t3 + cnst_1) >> 7;
218cabdff1aSopenharmony_ci    in_r6 = (t6 - t2 + cnst_1) >> 7;
219cabdff1aSopenharmony_ci    in_r7 = (t5 - t1 + cnst_1) >> 7;
220cabdff1aSopenharmony_ci    ADD4(in_r0, dst0, in_r1, dst1, in_r2, dst2, in_r3, dst3,
221cabdff1aSopenharmony_ci         in_r0, in_r1, in_r2, in_r3);
222cabdff1aSopenharmony_ci    ADD4(in_r4, dst4, in_r5, dst5, in_r6, dst6, in_r7, dst7,
223cabdff1aSopenharmony_ci         in_r4, in_r5, in_r6, in_r7);
224cabdff1aSopenharmony_ci    CLIP_SW8_0_255(in_r0, in_r1, in_r2, in_r3, in_r4, in_r5, in_r6, in_r7);
225cabdff1aSopenharmony_ci    PCKEV_H4_SH(in_r1, in_r0, in_r3, in_r2, in_r5, in_r4, in_r7, in_r6,
226cabdff1aSopenharmony_ci                in0, in1, in2, in3);
227cabdff1aSopenharmony_ci    PCKEV_B2_SH(in1, in0, in3, in2, in0, in1);
228cabdff1aSopenharmony_ci    ST_W8(in0, in1, 0, 1, 2, 3, 0, 1, 2, 3, dest, linesize);
229cabdff1aSopenharmony_ci}
230cabdff1aSopenharmony_ci
231cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_msa(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
232cabdff1aSopenharmony_ci{
233cabdff1aSopenharmony_ci    v4i32 in0, in1, in2, in3, in4, in5, in6, in7;
234cabdff1aSopenharmony_ci    v4i32 t1, t2, t3, t4, t5, t6, t7, t8;
235cabdff1aSopenharmony_ci    v4i32 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
236cabdff1aSopenharmony_ci    v16i8 zero_m = { 0 };
237cabdff1aSopenharmony_ci    v4i32 cnst_17 = {17, 17, 17, 17};
238cabdff1aSopenharmony_ci    v4i32 cnst_22 = {22, 22, 22, 22};
239cabdff1aSopenharmony_ci    v4i32 cnst_10 = {10, 10, 10, 10};
240cabdff1aSopenharmony_ci    v4i32 cnst_12 = {12, 12, 12, 12};
241cabdff1aSopenharmony_ci    v4i32 cnst_64 = {64, 64, 64, 64};
242cabdff1aSopenharmony_ci    v4i32 cnst_16 = {16, 16, 16, 16};
243cabdff1aSopenharmony_ci    v4i32 cnst_15 = {15, 15, 15, 15};
244cabdff1aSopenharmony_ci    v4i32 cnst_4 = {4, 4, 4, 4};
245cabdff1aSopenharmony_ci    v4i32 cnst_6 = {6, 6, 6, 6};
246cabdff1aSopenharmony_ci    v4i32 cnst_9 = {9, 9, 9, 9};
247cabdff1aSopenharmony_ci
248cabdff1aSopenharmony_ci    LD_SW4(block, 8, t1, t2, t3, t4);
249cabdff1aSopenharmony_ci    UNPCK_SH_SW(t1, in0, in4);
250cabdff1aSopenharmony_ci    UNPCK_SH_SW(t2, in1, in5);
251cabdff1aSopenharmony_ci    UNPCK_SH_SW(t3, in2, in6);
252cabdff1aSopenharmony_ci    UNPCK_SH_SW(t4, in3, in7);
253cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, in0, in1, in2, in3);
254cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in4, in5, in6, in7, in4, in5, in6, in7);
255cabdff1aSopenharmony_ci    // First loop
256cabdff1aSopenharmony_ci    t1 = cnst_12 * (in0 + in4) + cnst_4;
257cabdff1aSopenharmony_ci    t2 = cnst_12 * (in0 - in4) + cnst_4;
258cabdff1aSopenharmony_ci    t3 = cnst_16 * in2 + cnst_6 * in6;
259cabdff1aSopenharmony_ci    t4 = cnst_6 * in2 - cnst_16 * in6;
260cabdff1aSopenharmony_ci    t5 = t1 + t3, t6 = t2 + t4;
261cabdff1aSopenharmony_ci    t7 = t2 - t4, t8 = t1 - t3;
262cabdff1aSopenharmony_ci    t1 = cnst_16 * in1 + cnst_15 * in3 + cnst_9 * in5 + cnst_4 * in7;
263cabdff1aSopenharmony_ci    t2 = cnst_15 * in1 - cnst_4 * in3 - cnst_16 * in5 - cnst_9 * in7;
264cabdff1aSopenharmony_ci    t3 = cnst_9 * in1 - cnst_16 * in3 + cnst_4 * in5 + cnst_15 * in7;
265cabdff1aSopenharmony_ci    t4 = cnst_4 * in1 - cnst_9 * in3 + cnst_15 * in5 - cnst_16 * in7;
266cabdff1aSopenharmony_ci    in0 = (t5 + t1) >> 3;
267cabdff1aSopenharmony_ci    in1 = (t6 + t2) >> 3;
268cabdff1aSopenharmony_ci    in2 = (t7 + t3) >> 3;
269cabdff1aSopenharmony_ci    in3 = (t8 + t4) >> 3;
270cabdff1aSopenharmony_ci    in4 = (t8 - t4) >> 3;
271cabdff1aSopenharmony_ci    in5 = (t7 - t3) >> 3;
272cabdff1aSopenharmony_ci    in6 = (t6 - t2) >> 3;
273cabdff1aSopenharmony_ci    in7 = (t5 - t1) >> 3;
274cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, in0, in1, in2, in3);
275cabdff1aSopenharmony_ci    TRANSPOSE4x4_SW_SW(in4, in5, in6, in7, in4, in5, in6, in7);
276cabdff1aSopenharmony_ci    PCKEV_H4_SW(in4, in0, in5, in1, in6, in2, in7, in3, t1, t2, t3, t4);
277cabdff1aSopenharmony_ci    ST_SW4(t1, t2, t3, t4, block, 8);
278cabdff1aSopenharmony_ci    // Second loop
279cabdff1aSopenharmony_ci    LD_SW4(dest, linesize, dst0, dst1, dst2, dst3);
280cabdff1aSopenharmony_ci    ILVR_B4_SW(zero_m, dst0, zero_m, dst1, zero_m, dst2, zero_m, dst3,
281cabdff1aSopenharmony_ci               dst0, dst1, dst2, dst3);
282cabdff1aSopenharmony_ci    ILVL_H4_SW(zero_m, dst0, zero_m, dst1, zero_m, dst2, zero_m, dst3,
283cabdff1aSopenharmony_ci               dst4, dst5, dst6, dst7);
284cabdff1aSopenharmony_ci    ILVR_H4_SW(zero_m, dst0, zero_m, dst1, zero_m, dst2, zero_m, dst3,
285cabdff1aSopenharmony_ci               dst0, dst1, dst2, dst3);
286cabdff1aSopenharmony_ci    // Right part
287cabdff1aSopenharmony_ci    t1 = cnst_17 * (in0 + in2) + cnst_64;
288cabdff1aSopenharmony_ci    t2 = cnst_17 * (in0 - in2) + cnst_64;
289cabdff1aSopenharmony_ci    t3 = cnst_22 * in1 + cnst_10 * in3;
290cabdff1aSopenharmony_ci    t4 = cnst_22 * in3 - cnst_10 * in1;
291cabdff1aSopenharmony_ci    in0 = (t1 + t3) >> 7;
292cabdff1aSopenharmony_ci    in1 = (t2 - t4) >> 7;
293cabdff1aSopenharmony_ci    in2 = (t2 + t4) >> 7;
294cabdff1aSopenharmony_ci    in3 = (t1 - t3) >> 7;
295cabdff1aSopenharmony_ci    ADD4(in0, dst0, in1, dst1, in2, dst2, in3, dst3, in0, in1, in2, in3);
296cabdff1aSopenharmony_ci    CLIP_SW4_0_255(in0, in1, in2, in3);
297cabdff1aSopenharmony_ci    // Left part
298cabdff1aSopenharmony_ci    t5 = cnst_17 * (in4 + in6) + cnst_64;
299cabdff1aSopenharmony_ci    t6 = cnst_17 * (in4 - in6) + cnst_64;
300cabdff1aSopenharmony_ci    t7 = cnst_22 * in5 + cnst_10 * in7;
301cabdff1aSopenharmony_ci    t8 = cnst_22 * in7 - cnst_10 * in5;
302cabdff1aSopenharmony_ci    in4 = (t5 + t7) >> 7;
303cabdff1aSopenharmony_ci    in5 = (t6 - t8) >> 7;
304cabdff1aSopenharmony_ci    in6 = (t6 + t8) >> 7;
305cabdff1aSopenharmony_ci    in7 = (t5 - t7) >> 7;
306cabdff1aSopenharmony_ci    ADD4(in4, dst4, in5, dst5, in6, dst6, in7, dst7, in4, in5, in6, in7);
307cabdff1aSopenharmony_ci    CLIP_SW4_0_255(in4, in5, in6, in7);
308cabdff1aSopenharmony_ci    PCKEV_H4_SW(in4, in0, in5, in1, in6, in2, in7, in3, in0, in1, in2, in3);
309cabdff1aSopenharmony_ci    PCKEV_B2_SW(in1, in0, in3, in2, in0, in1);
310cabdff1aSopenharmony_ci    ST_D4(in0, in1, 0, 1, 0, 1, dest, linesize);
311cabdff1aSopenharmony_ci}
312cabdff1aSopenharmony_ci
313cabdff1aSopenharmony_cistatic void put_vc1_mspel_mc_h_v_msa(uint8_t *dst, const uint8_t *src,
314cabdff1aSopenharmony_ci                                     ptrdiff_t stride, int hmode, int vmode,
315cabdff1aSopenharmony_ci                                     int rnd)
316cabdff1aSopenharmony_ci{
317cabdff1aSopenharmony_ci    v8i16 in_r0, in_r1, in_r2, in_r3, in_l0, in_l1, in_l2, in_l3;
318cabdff1aSopenharmony_ci    v8i16 t0, t1, t2, t3, t4, t5, t6, t7;
319cabdff1aSopenharmony_ci    v8i16 t8, t9, t10, t11, t12, t13, t14, t15;
320cabdff1aSopenharmony_ci    v8i16 cnst_para0, cnst_para1, cnst_para2, cnst_para3, cnst_r;
321cabdff1aSopenharmony_ci    static const int para_value[][4] = {{4, 53, 18, 3},
322cabdff1aSopenharmony_ci                                        {1, 9, 9, 1},
323cabdff1aSopenharmony_ci                                        {3, 18, 53, 4}};
324cabdff1aSopenharmony_ci    static const int shift_value[] = {0, 5, 1, 5};
325cabdff1aSopenharmony_ci    int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;
326cabdff1aSopenharmony_ci    int r = (1 << (shift - 1)) + rnd - 1;
327cabdff1aSopenharmony_ci    cnst_r = __msa_fill_h(r);
328cabdff1aSopenharmony_ci    src -= 1, src -= stride;
329cabdff1aSopenharmony_ci    cnst_para0 = __msa_fill_h(para_value[vmode - 1][0]);
330cabdff1aSopenharmony_ci    cnst_para1 = __msa_fill_h(para_value[vmode - 1][1]);
331cabdff1aSopenharmony_ci    cnst_para2 = __msa_fill_h(para_value[vmode - 1][2]);
332cabdff1aSopenharmony_ci    cnst_para3 = __msa_fill_h(para_value[vmode - 1][3]);
333cabdff1aSopenharmony_ci    LD_SH4(src, stride, in_l0, in_l1, in_l2, in_l3);
334cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l0, in_r0, in_l0);
335cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l1, in_r1, in_l1);
336cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l2, in_r2, in_l2);
337cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l3, in_r3, in_l3);
338cabdff1aSopenharmony_ci    // row 0
339cabdff1aSopenharmony_ci    t0 = cnst_para1 * in_r1 + cnst_para2 * in_r2
340cabdff1aSopenharmony_ci         - cnst_para0 * in_r0 - cnst_para3 * in_r3;
341cabdff1aSopenharmony_ci    t8 = cnst_para1 * in_l1 + cnst_para2 * in_l2
342cabdff1aSopenharmony_ci         - cnst_para0 * in_l0 - cnst_para3 * in_l3;
343cabdff1aSopenharmony_ci    in_l0 = LD_SH(src + 4 * stride);
344cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l0, in_r0, in_l0);
345cabdff1aSopenharmony_ci    // row 1
346cabdff1aSopenharmony_ci    t1 = cnst_para1 * in_r2 + cnst_para2 * in_r3
347cabdff1aSopenharmony_ci         - cnst_para0 * in_r1 - cnst_para3 * in_r0;
348cabdff1aSopenharmony_ci    t9 = cnst_para1 * in_l2 + cnst_para2 * in_l3
349cabdff1aSopenharmony_ci         - cnst_para0 * in_l1 - cnst_para3 * in_l0;
350cabdff1aSopenharmony_ci    in_l1 = LD_SH(src + 5 * stride);
351cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l1, in_r1, in_l1);
352cabdff1aSopenharmony_ci    // row 2
353cabdff1aSopenharmony_ci    t2 = cnst_para1 * in_r3 + cnst_para2 * in_r0
354cabdff1aSopenharmony_ci         - cnst_para0 * in_r2 - cnst_para3 * in_r1;
355cabdff1aSopenharmony_ci    t10 = cnst_para1 * in_l3 + cnst_para2 * in_l0
356cabdff1aSopenharmony_ci          - cnst_para0 * in_l2 - cnst_para3 * in_l1;
357cabdff1aSopenharmony_ci    in_l2 = LD_SH(src + 6 * stride);
358cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l2, in_r2, in_l2);
359cabdff1aSopenharmony_ci    // row 3
360cabdff1aSopenharmony_ci    t3 = cnst_para1 * in_r0 + cnst_para2 * in_r1
361cabdff1aSopenharmony_ci         - cnst_para0 * in_r3 - cnst_para3 * in_r2;
362cabdff1aSopenharmony_ci    t11 = cnst_para1 * in_l0 + cnst_para2 * in_l1
363cabdff1aSopenharmony_ci          - cnst_para0 * in_l3 - cnst_para3 * in_l2;
364cabdff1aSopenharmony_ci    in_l3 = LD_SH(src + 7 * stride);
365cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l3, in_r3, in_l3);
366cabdff1aSopenharmony_ci    // row 4
367cabdff1aSopenharmony_ci    t4 = cnst_para1 * in_r1 + cnst_para2 * in_r2
368cabdff1aSopenharmony_ci         - cnst_para0 * in_r0 - cnst_para3 * in_r3;
369cabdff1aSopenharmony_ci    t12 = cnst_para1 * in_l1 + cnst_para2 * in_l2
370cabdff1aSopenharmony_ci          - cnst_para0 * in_l0 - cnst_para3 * in_l3;
371cabdff1aSopenharmony_ci    in_l0 = LD_SH(src + 8 * stride);
372cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l0, in_r0, in_l0);
373cabdff1aSopenharmony_ci    // row 5
374cabdff1aSopenharmony_ci    t5 = cnst_para1 * in_r2 + cnst_para2 * in_r3
375cabdff1aSopenharmony_ci         - cnst_para0 * in_r1 - cnst_para3 * in_r0;
376cabdff1aSopenharmony_ci    t13 = cnst_para1 * in_l2 + cnst_para2 * in_l3
377cabdff1aSopenharmony_ci          - cnst_para0 * in_l1 - cnst_para3 * in_l0;
378cabdff1aSopenharmony_ci    in_l1 = LD_SH(src + 9 * stride);
379cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l1, in_r1, in_l1);
380cabdff1aSopenharmony_ci    // row 6
381cabdff1aSopenharmony_ci    t6 = cnst_para1 * in_r3 + cnst_para2 * in_r0
382cabdff1aSopenharmony_ci         - cnst_para0 * in_r2 - cnst_para3 * in_r1;
383cabdff1aSopenharmony_ci    t14 = cnst_para1 * in_l3 + cnst_para2 * in_l0
384cabdff1aSopenharmony_ci          - cnst_para0 * in_l2 - cnst_para3 * in_l1;
385cabdff1aSopenharmony_ci    in_l2 = LD_SH(src + 10 * stride);
386cabdff1aSopenharmony_ci    UNPCK_UB_SH(in_l2, in_r2, in_l2);
387cabdff1aSopenharmony_ci    // row 7
388cabdff1aSopenharmony_ci    t7 = cnst_para1 * in_r0 + cnst_para2 * in_r1
389cabdff1aSopenharmony_ci         - cnst_para0 * in_r3 - cnst_para3 * in_r2;
390cabdff1aSopenharmony_ci    t15 = cnst_para1 * in_l0 + cnst_para2 * in_l1
391cabdff1aSopenharmony_ci          - cnst_para0 * in_l3 - cnst_para3 * in_l2;
392cabdff1aSopenharmony_ci
393cabdff1aSopenharmony_ci    ADD4(t0, cnst_r, t1, cnst_r, t2, cnst_r, t3, cnst_r, t0, t1, t2, t3);
394cabdff1aSopenharmony_ci    ADD4(t4, cnst_r, t5, cnst_r, t6, cnst_r, t7, cnst_r, t4, t5, t6, t7);
395cabdff1aSopenharmony_ci    ADD4(t8, cnst_r, t9, cnst_r, t10, cnst_r, t11, cnst_r,
396cabdff1aSopenharmony_ci         t8, t9, t10, t11);
397cabdff1aSopenharmony_ci    ADD4(t12, cnst_r, t13, cnst_r, t14, cnst_r, t15, cnst_r,
398cabdff1aSopenharmony_ci         t12, t13, t14, t15);
399cabdff1aSopenharmony_ci    t0 >>= shift, t1 >>= shift, t2 >>= shift, t3 >>= shift;
400cabdff1aSopenharmony_ci    t4 >>= shift, t5 >>= shift, t6 >>= shift, t7 >>= shift;
401cabdff1aSopenharmony_ci    t8 >>= shift, t9 >>= shift, t10 >>= shift, t11 >>= shift;
402cabdff1aSopenharmony_ci    t12 >>= shift, t13 >>= shift, t14 >>= shift, t15 >>= shift;
403cabdff1aSopenharmony_ci    TRANSPOSE8x8_SH_SH(t0, t1, t2, t3, t4, t5, t6, t7,
404cabdff1aSopenharmony_ci                       t0, t1, t2, t3, t4, t5, t6, t7);
405cabdff1aSopenharmony_ci    TRANSPOSE8x8_SH_SH(t8, t9, t10, t11, t12, t13, t14, t15,
406cabdff1aSopenharmony_ci                       t8, t9, t10, t11, t12, t13, t14, t15);
407cabdff1aSopenharmony_ci    cnst_para0 = __msa_fill_h(para_value[hmode - 1][0]);
408cabdff1aSopenharmony_ci    cnst_para1 = __msa_fill_h(para_value[hmode - 1][1]);
409cabdff1aSopenharmony_ci    cnst_para2 = __msa_fill_h(para_value[hmode - 1][2]);
410cabdff1aSopenharmony_ci    cnst_para3 = __msa_fill_h(para_value[hmode - 1][3]);
411cabdff1aSopenharmony_ci    r = 64 - rnd;
412cabdff1aSopenharmony_ci    cnst_r = __msa_fill_h(r);
413cabdff1aSopenharmony_ci    // col 0 ~ 7
414cabdff1aSopenharmony_ci    t0 = cnst_para1 * t1 + cnst_para2 * t2 - cnst_para0 * t0 - cnst_para3 * t3;
415cabdff1aSopenharmony_ci    t1 = cnst_para1 * t2 + cnst_para2 * t3 - cnst_para0 * t1 - cnst_para3 * t4;
416cabdff1aSopenharmony_ci    t2 = cnst_para1 * t3 + cnst_para2 * t4 - cnst_para0 * t2 - cnst_para3 * t5;
417cabdff1aSopenharmony_ci    t3 = cnst_para1 * t4 + cnst_para2 * t5 - cnst_para0 * t3 - cnst_para3 * t6;
418cabdff1aSopenharmony_ci    t4 = cnst_para1 * t5 + cnst_para2 * t6 - cnst_para0 * t4 - cnst_para3 * t7;
419cabdff1aSopenharmony_ci    t5 = cnst_para1 * t6 + cnst_para2 * t7 - cnst_para0 * t5 - cnst_para3 * t8;
420cabdff1aSopenharmony_ci    t6 = cnst_para1 * t7 + cnst_para2 * t8 - cnst_para0 * t6 - cnst_para3 * t9;
421cabdff1aSopenharmony_ci    t7 = cnst_para1 * t8 + cnst_para2 * t9 - cnst_para0 * t7 - cnst_para3 * t10;
422cabdff1aSopenharmony_ci    ADD4(t0, cnst_r, t1, cnst_r, t2, cnst_r, t3, cnst_r, t0, t1, t2, t3);
423cabdff1aSopenharmony_ci    ADD4(t4, cnst_r, t5, cnst_r, t6, cnst_r, t7, cnst_r, t4, t5, t6, t7);
424cabdff1aSopenharmony_ci    t0 >>= 7, t1 >>= 7, t2 >>= 7, t3 >>= 7;
425cabdff1aSopenharmony_ci    t4 >>= 7, t5 >>= 7, t6 >>= 7, t7 >>= 7;
426cabdff1aSopenharmony_ci    TRANSPOSE8x8_SH_SH(t0, t1, t2, t3, t4, t5, t6, t7,
427cabdff1aSopenharmony_ci                       t0, t1, t2, t3, t4, t5, t6, t7);
428cabdff1aSopenharmony_ci    CLIP_SH8_0_255(t0, t1, t2, t3, t4, t5, t6, t7);
429cabdff1aSopenharmony_ci    PCKEV_B4_SH(t1, t0, t3, t2, t5, t4, t7, t6, t0, t1, t2, t3);
430cabdff1aSopenharmony_ci    ST_D8(t0, t1, t2, t3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
431cabdff1aSopenharmony_ci}
432cabdff1aSopenharmony_ci
/*
 * Define the two public entry points for one (hmode, vmode) pair:
 *   ff_put_vc1_mspel_mc<hmode><vmode>_msa     - a single 8x8 block
 *   ff_put_vc1_mspel_mc<hmode><vmode>_16_msa  - a 16x16 block, built from
 *       four 8x8 calls (left and right halves, then the same again 8 rows
 *       further down)
 * Both forward to put_vc1_mspel_mc_h_v_msa(), which indexes its tap table
 * with (mode - 1), so hmode and vmode must each be 1, 2 or 3.
 */
#define PUT_VC1_MSPEL_MC_MSA(hmode, vmode)                                    \
void ff_put_vc1_mspel_mc ## hmode ## vmode ## _msa(uint8_t *dst,              \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    put_vc1_mspel_mc_h_v_msa(dst, src, stride, hmode, vmode, rnd);            \
}                                                                             \
void ff_put_vc1_mspel_mc ## hmode ## vmode ## _16_msa(uint8_t *dst,           \
                                                   const uint8_t *src,        \
                                                   ptrdiff_t stride, int rnd) \
{                                                                             \
    put_vc1_mspel_mc_h_v_msa(dst, src, stride, hmode, vmode, rnd);            \
    put_vc1_mspel_mc_h_v_msa(dst + 8, src + 8, stride, hmode, vmode, rnd);    \
    dst += 8 * stride, src += 8 * stride;                                     \
    put_vc1_mspel_mc_h_v_msa(dst, src, stride, hmode, vmode, rnd);            \
    put_vc1_mspel_mc_h_v_msa(dst + 8, src + 8, stride, hmode, vmode, rnd);    \
}
450cabdff1aSopenharmony_ci
451cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(1, 1);
452cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(1, 2);
453cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(1, 3);
454cabdff1aSopenharmony_ci
455cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(2, 1);
456cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(2, 2);
457cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(2, 3);
458cabdff1aSopenharmony_ci
459cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(3, 1);
460cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(3, 2);
461cabdff1aSopenharmony_ciPUT_VC1_MSPEL_MC_MSA(3, 3);
462