/*
 * VC-1 and WMV3 - DSP functions Loongson MMI-optimized
 *
 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

23cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
24cabdff1aSopenharmony_ci#include "libavutil/avassert.h"
25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
26cabdff1aSopenharmony_ci
27cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h"
28cabdff1aSopenharmony_ci#include "constants.h"
29cabdff1aSopenharmony_ci#include "vc1dsp_mips.h"
30cabdff1aSopenharmony_ci#include "hpeldsp_mips.h"
31cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h"
32cabdff1aSopenharmony_ci
/*
 * VC1_INV_TRANCS_8_TYPE1 - one output pair of the 8-point inverse transform,
 * computed for four lanes at once, with the rounding term added BEFORE the
 * final add/subtract.
 *
 * Expands to a run of inline-assembly string literals; it must be used inside
 * an asm statement that provides the named operands listed below.
 *
 * Register state expected on entry (prepared by the caller):
 *   ftmp5 / ftmp6    halfword pairs multiplied by r1 (lanes 0-1 / lanes 2-3)
 *   ftmp7 / ftmp8    halfword pairs multiplied by r2
 *   ftmp9 / ftmp10   halfword pairs multiplied by r3
 *   ftmp11 / ftmp12  halfword pairs multiplied by r4
 *   ftmp0            shift count consumed by psraw
 *
 * Parameters:
 *   o1      operand receiving (A + B), narrowed to four halfwords
 *   o2      operand receiving (A - B), narrowed to four halfwords
 *   r1..r4  32-bit immediates, each packing two 16-bit coefficients; every one
 *           is copied into both 32-bit halves of a 64-bit register
 *   c0      operand added to the A sums (ftmp1, ftmp2) before the butterfly
 *
 * where A = pmaddhw(.., r1) + pmaddhw(.., r2) + c0
 *       B = pmaddhw(.., r3) + pmaddhw(.., r4)
 *
 * Clobbers: tmp0, ftmp1-ftmp4, ftmp13, ftmp14 (plus o1 and o2).
 * ftmp5-ftmp12 and ftmp0 are only read, so the macro can be expanded
 * repeatedly with different coefficients on the same inputs.
 */
#define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)                  \
        /* ftmp13 = {r1, r1}, ftmp14 = {r2, r2} */                          \
        "li         %[tmp0],    "#r1"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
        "li         %[tmp0],    "#r2"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
        /* A sums: ftmp1 = lanes 0-1, ftmp2 = lanes 2-3 */                  \
        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \
        "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \
        "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \
        "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \
                                                                            \
        /* ftmp13 = {r3, r3}, ftmp14 = {r4, r4} */                          \
        "li         %[tmp0],    "#r3"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
        "li         %[tmp0],    "#r4"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
        /* B sums: ftmp3 = lanes 0-1, ftmp4 = lanes 2-3 */                  \
        "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \
        "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \
        "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \
        "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \
                                                                            \
        /* round A, then butterfly: A + B and A - B, shifted by ftmp0 */    \
        "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \
        "paddw      %[ftmp2],   %[ftmp2],   "#c0"                     \n\t" \
        "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \
        "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \
        "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \
        "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \
        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \
        "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \
        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \
        "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \
        /* keep the low halfword of each 32-bit result: 4 words -> o1/o2 */ \
        "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \
        "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \
        "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \
        "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \
        "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \
        "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t"

/*
 * VC1_INV_TRANCS_8_TYPE2 - one output pair of the 8-point inverse transform,
 * computed for four lanes at once, with the rounding terms added AFTER the
 * add/subtract.
 *
 * Expands to a run of inline-assembly string literals; it must be used inside
 * an asm statement that provides the named operands listed below.
 *
 * Register state expected on entry (prepared by the caller):
 *   ftmp5 / ftmp6    halfword pairs multiplied by r1 (lanes 0-1 / lanes 2-3)
 *   ftmp7 / ftmp8    halfword pairs multiplied by r2
 *   ftmp9 / ftmp10   halfword pairs multiplied by r3
 *   ftmp11 / ftmp12  halfword pairs multiplied by r4
 *   ftmp0            shift count consumed by psraw
 *
 * Parameters:
 *   o1      operand receiving (A + B + c0), narrowed to four halfwords
 *   o2      operand receiving (A - B + c1 + c0), narrowed to four halfwords
 *   r1..r4  32-bit immediates, each packing two 16-bit coefficients; every one
 *           is copied into both 32-bit halves of a 64-bit register
 *   c0      operand added to all four butterfly results
 *   c1      operand added only to the difference results (the o2 side)
 *
 * where A = pmaddhw(.., r1) + pmaddhw(.., r2)
 *       B = pmaddhw(.., r3) + pmaddhw(.., r4)
 *
 * Clobbers: tmp0, ftmp1-ftmp4, ftmp13, ftmp14 (plus o1 and o2).
 * ftmp5-ftmp12 and ftmp0 are only read.
 */
#define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)              \
        /* ftmp13 = {r1, r1}, ftmp14 = {r2, r2} */                          \
        "li         %[tmp0],    "#r1"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
        "li         %[tmp0],    "#r2"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
        /* A sums: ftmp1 = lanes 0-1, ftmp2 = lanes 2-3 */                  \
        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \
        "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \
        "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \
        "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \
                                                                            \
        /* ftmp13 = {r3, r3}, ftmp14 = {r4, r4} */                          \
        "li         %[tmp0],    "#r3"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \
        "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \
        "li         %[tmp0],    "#r4"                                 \n\t" \
        "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \
        "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \
        /* B sums: ftmp3 = lanes 0-1, ftmp4 = lanes 2-3 */                  \
        "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \
        "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \
        "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \
        "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \
        "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \
                                                                            \
        /* butterfly first; c1 goes only onto the A - B results */          \
        "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \
        "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \
        "paddw      %[ftmp14],  %[ftmp14],  "#c1"                     \n\t" \
        "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \
        "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \
        "paddw      %[ftmp3],   %[ftmp3],   "#c1"                     \n\t" \
        /* c0 goes onto every result, then shift right by ftmp0 */          \
        "paddw      %[ftmp13],  %[ftmp13],  "#c0"                     \n\t" \
        "paddw      %[ftmp14],  %[ftmp14],  "#c0"                     \n\t" \
        "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \
        "paddw      %[ftmp3],   %[ftmp3],   "#c0"                     \n\t" \
        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \
        "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \
        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \
        "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \
        /* keep the low halfword of each 32-bit result: 4 words -> o1/o2 */ \
        "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \
        "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \
        "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \
        "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \
        "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \
        "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t"

125cabdff1aSopenharmony_ci/* Do inverse transform on 8x8 block */
126cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
127cabdff1aSopenharmony_ci{
128cabdff1aSopenharmony_ci    int dc = block[0];
129cabdff1aSopenharmony_ci    double ftmp[9];
130cabdff1aSopenharmony_ci    mips_reg addr[1];
131cabdff1aSopenharmony_ci    int count;
132cabdff1aSopenharmony_ci    union mmi_intfloat64 dc_u;
133cabdff1aSopenharmony_ci
134cabdff1aSopenharmony_ci    dc = (3 * dc +  1) >> 1;
135cabdff1aSopenharmony_ci    dc = (3 * dc + 16) >> 5;
136cabdff1aSopenharmony_ci    dc_u.i = dc;
137cabdff1aSopenharmony_ci
138cabdff1aSopenharmony_ci    __asm__ volatile(
139cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
140cabdff1aSopenharmony_ci        "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t"
141cabdff1aSopenharmony_ci        "li         %[count],   0x02                                    \n\t"
142cabdff1aSopenharmony_ci
143cabdff1aSopenharmony_ci        "1:                                                             \n\t"
144cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[dest], 0x00)
145cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[dest],        %[linesize]             \n\t"
146cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[addr0], 0x00)
147cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t"
148cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[addr0], 0x00)
149cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t"
150cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[addr0], 0x00)
151cabdff1aSopenharmony_ci
152cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]                \n\t"
153cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
154cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
155cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
156cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t"
157cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
158cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]                \n\t"
159cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
160cabdff1aSopenharmony_ci
161cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t"
162cabdff1aSopenharmony_ci        "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t"
163cabdff1aSopenharmony_ci        "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t"
164cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t"
165cabdff1aSopenharmony_ci        "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t"
166cabdff1aSopenharmony_ci        "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t"
167cabdff1aSopenharmony_ci        "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t"
168cabdff1aSopenharmony_ci        "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t"
169cabdff1aSopenharmony_ci
170cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
171cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
172cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
173cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
174cabdff1aSopenharmony_ci
175cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[dest], 0x00)
176cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[dest],        %[linesize]             \n\t"
177cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp2], %[addr0], 0x00)
178cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t"
179cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp3], %[addr0], 0x00)
180cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t"
181cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp4], %[addr0], 0x00)
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ci        "addiu      %[count],   %[count],       -0x01                   \n\t"
184cabdff1aSopenharmony_ci        PTR_ADDU   "%[dest],    %[addr0],       %[linesize]             \n\t"
185cabdff1aSopenharmony_ci        "bnez       %[count],   1b                                      \n\t"
186cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
187cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
188cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
189cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
190cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
191cabdff1aSopenharmony_ci          [addr0]"=&r"(addr[0]),
192cabdff1aSopenharmony_ci          [count]"=&r"(count),          [dest]"+&r"(dest)
193cabdff1aSopenharmony_ci        : [linesize]"r"((mips_reg)linesize),
194cabdff1aSopenharmony_ci          [dc]"f"(dc_u.f)
195cabdff1aSopenharmony_ci        : "memory"
196cabdff1aSopenharmony_ci    );
197cabdff1aSopenharmony_ci}
198cabdff1aSopenharmony_ci
#if _MIPS_SIM != _ABIO32
/*
 * Full 8x8 inverse transform, performed in place on block[].
 *
 * block is read and written as 8 rows of 8 int16_t values (16 bytes per row).
 * The work is done in two passes, each split into two halves of four 16-bit
 * lanes:
 *
 *   1st loop: combines the eight rows of block lane by lane using
 *             VC1_INV_TRANCS_8_TYPE1 with shift count 3 and the ff_pw_4
 *             operand, passes the eight 4-lane results through TRANSPOSE_4H
 *             (defined in mmiutils.h; presumably a 4x4 halfword transpose
 *             with four scratch registers - confirm there), and stores them
 *             to temp[].
 *   2nd loop: combines the eight rows of temp the same way using
 *             VC1_INV_TRANCS_8_TYPE2 with shift count 7 and the ff_pw_64 /
 *             ff_pw_1 operands, and stores the results straight back to block.
 *
 * Register carry-over between the passes: after the second half of the 1st
 * loop only ftmp19-ftmp22 are stored (temp + 0x48/0x58/0x68/0x78). ftmp15-
 * ftmp18 stay live in registers and are consumed directly by the first half
 * of the 2nd loop in place of loads from temp + 0x40/0x50/0x60/0x70; those
 * four temp slots are never written.
 *
 * Coefficient immediates pack two signed 16-bit values each (see the macro
 * definitions). The same four coefficient sets are used for every half.
 */
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
{
    DECLARE_ALIGNED(16, int16_t, temp[64]);
    double ftmp[23];
    uint64_t tmp[1];

    __asm__ volatile (
        /* 1st loop: start */
        /* ftmp0 = 3: shift count used by psraw inside the TYPE1 macro */
        "li         %[tmp0],    0x03                                    \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"

        /* 1st part: lanes 0-3 of every row of block */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp11], %[block], 0x10)
        MMI_LDC1(%[ftmp2], %[block], 0x20)
        MMI_LDC1(%[ftmp12], %[block], 0x30)
        MMI_LDC1(%[ftmp3], %[block], 0x40)
        MMI_LDC1(%[ftmp13], %[block], 0x50)
        MMI_LDC1(%[ftmp4], %[block], 0x60)
        MMI_LDC1(%[ftmp14], %[block], 0x70)
        /* interleave rows (0,2) and (4,6) into halfword pairs for pmaddhw */
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"

        /* interleave rows (1,3) and (5,7); ftmp11/ftmp12 are reused as outputs */
        "punpcklhw  %[ftmp9],  %[ftmp11],  %[ftmp12]                    \n\t"
        "punpckhhw  %[ftmp10], %[ftmp11],  %[ftmp12]                    \n\t"
        "punpcklhw  %[ftmp11], %[ftmp13],  %[ftmp14]                    \n\t"
        "punpckhhw  %[ftmp12], %[ftmp13],  %[ftmp14]                    \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_4])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_4])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_4])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_4])

        TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])

        TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])

        /* fill both 8-byte halves of the first four 16-byte rows of temp */
        MMI_SDC1(%[ftmp15], %[temp], 0x00)
        MMI_SDC1(%[ftmp19], %[temp], 0x08)
        MMI_SDC1(%[ftmp16], %[temp], 0x10)
        MMI_SDC1(%[ftmp20], %[temp], 0x18)
        MMI_SDC1(%[ftmp17], %[temp], 0x20)
        MMI_SDC1(%[ftmp21], %[temp], 0x28)
        MMI_SDC1(%[ftmp18], %[temp], 0x30)
        MMI_SDC1(%[ftmp22], %[temp], 0x38)

        /* 2nd part: lanes 4-7 of every row of block */
        MMI_LDC1(%[ftmp1], %[block], 0x08)
        MMI_LDC1(%[ftmp11], %[block], 0x18)
        MMI_LDC1(%[ftmp2], %[block], 0x28)
        MMI_LDC1(%[ftmp12], %[block], 0x38)
        MMI_LDC1(%[ftmp3], %[block], 0x48)
        MMI_LDC1(%[ftmp13], %[block], 0x58)
        MMI_LDC1(%[ftmp4], %[block], 0x68)
        MMI_LDC1(%[ftmp14], %[block], 0x78)
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"

        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
        "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t"
        "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_4])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_4])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_4])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_4])

        TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])

        TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])

        /*
         * Only the second halves of temp rows 4-7 are stored; ftmp15-ftmp18
         * (the first halves) stay in registers for the 2nd loop below.
         */
        MMI_SDC1(%[ftmp19], %[temp], 0x48)
        MMI_SDC1(%[ftmp20], %[temp], 0x58)
        MMI_SDC1(%[ftmp21], %[temp], 0x68)
        MMI_SDC1(%[ftmp22], %[temp], 0x78)
        /* 1st loop: end */

        /* 2nd loop: start */
        /* ftmp0 = 7: shift count used by psraw inside the TYPE2 macro */
        "li         %[tmp0],    0x07                                    \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"

        /* 1st part: lanes 0-3; rows 0-3 from temp, rows 4-7 from ftmp15-18 */
        MMI_LDC1(%[ftmp1], %[temp], 0x00)
        MMI_LDC1(%[ftmp11], %[temp], 0x10)
        MMI_LDC1(%[ftmp2], %[temp], 0x20)
        MMI_LDC1(%[ftmp12], %[temp], 0x30)
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp15],  %[ftmp17]                   \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp15],  %[ftmp17]                   \n\t"

        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
        "punpcklhw  %[ftmp11],  %[ftmp16],  %[ftmp18]                   \n\t"
        "punpckhhw  %[ftmp12],  %[ftmp16],  %[ftmp18]                   \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])

        /* results go to lanes 0-3 of block rows 0-7 (no transpose here) */
        MMI_SDC1(%[ftmp15], %[block], 0x00)
        MMI_SDC1(%[ftmp16], %[block], 0x10)
        MMI_SDC1(%[ftmp17], %[block], 0x20)
        MMI_SDC1(%[ftmp18], %[block], 0x30)
        MMI_SDC1(%[ftmp19], %[block], 0x40)
        MMI_SDC1(%[ftmp20], %[block], 0x50)
        MMI_SDC1(%[ftmp21], %[block], 0x60)
        MMI_SDC1(%[ftmp22], %[block], 0x70)

        /* 2nd part: lanes 4-7 of every row of temp */
        MMI_LDC1(%[ftmp1], %[temp], 0x08)
        MMI_LDC1(%[ftmp11], %[temp], 0x18)
        MMI_LDC1(%[ftmp2], %[temp], 0x28)
        MMI_LDC1(%[ftmp12], %[temp], 0x38)
        MMI_LDC1(%[ftmp3], %[temp], 0x48)
        MMI_LDC1(%[ftmp13], %[temp], 0x58)
        MMI_LDC1(%[ftmp4], %[temp], 0x68)
        MMI_LDC1(%[ftmp14], %[temp], 0x78)
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"

        "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t"
        "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t"
        "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t"
        "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])

        /* results go to lanes 4-7 of block rows 0-7 */
        MMI_SDC1(%[ftmp15], %[block], 0x08)
        MMI_SDC1(%[ftmp16], %[block], 0x18)
        MMI_SDC1(%[ftmp17], %[block], 0x28)
        MMI_SDC1(%[ftmp18], %[block], 0x38)
        MMI_SDC1(%[ftmp19], %[block], 0x48)
        MMI_SDC1(%[ftmp20], %[block], 0x58)
        MMI_SDC1(%[ftmp21], %[block], 0x68)
        MMI_SDC1(%[ftmp22], %[block], 0x78)
        /* 2nd loop: end */
        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
          [ftmp16]"=&f"(ftmp[16]),      [ftmp17]"=&f"(ftmp[17]),
          [ftmp18]"=&f"(ftmp[18]),      [ftmp19]"=&f"(ftmp[19]),
          [ftmp20]"=&f"(ftmp[20]),      [ftmp21]"=&f"(ftmp[21]),
          [ftmp22]"=&f"(ftmp[22]),
          [tmp0]"=&r"(tmp[0])
        : [ff_pw_1]"f"(ff_pw_32_1.f),   [ff_pw_64]"f"(ff_pw_32_64.f),
          [ff_pw_4]"f"(ff_pw_32_4.f),   [block]"r"(block),
          [temp]"r"(temp)
        : "memory"
    );
}
#endif

417cabdff1aSopenharmony_ci/* Do inverse transform on 8x4 part of block */
418cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
419cabdff1aSopenharmony_ci{
420cabdff1aSopenharmony_ci    int dc = block[0];
421cabdff1aSopenharmony_ci    double ftmp[9];
422cabdff1aSopenharmony_ci    union mmi_intfloat64 dc_u;
423cabdff1aSopenharmony_ci
424cabdff1aSopenharmony_ci    dc = ( 3 * dc +  1) >> 1;
425cabdff1aSopenharmony_ci    dc = (17 * dc + 64) >> 7;
426cabdff1aSopenharmony_ci    dc_u.i = dc;
427cabdff1aSopenharmony_ci
428cabdff1aSopenharmony_ci    __asm__ volatile(
429cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
430cabdff1aSopenharmony_ci        "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t"
431cabdff1aSopenharmony_ci
432cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[dest0], 0x00)
433cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[dest1], 0x00)
434cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[dest2], 0x00)
435cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[dest3], 0x00)
436cabdff1aSopenharmony_ci
437cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]                \n\t"
438cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
439cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
440cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
441cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t"
442cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
443cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]                \n\t"
444cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
445cabdff1aSopenharmony_ci
446cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t"
447cabdff1aSopenharmony_ci        "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t"
448cabdff1aSopenharmony_ci        "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t"
449cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t"
450cabdff1aSopenharmony_ci        "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t"
451cabdff1aSopenharmony_ci        "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t"
452cabdff1aSopenharmony_ci        "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t"
453cabdff1aSopenharmony_ci        "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t"
454cabdff1aSopenharmony_ci
455cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
456cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
457cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
458cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
459cabdff1aSopenharmony_ci
460cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[dest0], 0x00)
461cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp2], %[dest1], 0x00)
462cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp3], %[dest2], 0x00)
463cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp4], %[dest3], 0x00)
464cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
465cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
466cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
467cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
468cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8])
469cabdff1aSopenharmony_ci        : [dest0]"r"(dest+0*linesize),  [dest1]"r"(dest+1*linesize),
470cabdff1aSopenharmony_ci          [dest2]"r"(dest+2*linesize),  [dest3]"r"(dest+3*linesize),
471cabdff1aSopenharmony_ci          [dc]"f"(dc_u.f)
472cabdff1aSopenharmony_ci        : "memory"
473cabdff1aSopenharmony_ci    );
474cabdff1aSopenharmony_ci}
475cabdff1aSopenharmony_ci
476cabdff1aSopenharmony_ci#if _MIPS_SIM != _ABIO32
477cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
478cabdff1aSopenharmony_ci{
479cabdff1aSopenharmony_ci    int16_t *src = block;
480cabdff1aSopenharmony_ci    int16_t *dst = block;
481cabdff1aSopenharmony_ci    double ftmp[16];
482cabdff1aSopenharmony_ci    uint32_t tmp[1];
483cabdff1aSopenharmony_ci    int16_t count = 4;
484cabdff1aSopenharmony_ci    int16_t coeff[64] = {12, 16,  16,  15,  12,   9,   6,   4,
485cabdff1aSopenharmony_ci                         12, 15,   6,  -4, -12, -16, -16,  -9,
486cabdff1aSopenharmony_ci                         12,  9,  -6, -16, -12,   4,  16,  15,
487cabdff1aSopenharmony_ci                         12,  4, -16,  -9,  12,  15,  -6, -16,
488cabdff1aSopenharmony_ci                         12, -4, -16,   9,  12, -15,  -6,  16,
489cabdff1aSopenharmony_ci                         12, -9,  -6,  16, -12,  -4,  16, -15,
490cabdff1aSopenharmony_ci                         12, -15,  6,   4, -12,  16, -16,   9,
491cabdff1aSopenharmony_ci                         12, -16, 16, -15,  12,  -9,   6,  -4};
492cabdff1aSopenharmony_ci
493cabdff1aSopenharmony_ci    // 1st loop
494cabdff1aSopenharmony_ci    __asm__ volatile (
495cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                    \n\t"
496cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
497cabdff1aSopenharmony_ci
498cabdff1aSopenharmony_ci        "1:                                                             \n\t"
499cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[src], 0x00)
500cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[src], 0x08)
501cabdff1aSopenharmony_ci
502cabdff1aSopenharmony_ci        /* ftmp11: dst1,dst0 */
503cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[coeff], 0x00)
504cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[coeff], 0x08)
505cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp5], %[coeff], 0x10)
506cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp6], %[coeff], 0x18)
507cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
508cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
509cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
510cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
511cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
512cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
513cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
514cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
515cabdff1aSopenharmony_ci        "paddw      %[ftmp11],  %[ftmp7],   %[ftmp8]                    \n\t"
516cabdff1aSopenharmony_ci        "paddw      %[ftmp11],  %[ftmp11],  %[ff_pw_4]                  \n\t"
517cabdff1aSopenharmony_ci
518cabdff1aSopenharmony_ci        /* ftmp12: dst3,dst2 */
519cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[coeff], 0x20)
520cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[coeff], 0x28)
521cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp5], %[coeff], 0x30)
522cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp6], %[coeff], 0x38)
523cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
524cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
525cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
526cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
527cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
528cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
529cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
530cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
531cabdff1aSopenharmony_ci        "paddw      %[ftmp12],  %[ftmp7],   %[ftmp8]                    \n\t"
532cabdff1aSopenharmony_ci        "paddw      %[ftmp12],  %[ftmp12],  %[ff_pw_4]                  \n\t"
533cabdff1aSopenharmony_ci
534cabdff1aSopenharmony_ci        /* ftmp13: dst5,dst4 */
535cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[coeff], 0x40)
536cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[coeff], 0x48)
537cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp5], %[coeff], 0x50)
538cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp6], %[coeff], 0x58)
539cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
540cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
541cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
542cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
543cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
544cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
545cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
546cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
547cabdff1aSopenharmony_ci        "paddw      %[ftmp13],  %[ftmp7],   %[ftmp8]                    \n\t"
548cabdff1aSopenharmony_ci        "paddw      %[ftmp13],  %[ftmp13],  %[ff_pw_4]                  \n\t"
549cabdff1aSopenharmony_ci
550cabdff1aSopenharmony_ci        /* ftmp14: dst7,dst6 */
551cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[coeff], 0x60)
552cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[coeff], 0x68)
553cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp5], %[coeff], 0x70)
554cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp6], %[coeff], 0x78)
555cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t"
556cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t"
557cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
558cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t"
559cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t"
560cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
561cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t"
562cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t"
563cabdff1aSopenharmony_ci        "paddw      %[ftmp14],  %[ftmp7],   %[ftmp8]                    \n\t"
564cabdff1aSopenharmony_ci        "paddw      %[ftmp14],  %[ftmp14],  %[ff_pw_4]                  \n\t"
565cabdff1aSopenharmony_ci
566cabdff1aSopenharmony_ci        /* ftmp9: dst3,dst2,dst1,dst0    ftmp10: dst7,dst6,dst5,dst4 */
567cabdff1aSopenharmony_ci        "psraw      %[ftmp11],  %[ftmp11],  %[ftmp0]                    \n\t"
568cabdff1aSopenharmony_ci        "psraw      %[ftmp12],  %[ftmp12],  %[ftmp0]                    \n\t"
569cabdff1aSopenharmony_ci        "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                    \n\t"
570cabdff1aSopenharmony_ci        "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                    \n\t"
571cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp7],   %[ftmp11],  %[ftmp12]                   \n\t"
572cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp8],   %[ftmp11],  %[ftmp12]                   \n\t"
573cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t"
574cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp7],   %[ftmp13],  %[ftmp14]                   \n\t"
575cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp8],   %[ftmp13],  %[ftmp14]                   \n\t"
576cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t"
577cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp9], %[dst], 0x00)
578cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp10], %[dst], 0x08)
579cabdff1aSopenharmony_ci
580cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
581cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
582cabdff1aSopenharmony_ci        "addiu      %[count],   %[count],   -0x01                       \n\t"
583cabdff1aSopenharmony_ci        "bnez       %[count],   1b                                      \n\t"
584cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
585cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
586cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
587cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
588cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
589cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
590cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
591cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]),      [tmp0]"=&r"(tmp[0]),
592cabdff1aSopenharmony_ci          [src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
593cabdff1aSopenharmony_ci        : [ff_pw_4]"f"(ff_pw_32_4.f),   [coeff]"r"(coeff)
594cabdff1aSopenharmony_ci        : "memory"
595cabdff1aSopenharmony_ci    );
596cabdff1aSopenharmony_ci
597cabdff1aSopenharmony_ci    src = block;
598cabdff1aSopenharmony_ci
599cabdff1aSopenharmony_ci    // 2nd loop
600cabdff1aSopenharmony_ci    __asm__ volatile (
601cabdff1aSopenharmony_ci        "li         %[tmp0],    0x44                                    \n\t"
602cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp15]                               \n\t"
603cabdff1aSopenharmony_ci
604cabdff1aSopenharmony_ci        // 1st part
605cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                    \n\t"
606cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
607cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[src], 0x00)
608cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[src], 0x10)
609cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[src], 0x20)
610cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[src], 0x30)
611cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
612cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
613cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
614cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
615cabdff1aSopenharmony_ci
616cabdff1aSopenharmony_ci        /* ftmp11: dst03,dst02,dst01,dst00 */
617cabdff1aSopenharmony_ci        "li         %[tmp0],    0x00160011                              \n\t"
618cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
619cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
620cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
621cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
622cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
623cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
624cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
625cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
626cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
627cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
628cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
629cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
630cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
631cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
632cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
633cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
634cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
635cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
636cabdff1aSopenharmony_ci
637cabdff1aSopenharmony_ci        /* ftmp12: dst13,dst12,dst11,dst10 */
638cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
639cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
640cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
641cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffeaffef                              \n\t"
642cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
643cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
644cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
645cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
646cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
647cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
648cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
649cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
650cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
651cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
652cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
653cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
654cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
655cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
656cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
657cabdff1aSopenharmony_ci
658cabdff1aSopenharmony_ci        /* ftmp13: dst23,dst22,dst21,dst20 */
659cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
660cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
661cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
662cabdff1aSopenharmony_ci        "li         %[tmp0],    0x0016ffef                              \n\t"
663cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
664cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
665cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
666cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
667cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
668cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
669cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
670cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
671cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
672cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
673cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
674cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
675cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
676cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
677cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
678cabdff1aSopenharmony_ci
679cabdff1aSopenharmony_ci        /* ftmp14: dst33,dst32,dst31,dst30 */
680cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffea0011                              \n\t"
681cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
682cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
683cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
684cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
685cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
686cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
687cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
688cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
689cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
690cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
691cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
692cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
693cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
694cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
695cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
696cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
697cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
698cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
699cabdff1aSopenharmony_ci
700cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dest], 0x00)
701cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
702cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
703cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
704cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
705cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
706cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
707cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
708cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
709cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
710cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
711cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
712cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
713cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
714cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
715cabdff1aSopenharmony_ci        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
716cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
717cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
718cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
719cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
720cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dest], 0x00)
721cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
722cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
723cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
724cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
725cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
726cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
727cabdff1aSopenharmony_ci
728cabdff1aSopenharmony_ci        // 2nd part
729cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                    \n\t"
730cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
731cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[src], 0x08)
732cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[src], 0x18)
733cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[src], 0x28)
734cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[src], 0x38)
735cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
736cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
737cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
738cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
739cabdff1aSopenharmony_ci
740cabdff1aSopenharmony_ci        /* ftmp11: dst03,dst02,dst01,dst00 */
741cabdff1aSopenharmony_ci        "li         %[tmp0],    0x00160011                              \n\t"
742cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
743cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
744cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
745cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
746cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
747cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
748cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
749cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
750cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
751cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
752cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
753cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
754cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
755cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
756cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
757cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
758cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
759cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
760cabdff1aSopenharmony_ci
761cabdff1aSopenharmony_ci        /* ftmp12: dst13,dst12,dst11,dst10 */
762cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
763cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
764cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
765cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffeaffef                              \n\t"
766cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
767cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
768cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
769cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
770cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
771cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
772cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
773cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
774cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
775cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
776cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
777cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
778cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
779cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
780cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
781cabdff1aSopenharmony_ci
782cabdff1aSopenharmony_ci        /* ftmp13: dst23,dst22,dst21,dst20 */
783cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
784cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
785cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
786cabdff1aSopenharmony_ci        "li         %[tmp0],    0x0016ffef                              \n\t"
787cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
788cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
789cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
790cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
791cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
792cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
793cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
794cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
795cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
796cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
797cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
798cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
799cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
800cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
801cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
802cabdff1aSopenharmony_ci
803cabdff1aSopenharmony_ci        /* ftmp14: dst33,dst32,dst31,dst30 */
804cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffea0011                              \n\t"
805cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
806cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
807cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
808cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
809cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
810cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
811cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
812cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
813cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
814cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
815cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
816cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
817cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
818cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
819cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
820cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
821cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
822cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
823cabdff1aSopenharmony_ci
824cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dest], 0x04)
825cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
826cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[tmp0], 0x04)
827cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
828cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[tmp0], 0x04)
829cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
830cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[tmp0], 0x04)
831cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
832cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
833cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
834cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
835cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
836cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
837cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
838cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
839cabdff1aSopenharmony_ci        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
840cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
841cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
842cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
843cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
844cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dest], 0x04)
845cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
846cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[tmp0], 0x04)
847cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
848cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[tmp0], 0x04)
849cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
850cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[tmp0], 0x04)
851cabdff1aSopenharmony_ci
852cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
853cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
854cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
855cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
856cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
857cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
858cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
859cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
860cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0])
861cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_32_64.f),
862cabdff1aSopenharmony_ci          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
863cabdff1aSopenharmony_ci        :"memory"
864cabdff1aSopenharmony_ci    );
865cabdff1aSopenharmony_ci}
866cabdff1aSopenharmony_ci#endif
867cabdff1aSopenharmony_ci
868cabdff1aSopenharmony_ci/* Do inverse transform on 4x8 parts of block */
869cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
870cabdff1aSopenharmony_ci{
871cabdff1aSopenharmony_ci    int dc = block[0];
872cabdff1aSopenharmony_ci    double ftmp[9];
873cabdff1aSopenharmony_ci    union mmi_intfloat64 dc_u;
874cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
875cabdff1aSopenharmony_ci
876cabdff1aSopenharmony_ci    dc = (17 * dc +  4) >> 3;
877cabdff1aSopenharmony_ci    dc = (12 * dc + 64) >> 7;
878cabdff1aSopenharmony_ci    dc_u.i = dc;
879cabdff1aSopenharmony_ci
880cabdff1aSopenharmony_ci    __asm__ volatile(
881cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
882cabdff1aSopenharmony_ci        "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t"
883cabdff1aSopenharmony_ci
884cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dest0], 0x00)
885cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[dest1], 0x00)
886cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[dest2], 0x00)
887cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[dest3], 0x00)
888cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp5], %[dest4], 0x00)
889cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp6], %[dest5], 0x00)
890cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp7], %[dest6], 0x00)
891cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp8], %[dest7], 0x00)
892cabdff1aSopenharmony_ci
893cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
894cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
895cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
896cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
897cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
898cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
899cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
900cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
901cabdff1aSopenharmony_ci
902cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t"
903cabdff1aSopenharmony_ci        "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t"
904cabdff1aSopenharmony_ci        "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t"
905cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t"
906cabdff1aSopenharmony_ci        "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t"
907cabdff1aSopenharmony_ci        "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t"
908cabdff1aSopenharmony_ci        "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t"
909cabdff1aSopenharmony_ci        "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t"
910cabdff1aSopenharmony_ci
911cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
912cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
913cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
914cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
915cabdff1aSopenharmony_ci        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
916cabdff1aSopenharmony_ci        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
917cabdff1aSopenharmony_ci        "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
918cabdff1aSopenharmony_ci        "packushb   %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
919cabdff1aSopenharmony_ci
920cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dest0], 0x00)
921cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[dest1], 0x00)
922cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[dest2], 0x00)
923cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[dest3], 0x00)
924cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp5], %[dest4], 0x00)
925cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp6], %[dest5], 0x00)
926cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp7], %[dest6], 0x00)
927cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp8], %[dest7], 0x00)
928cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
929cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
930cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
931cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
932cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
933cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8])
934cabdff1aSopenharmony_ci        : [dest0]"r"(dest+0*linesize),  [dest1]"r"(dest+1*linesize),
935cabdff1aSopenharmony_ci          [dest2]"r"(dest+2*linesize),  [dest3]"r"(dest+3*linesize),
936cabdff1aSopenharmony_ci          [dest4]"r"(dest+4*linesize),  [dest5]"r"(dest+5*linesize),
937cabdff1aSopenharmony_ci          [dest6]"r"(dest+6*linesize),  [dest7]"r"(dest+7*linesize),
938cabdff1aSopenharmony_ci          [dc]"f"(dc_u.f)
939cabdff1aSopenharmony_ci        : "memory"
940cabdff1aSopenharmony_ci    );
941cabdff1aSopenharmony_ci}
942cabdff1aSopenharmony_ci
#if _MIPS_SIM != _ABIO32
/*
 * Full inverse transform of a 4x8 block (4 pixels wide, 8 rows high),
 * added to dest.
 *
 * Pass 1 runs over the 8 rows of block (row stride 0x10 bytes) and rewrites
 * the first 4 coefficients of each row in place. Pass 2 runs over the
 * columns of that intermediate result, adds the outcome to 4 pixels in each
 * of 8 rows of dest and stores them with unsigned saturation.
 *
 * dest     - first pixel of the first row to update
 * linesize - byte distance between two consecutive rows of dest
 * block    - coefficient block; overwritten by pass 1
 */
void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
{
    int16_t *src = block;
    int16_t *dst = block;
    double ftmp[23];
    uint64_t count = 8, tmp[1];
    /* Multiplier rows for pass 1. They are fetched with 64-bit ldc1 loads,
     * so the table is explicitly 8-byte aligned; it never changes, hence
     * static const instead of being rebuilt on the stack for every call. */
    DECLARE_ALIGNED(8, static const int16_t, coeff)[16] = {17, 22, 17, 10,
                                                           17, 10,-17,-22,
                                                           17,-10,-17, 22,
                                                           17,-22, 17,-10};

    // 1st loop
    __asm__ volatile (

        /* ftmp0 = shift count (3) for this pass */
        "li         %[tmp0],    0x03                                    \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"

        /* ftmp2..ftmp5 = the four multiplier rows */
        MMI_LDC1(%[ftmp2], %[coeff], 0x00)
        MMI_LDC1(%[ftmp3], %[coeff], 0x08)
        MMI_LDC1(%[ftmp4], %[coeff], 0x10)
        MMI_LDC1(%[ftmp5], %[coeff], 0x18)
        "1:                                                             \n\t"
        /* ftmp8: dst3,dst2,dst1,dst0 */
        MMI_LDC1(%[ftmp1], %[src], 0x00)
        "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t"
        "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t"
        "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t"
        "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t"
        /* Combine the two partial sums of every pmaddhw result. */
        "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t"
        "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t"
        "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
        "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
        "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t"
        "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t"
        /* Add the rounding constant, then shift right by ftmp0. */
        "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t"
        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t"
        "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
        /* Narrow the four 32-bit results to halfwords and store the row. */
        "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
        "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
        "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t"
        MMI_SDC1(%[ftmp8], %[dst], 0x00)

        /* Advance to the next row; loop until all 8 rows are done. */
        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
        "addiu      %[count],   %[count],   -0x01                       \n\t"
        "bnez       %[count],   1b                                      \n\t"
        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
          [tmp0]"=&r"(tmp[0]),          [count]"+&r"(count),
          [src]"+&r"(src),              [dst]"+&r"(dst)
        : [ff_pw_4]"f"(ff_pw_32_4.f),   [coeff]"r"(coeff)
        : "memory"
    );

    /* Pass 2 reads the intermediate result from the start of block again. */
    src = block;

    // 2nd loop
    __asm__ volatile (
        /* ftmp0 = shift count (7) for this pass */
        "li         %[tmp0],    0x07                                    \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"

        /* Rows 0, 2, 4, 6 of the intermediate block, halfword-interleaved. */
        MMI_LDC1(%[ftmp1], %[src], 0x00)
        MMI_LDC1(%[ftmp2], %[src], 0x20)
        MMI_LDC1(%[ftmp3], %[src], 0x40)
        MMI_LDC1(%[ftmp4], %[src], 0x60)
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"

        /* Rows 1, 3, 5, 7 of the intermediate block, halfword-interleaved. */
        MMI_LDC1(%[ftmp1], %[src], 0x10)
        MMI_LDC1(%[ftmp2], %[src], 0x30)
        MMI_LDC1(%[ftmp3], %[src], 0x50)
        MMI_LDC1(%[ftmp4], %[src], 0x70)
        "punpcklhw  %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
        "punpckhhw  %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
        "punpcklhw  %[ftmp11],  %[ftmp3],   %[ftmp4]                    \n\t"
        "punpckhhw  %[ftmp12],  %[ftmp3],   %[ftmp4]                    \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])

        /* Load 4 pixels from each of the 8 destination rows. */
        MMI_LWC1(%[ftmp1], %[dest], 0x00)
        PTR_ADDU  "%[tmp0],   %[dest],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
        PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
        MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
        /* ftmp0 is no longer needed as shift count; reuse it as zero. */
        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t"
        "punpcklbh  %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"

        /* Add the transform output rows to the widened pixels. */
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp15]                   \n\t"
        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp16]                   \n\t"
        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp17]                   \n\t"
        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp18]                   \n\t"
        "paddh      %[ftmp5],   %[ftmp5],   %[ftmp19]                   \n\t"
        "paddh      %[ftmp6],   %[ftmp6],   %[ftmp20]                   \n\t"
        "paddh      %[ftmp7],   %[ftmp7],   %[ftmp21]                   \n\t"
        "paddh      %[ftmp8],   %[ftmp8],   %[ftmp22]                   \n\t"

        /* Narrow back to bytes with unsigned saturation. */
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t"
        "packushb   %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"

        /* Store the 4 updated pixels of each destination row. */
        MMI_SWC1(%[ftmp1], %[dest], 0x00)
        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
        MMI_SWC1(%[ftmp8], %[tmp0], 0x00)

        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
          [ftmp16]"=&f"(ftmp[16]),      [ftmp17]"=&f"(ftmp[17]),
          [ftmp18]"=&f"(ftmp[18]),      [ftmp19]"=&f"(ftmp[19]),
          [ftmp20]"=&f"(ftmp[20]),      [ftmp21]"=&f"(ftmp[21]),
          [ftmp22]"=&f"(ftmp[22]),
          [tmp0]"=&r"(tmp[0])
        : [ff_pw_1]"f"(ff_pw_32_1.f),   [ff_pw_64]"f"(ff_pw_32_64.f),
          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
        : "memory"
    );
}
#endif
1122cabdff1aSopenharmony_ci
1123cabdff1aSopenharmony_ci/* Do inverse transform on 4x4 part of block */
1124cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
1125cabdff1aSopenharmony_ci{
1126cabdff1aSopenharmony_ci    int dc = block[0];
1127cabdff1aSopenharmony_ci    double ftmp[5];
1128cabdff1aSopenharmony_ci    union mmi_intfloat64 dc_u;
1129cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
1130cabdff1aSopenharmony_ci
1131cabdff1aSopenharmony_ci    dc = (17 * dc +  4) >> 3;
1132cabdff1aSopenharmony_ci    dc = (17 * dc + 64) >> 7;
1133cabdff1aSopenharmony_ci    dc_u.i = dc;
1134cabdff1aSopenharmony_ci
1135cabdff1aSopenharmony_ci    __asm__ volatile(
1136cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1137cabdff1aSopenharmony_ci        "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t"
1138cabdff1aSopenharmony_ci
1139cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dest0], 0x00)
1140cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[dest1], 0x00)
1141cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[dest2], 0x00)
1142cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[dest3], 0x00)
1143cabdff1aSopenharmony_ci
1144cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
1145cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
1146cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1147cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
1148cabdff1aSopenharmony_ci
1149cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t"
1150cabdff1aSopenharmony_ci        "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t"
1151cabdff1aSopenharmony_ci        "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t"
1152cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t"
1153cabdff1aSopenharmony_ci
1154cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
1155cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
1156cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1157cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
1158cabdff1aSopenharmony_ci
1159cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dest0], 0x00)
1160cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[dest1], 0x00)
1161cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[dest2], 0x00)
1162cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[dest3], 0x00)
1163cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1164cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1165cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1166cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4])
1167cabdff1aSopenharmony_ci        : [dest0]"r"(dest+0*linesize),  [dest1]"r"(dest+1*linesize),
1168cabdff1aSopenharmony_ci          [dest2]"r"(dest+2*linesize),  [dest3]"r"(dest+3*linesize),
1169cabdff1aSopenharmony_ci          [dc]"f"(dc_u.f)
1170cabdff1aSopenharmony_ci        : "memory"
1171cabdff1aSopenharmony_ci    );
1172cabdff1aSopenharmony_ci}
1173cabdff1aSopenharmony_ci
1174cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
1175cabdff1aSopenharmony_ci{
1176cabdff1aSopenharmony_ci    int16_t *src = block;
1177cabdff1aSopenharmony_ci    int16_t *dst = block;
1178cabdff1aSopenharmony_ci    double ftmp[16];
1179cabdff1aSopenharmony_ci    uint32_t count = 4, tmp[1];
1180cabdff1aSopenharmony_ci    int16_t coeff[16] = {17, 22, 17, 10,
1181cabdff1aSopenharmony_ci                         17, 10,-17,-22,
1182cabdff1aSopenharmony_ci                         17,-10,-17, 22,
1183cabdff1aSopenharmony_ci                         17,-22, 17,-10};
1184cabdff1aSopenharmony_ci    // 1st loop
1185cabdff1aSopenharmony_ci    __asm__ volatile (
1186cabdff1aSopenharmony_ci
1187cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                    \n\t"
1188cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
1189cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[coeff], 0x00)
1190cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[coeff], 0x08)
1191cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[coeff], 0x10)
1192cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp5], %[coeff], 0x18)
1193cabdff1aSopenharmony_ci        "1:                                                             \n\t"
1194cabdff1aSopenharmony_ci        /* ftmp8: dst3,dst2,dst1,dst0 */
1195cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[src], 0x00)
1196cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t"
1197cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t"
1198cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t"
1199cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t"
1200cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t"
1201cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t"
1202cabdff1aSopenharmony_ci        "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
1203cabdff1aSopenharmony_ci        "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
1204cabdff1aSopenharmony_ci        "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t"
1205cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t"
1206cabdff1aSopenharmony_ci        "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t"
1207cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t"
1208cabdff1aSopenharmony_ci        "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t"
1209cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
1210cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t"
1211cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t"
1212cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t"
1213cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp8], %[dst], 0x00)
1214cabdff1aSopenharmony_ci
1215cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src],     %[src],     0x10                        \n\t"
1216cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst],     %[dst],     0x10                        \n\t"
1217cabdff1aSopenharmony_ci        "addiu      %[count],   %[count],   -0x01                       \n\t"
1218cabdff1aSopenharmony_ci        "bnez       %[count],   1b                                      \n\t"
1219cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1220cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1221cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1222cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1223cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1224cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
1225cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),          [count]"+&r"(count),
1226cabdff1aSopenharmony_ci          [src]"+&r"(src),              [dst]"+&r"(dst)
1227cabdff1aSopenharmony_ci        : [ff_pw_4]"f"(ff_pw_32_4.f),   [coeff]"r"(coeff)
1228cabdff1aSopenharmony_ci        : "memory"
1229cabdff1aSopenharmony_ci    );
1230cabdff1aSopenharmony_ci
1231cabdff1aSopenharmony_ci    src = block;
1232cabdff1aSopenharmony_ci
1233cabdff1aSopenharmony_ci    // 2nd loop
1234cabdff1aSopenharmony_ci    __asm__ volatile (
1235cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                    \n\t"
1236cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp0]                                \n\t"
1237cabdff1aSopenharmony_ci        "li         %[tmp0],    0x44                                    \n\t"
1238cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp15]                               \n\t"
1239cabdff1aSopenharmony_ci
1240cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[src], 0x00)
1241cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[src], 0x10)
1242cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[src], 0x20)
1243cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[src], 0x30)
1244cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t"
1245cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t"
1246cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t"
1247cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t"
1248cabdff1aSopenharmony_ci
1249cabdff1aSopenharmony_ci        /* ftmp11: dst03,dst02,dst01,dst00 */
1250cabdff1aSopenharmony_ci        "li         %[tmp0],    0x00160011                              \n\t"
1251cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
1252cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
1253cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
1254cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
1255cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
1256cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
1257cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
1258cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
1259cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
1260cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
1261cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
1262cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
1263cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
1264cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
1265cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
1266cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
1267cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
1268cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t"
1269cabdff1aSopenharmony_ci
1270cabdff1aSopenharmony_ci        /* ftmp12: dst13,dst12,dst11,dst10 */
1271cabdff1aSopenharmony_ci        "li         %[tmp0],    0x000a0011                              \n\t"
1272cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
1273cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
1274cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffeaffef                              \n\t"
1275cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
1276cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
1277cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
1278cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
1279cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
1280cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
1281cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
1282cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
1283cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
1284cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
1285cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
1286cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
1287cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
1288cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
1289cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t"
1290cabdff1aSopenharmony_ci
1291cabdff1aSopenharmony_ci        /* ftmp13: dst23,dst22,dst21,dst20 */
1292cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
1293cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
1294cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
1295cabdff1aSopenharmony_ci        "li         %[tmp0],    0x0016ffef                              \n\t"
1296cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
1297cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
1298cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
1299cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
1300cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
1301cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
1302cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
1303cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
1304cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
1305cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
1306cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
1307cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
1308cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
1309cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
1310cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t"
1311cabdff1aSopenharmony_ci
1312cabdff1aSopenharmony_ci        /* ftmp14: dst33,dst32,dst31,dst30 */
1313cabdff1aSopenharmony_ci        "li         %[tmp0],    0xffea0011                              \n\t"
1314cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
1315cabdff1aSopenharmony_ci        "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t"
1316cabdff1aSopenharmony_ci        "li         %[tmp0],    0xfff60011                              \n\t"
1317cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                                \n\t"
1318cabdff1aSopenharmony_ci        "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t"
1319cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t"
1320cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t"
1321cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t"
1322cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t"
1323cabdff1aSopenharmony_ci        "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t"
1324cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t"
1325cabdff1aSopenharmony_ci        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t"
1326cabdff1aSopenharmony_ci        "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t"
1327cabdff1aSopenharmony_ci        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t"
1328cabdff1aSopenharmony_ci        "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t"
1329cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t"
1330cabdff1aSopenharmony_ci        "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t"
1331cabdff1aSopenharmony_ci        "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t"
1332cabdff1aSopenharmony_ci
1333cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dest], 0x00)
1334cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t"
1335cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
1336cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
1337cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
1338cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t"
1339cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
1340cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
1341cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
1342cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
1343cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
1344cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
1345cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t"
1346cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t"
1347cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t"
1348cabdff1aSopenharmony_ci        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t"
1349cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t"
1350cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t"
1351cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t"
1352cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t"
1353cabdff1aSopenharmony_ci
1354cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dest], 0x00)
1355cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t"
1356cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
1357cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
1358cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
1359cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t"
1360cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
1361cabdff1aSopenharmony_ci
1362cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1363cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1364cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1365cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1366cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1367cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
1368cabdff1aSopenharmony_ci          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
1369cabdff1aSopenharmony_ci          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
1370cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0])
1371cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_32_64.f),
1372cabdff1aSopenharmony_ci          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
1373cabdff1aSopenharmony_ci        :"memory"
1374cabdff1aSopenharmony_ci    );
1375cabdff1aSopenharmony_ci}
1376cabdff1aSopenharmony_ci
/*
 * Overlap smoothing across a horizontal neighbourhood: for each of 8 rows
 * the four pixels src[-2] .. src[1] are updated. The rounding term
 * alternates between 1 and 0 from row to row, starting with 1.
 */
void ff_vc1_h_overlap_mmi(uint8_t *src, ptrdiff_t stride)
{
    int row;
    int round = 1;

    for (row = 0; row < 8; row++, src += stride, round = !round) {
        const int p0 = src[-2];
        const int p1 = src[-1];
        const int p2 = src[0];
        const int p3 = src[1];
        const int outer = (p0 - p3 + 3 + round) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - round) >> 3;

        /* only the two pixels next to the edge are clipped */
        src[-2] = p0 - outer;
        src[-1] = av_clip_uint8(p1 - inner);
        src[0]  = av_clip_uint8(p2 + inner);
        src[1]  = p3 + outer;
    }
}
1400cabdff1aSopenharmony_ci
/*
 * Overlap smoothing on 16-bit samples across a left/right block pair.
 * For each of 8 rows, left[6], left[7], right[0] and right[1] are updated.
 *
 * flags bit 1 selects the initial rounding pair (3/4 instead of 4/3),
 * flags bit 0 makes the two rounding values swap after every row.
 */
void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
{
    int row;
    int round_a, round_b;

    if (flags & 2)
        round_a = 3;
    else
        round_a = 4;
    round_b = 7 - round_a;

    for (row = 0; row < 8; row++) {
        const int l6 = left[6];
        const int l7 = left[7];
        const int r0 = right[0];
        const int r1 = right[1];
        const int outer = l6 - r1;
        const int inner = l6 - r1 + l7 - r0;

        left[6]  = ((l6 << 3) - outer + round_a) >> 3;
        left[7]  = ((l7 << 3) - inner + round_b) >> 3;
        right[0] = ((r0 << 3) + inner + round_a) >> 3;
        right[1] = ((r1 << 3) + outer + round_b) >> 3;

        left  += left_stride;
        right += right_stride;

        if (flags & 1) {
            round_a = 7 - round_a;
            round_b = 7 - round_b;
        }
    }
}
1429cabdff1aSopenharmony_ci
/*
 * Overlap smoothing across a vertical neighbourhood: for each of 8 columns
 * the four pixels at src - 2 * stride .. src + stride are updated. The
 * rounding term alternates between 1 and 0 from column to column,
 * starting with 1.
 */
void ff_vc1_v_overlap_mmi(uint8_t *src, ptrdiff_t stride)
{
    int col;
    int round = 1;

    for (col = 0; col < 8; col++) {
        uint8_t *px = src + col;
        const int p0 = px[-2 * stride];
        const int p1 = px[-stride];
        const int p2 = px[0];
        const int p3 = px[stride];
        const int outer = (p0 - p3 + 3 + round) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - round) >> 3;

        /* only the two pixels next to the edge are clipped */
        px[-2 * stride] = p0 - outer;
        px[-stride]     = av_clip_uint8(p1 - inner);
        px[0]           = av_clip_uint8(p2 + inner);
        px[stride]      = p3 + outer;

        round = !round;
    }
}
1453cabdff1aSopenharmony_ci
/*
 * Overlap smoothing on 16-bit samples across a top/bottom block pair.
 * For each of 8 columns, top[48 + col], top[56 + col], bottom[col] and
 * bottom[8 + col] are updated. The rounding pair starts as 4/3 and swaps
 * after every column.
 */
void ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom)
{
    int col;
    int round_a = 4;
    int round_b = 3;

    for (col = 0; col < 8; col++) {
        int16_t *t = top + col;
        int16_t *b = bottom + col;
        const int t6 = t[48];
        const int t7 = t[56];
        const int b0 = b[0];
        const int b1 = b[8];
        const int outer = t6 - b1;
        const int inner = t6 - b1 + t7 - b0;

        t[48] = ((t6 << 3) - outer + round_a) >> 3;
        t[56] = ((t7 << 3) - inner + round_b) >> 3;
        b[0]  = ((b0 << 3) + inner + round_a) >> 3;
        b[8]  = ((b1 << 3) + outer + round_b) >> 3;

        round_b = 7 - round_b;
        round_a = 7 - round_a;
    }
}
1479cabdff1aSopenharmony_ci
1480cabdff1aSopenharmony_ci/**
1481cabdff1aSopenharmony_ci * VC-1 in-loop deblocking filter for one line
1482cabdff1aSopenharmony_ci * @param src source block type
1483cabdff1aSopenharmony_ci * @param stride block stride
1484cabdff1aSopenharmony_ci * @param pq block quantizer
1485cabdff1aSopenharmony_ci * @return whether other 3 pairs should be filtered or not
1486cabdff1aSopenharmony_ci * @see 8.6
1487cabdff1aSopenharmony_ci */
1488cabdff1aSopenharmony_cistatic av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq)
1489cabdff1aSopenharmony_ci{
1490cabdff1aSopenharmony_ci    int a0 = (2 * (src[-2 * stride] - src[1 * stride]) -
1491cabdff1aSopenharmony_ci              5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
1492cabdff1aSopenharmony_ci    int a0_sign = a0 >> 31;        /* Store sign */
1493cabdff1aSopenharmony_ci
1494cabdff1aSopenharmony_ci    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
1495cabdff1aSopenharmony_ci    if (a0 < pq) {
1496cabdff1aSopenharmony_ci        int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
1497cabdff1aSopenharmony_ci                        5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
1498cabdff1aSopenharmony_ci        int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
1499cabdff1aSopenharmony_ci                        5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
1500cabdff1aSopenharmony_ci        if (a1 < a0 || a2 < a0) {
1501cabdff1aSopenharmony_ci            int clip      = src[-1 * stride] - src[0 * stride];
1502cabdff1aSopenharmony_ci            int clip_sign = clip >> 31;
1503cabdff1aSopenharmony_ci
1504cabdff1aSopenharmony_ci            clip = ((clip ^ clip_sign) - clip_sign) >> 1;
1505cabdff1aSopenharmony_ci            if (clip) {
1506cabdff1aSopenharmony_ci                int a3     = FFMIN(a1, a2);
1507cabdff1aSopenharmony_ci                int d      = 5 * (a3 - a0);
1508cabdff1aSopenharmony_ci                int d_sign = (d >> 31);
1509cabdff1aSopenharmony_ci
1510cabdff1aSopenharmony_ci                d       = ((d ^ d_sign) - d_sign) >> 3;
1511cabdff1aSopenharmony_ci                d_sign ^= a0_sign;
1512cabdff1aSopenharmony_ci
1513cabdff1aSopenharmony_ci                if (d_sign ^ clip_sign)
1514cabdff1aSopenharmony_ci                    d = 0;
1515cabdff1aSopenharmony_ci                else {
1516cabdff1aSopenharmony_ci                    d = FFMIN(d, clip);
1517cabdff1aSopenharmony_ci                    d = (d ^ d_sign) - d_sign; /* Restore sign */
1518cabdff1aSopenharmony_ci                    src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
1519cabdff1aSopenharmony_ci                    src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
1520cabdff1aSopenharmony_ci                }
1521cabdff1aSopenharmony_ci                return 1;
1522cabdff1aSopenharmony_ci            }
1523cabdff1aSopenharmony_ci        }
1524cabdff1aSopenharmony_ci    }
1525cabdff1aSopenharmony_ci    return 0;
1526cabdff1aSopenharmony_ci}
1527cabdff1aSopenharmony_ci
/**
 * VC-1 in-loop deblocking filter.
 *
 * The edge is handled in groups of 4 lines. In each group the third line
 * (index 2) is filtered first; the remaining three lines are filtered
 * only if that call returned non-zero.
 *
 * @param src    first sample after the edge on the first line
 * @param step   distance between horizontally adjacent elements
 * @param stride distance between vertically adjacent elements
 * @param len    edge length to filter (4 or 8 pixels)
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, int step, int stride,
                                   int len, int pq)
{
    int done;

    for (done = 0; done < len; done += 4, src += step * 4) {
        if (!vc1_filter_line(src + 2 * step, stride, pq))
            continue;

        vc1_filter_line(src + 0 * step, stride, pq);
        vc1_filter_line(src + 1 * step, stride, pq);
        vc1_filter_line(src + 3 * step, stride, pq);
    }
}
1553cabdff1aSopenharmony_ci
/* Loop filter over 4 lines: lines are 1 byte apart, samples within a line
 * are stride bytes apart. */
void ff_vc1_v_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 4;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
1558cabdff1aSopenharmony_ci
/* Loop filter over 4 lines: lines are stride bytes apart, samples within a
 * line are 1 byte apart. */
void ff_vc1_h_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 4;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
1563cabdff1aSopenharmony_ci
/* Loop filter over 8 lines: lines are 1 byte apart, samples within a line
 * are stride bytes apart. */
void ff_vc1_v_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 8;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
1568cabdff1aSopenharmony_ci
/* Loop filter over 8 lines: lines are stride bytes apart, samples within a
 * line are 1 byte apart. */
void ff_vc1_h_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 8;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
1573cabdff1aSopenharmony_ci
/* Loop filter over 16 lines: lines are 1 byte apart, samples within a line
 * are stride bytes apart. */
void ff_vc1_v_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 16;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
1578cabdff1aSopenharmony_ci
/* Loop filter over 16 lines: lines are stride bytes apart, samples within a
 * line are 1 byte apart. */
void ff_vc1_h_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 16;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
1583cabdff1aSopenharmony_ci
/* mc00 "put" case: forwards to ff_put_pixels8_8_mmi; rnd is not used.
 * The last argument (8) is presumably the row count - see hpeldsp_mips.h. */
void ff_put_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src,
                               ptrdiff_t stride, int rnd)
{
    const int h = 8;

    ff_put_pixels8_8_mmi(dst, src, stride, h);
}
/* mc00 "put" case, 16-wide variant: forwards to ff_put_pixels16_8_mmi; rnd
 * is not used. The last argument (16) is presumably the row count - see
 * hpeldsp_mips.h. */
void ff_put_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride, int rnd)
{
    const int h = 16;

    ff_put_pixels16_8_mmi(dst, src, stride, h);
}
/* mc00 "avg" case: forwards to ff_avg_pixels8_8_mmi; rnd is not used.
 * The last argument (8) is presumably the row count - see hpeldsp_mips.h. */
void ff_avg_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src,
                               ptrdiff_t stride, int rnd)
{
    const int h = 8;

    ff_avg_pixels8_8_mmi(dst, src, stride, h);
}
/* mc00 "avg" case, 16-wide variant: forwards to ff_avg_pixels16_8_mmi; rnd
 * is not used. The last argument (16) is presumably the row count - see
 * hpeldsp_mips.h. */
void ff_avg_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride, int rnd)
{
    const int h = 16;

    ff_avg_pixels16_8_mmi(dst, src, stride, h);
}
1604cabdff1aSopenharmony_ci
/**
 * Store operators used as the OP argument of the TRANSFER_* macros.
 * OP_PUT emits nothing, so the register is stored unchanged.
 * OP_AVG loads the memory operand S into $f16 and byte-averages it into
 * register D (pavgb), so $f16 is overwritten.
 */
#define OP_PUT(S, D)
#define OP_AVG(S, D)                                                        \
    "ldc1       $f16,   "#S"                        \n\t"                   \
    "pavgb      "#D",   "#D",   $f16                \n\t"
1609cabdff1aSopenharmony_ci
/**
 * Add the rounder held in $f14 to both $f6 and $f8, then shift both
 * right (psrah) by SHIFT. SHIFT is a string naming the operand that holds
 * the shift count. No packing or storing is done here.
 */
#define NORMALIZE_MMI(SHIFT)                                                \
    "paddh      $f6,    $f6,    $f14                \n\t" /* +bias-r */     \
    "paddh      $f8,    $f8,    $f14                \n\t" /* +bias-r */     \
    "psrah      $f6,    $f6,    "SHIFT"             \n\t"                   \
    "psrah      $f8,    $f8,    "SHIFT"             \n\t"
1616cabdff1aSopenharmony_ci
/*
 * Write the accumulators $f6/$f8 to %[dst].  OP is OP_PUT or OP_AVG and is
 * applied to each register right before it is stored.
 *
 * TRANSFER_DO_PACK:   pack the two 4x16-bit registers into 8 unsigned
 *                     saturated bytes in $f6 and store 8 bytes.
 * TRANSFER_DONT_PACK: keep 16-bit precision; store $f6 at offset 0 and
 *                     $f8 at offset 8 (16 bytes in total).
 */
#define TRANSFER_DO_PACK(OP)                                                \
    "packushb   $f6,    $f6,    $f8                 \n\t"                   \
    OP((%[dst]), $f6)                                                       \
    "sdc1       $f6,    0x00(%[dst])                \n\t"

#define TRANSFER_DONT_PACK(OP)                                              \
    OP(0(%[dst]), $f6)                                                      \
    OP(8(%[dst]), $f8)                                                      \
    "sdc1       $f6,    0x00(%[dst])                \n\t"                   \
    "sdc1       $f8,    0x08(%[dst])                \n\t"
1627cabdff1aSopenharmony_ci
/**
 * UNPACK policies for MSPEL_FILTER13_CORE.
 *
 * DO_UNPACK(reg):   interleave the low bytes of reg with $f0; $f0 must hold
 *                   zero so the bytes are widened to 16 bits.
 * DONT_UNPACK(reg): expands to nothing, for data that is already 16-bit.
 *
 * reg is a string literal naming the FP register, e.g. "$f2".
 */
#define DO_UNPACK(reg)                                                      \
    "punpcklbh  "reg",  "reg",  $f0                 \n\t"
#define DONT_UNPACK(reg)
1632cabdff1aSopenharmony_ci
/**
 * Load the 32-bit value at memory operand ROUND and replicate its low
 * 16 bits into all four halfword lanes of $f14.
 *
 * The rounder itself (bias minus r) is computed by the caller; this macro
 * only broadcasts it.  ROUND is a string such as "%[rnd]".
 */
#define LOAD_ROUNDER_MMI(ROUND)                                             \
    "lwc1       $f14,   "ROUND"                     \n\t"                   \
    "punpcklhw  $f14,   $f14,   $f14                \n\t"                   \
    "punpcklwd  $f14,   $f14,   $f14                \n\t"
1638cabdff1aSopenharmony_ci
1639cabdff1aSopenharmony_ci
1640cabdff1aSopenharmony_ci#define SHIFT2_LINE(OFF, R0, R1, R2, R3)                                    \
1641cabdff1aSopenharmony_ci    "paddh      "#R1",      "#R1",  "#R2"           \n\t"                   \
1642cabdff1aSopenharmony_ci    PTR_ADDU    "$9,        %[src], %[stride1]      \n\t"                   \
1643cabdff1aSopenharmony_ci    MMI_ULWC1(R0, $9, 0x00)                                                 \
1644cabdff1aSopenharmony_ci    "pmullh     "#R1",      "#R1",  $f6             \n\t"                   \
1645cabdff1aSopenharmony_ci    "punpcklbh  "#R0",      "#R0",  $f0             \n\t"                   \
1646cabdff1aSopenharmony_ci    PTR_ADDU    "$9,        %[src], %[stride]       \n\t"                   \
1647cabdff1aSopenharmony_ci    MMI_ULWC1(R3, $9, 0x00)                                                 \
1648cabdff1aSopenharmony_ci    "psubh      "#R1",      "#R1",  "#R0"           \n\t"                   \
1649cabdff1aSopenharmony_ci    "punpcklbh  "#R3",      "#R3",  $f0             \n\t"                   \
1650cabdff1aSopenharmony_ci    "paddh      "#R1",      "#R1",  $f14            \n\t"                   \
1651cabdff1aSopenharmony_ci    "psubh      "#R1",      "#R1",  "#R3"           \n\t"                   \
1652cabdff1aSopenharmony_ci    "psrah      "#R1",      "#R1",  %[shift]        \n\t"                   \
1653cabdff1aSopenharmony_ci    MMI_SDC1(R1, %[dst], OFF)                                               \
1654cabdff1aSopenharmony_ci    PTR_ADDU    "%[src],    %[src], %[stride]       \n\t"
1655cabdff1aSopenharmony_ci
1656cabdff1aSopenharmony_ci/** Sacrificing $f12 makes it possible to pipeline loads from src */
1657cabdff1aSopenharmony_cistatic void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
1658cabdff1aSopenharmony_ci                                       const uint8_t *src, mips_reg stride,
1659cabdff1aSopenharmony_ci                                       int rnd, int64_t shift)
1660cabdff1aSopenharmony_ci{
1661cabdff1aSopenharmony_ci    union mmi_intfloat64 shift_u;
1662cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
1663cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;
1664cabdff1aSopenharmony_ci    shift_u.i = shift;
1665cabdff1aSopenharmony_ci
1666cabdff1aSopenharmony_ci    __asm__ volatile(
1667cabdff1aSopenharmony_ci        "pxor       $f0,    $f0,    $f0             \n\t"
1668cabdff1aSopenharmony_ci        "li         $8,     0x03                    \n\t"
1669cabdff1aSopenharmony_ci        LOAD_ROUNDER_MMI("%[rnd]")
1670cabdff1aSopenharmony_ci        "1:                                         \n\t"
1671cabdff1aSopenharmony_ci        MMI_ULWC1($f4, %[src], 0x00)
1672cabdff1aSopenharmony_ci        PTR_ADDU   "%[src], %[src], %[stride]       \n\t"
1673cabdff1aSopenharmony_ci        MMI_ULWC1($f6, %[src], 0x00)
1674cabdff1aSopenharmony_ci        "punpcklbh  $f4,    $f4,    $f0             \n\t"
1675cabdff1aSopenharmony_ci        "punpcklbh  $f6,    $f6,    $f0             \n\t"
1676cabdff1aSopenharmony_ci        SHIFT2_LINE(  0, $f2, $f4, $f6, $f8)
1677cabdff1aSopenharmony_ci        SHIFT2_LINE( 24, $f4, $f6, $f8, $f2)
1678cabdff1aSopenharmony_ci        SHIFT2_LINE( 48, $f6, $f8, $f2, $f4)
1679cabdff1aSopenharmony_ci        SHIFT2_LINE( 72, $f8, $f2, $f4, $f6)
1680cabdff1aSopenharmony_ci        SHIFT2_LINE( 96, $f2, $f4, $f6, $f8)
1681cabdff1aSopenharmony_ci        SHIFT2_LINE(120, $f4, $f6, $f8, $f2)
1682cabdff1aSopenharmony_ci        SHIFT2_LINE(144, $f6, $f8, $f2, $f4)
1683cabdff1aSopenharmony_ci        SHIFT2_LINE(168, $f8, $f2, $f4, $f6)
1684cabdff1aSopenharmony_ci        PTR_SUBU   "%[src], %[src], %[stride2]      \n\t"
1685cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst], %[dst], 0x08            \n\t"
1686cabdff1aSopenharmony_ci        "addiu      $8,     $8,    -0x01            \n\t"
1687cabdff1aSopenharmony_ci        "bnez       $8,     1b                      \n\t"
1688cabdff1aSopenharmony_ci        : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT
1689cabdff1aSopenharmony_ci          [src]"+r"(src),               [dst]"+r"(dst)
1690cabdff1aSopenharmony_ci        : [stride]"r"(stride),          [stride1]"r"(-2*stride),
1691cabdff1aSopenharmony_ci          [shift]"f"(shift_u.f),        [rnd]"m"(rnd),
1692cabdff1aSopenharmony_ci          [stride2]"r"(9*stride-4)
1693cabdff1aSopenharmony_ci        : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",
1694cabdff1aSopenharmony_ci          "$f14", "$f16", "memory"
1695cabdff1aSopenharmony_ci    );
1696cabdff1aSopenharmony_ci}
1697cabdff1aSopenharmony_ci
1698cabdff1aSopenharmony_ci/**
1699cabdff1aSopenharmony_ci * Data is already unpacked, so some operations can directly be made from
1700cabdff1aSopenharmony_ci * memory.
1701cabdff1aSopenharmony_ci */
1702cabdff1aSopenharmony_ci#define VC1_HOR_16B_SHIFT2(OP, OPNAME)                                      \
1703cabdff1aSopenharmony_cistatic void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
1704cabdff1aSopenharmony_ci                                             const int16_t *src, int rnd)   \
1705cabdff1aSopenharmony_ci{                                                                           \
1706cabdff1aSopenharmony_ci    int h = 8;                                                              \
1707cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;                                                      \
1708cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;                                                      \
1709cabdff1aSopenharmony_ci                                                                            \
1710cabdff1aSopenharmony_ci    src -= 1;                                                               \
1711cabdff1aSopenharmony_ci    rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */                            \
1712cabdff1aSopenharmony_ci                                                                            \
1713cabdff1aSopenharmony_ci    __asm__ volatile(                                                       \
1714cabdff1aSopenharmony_ci        LOAD_ROUNDER_MMI("%[rnd]")                                          \
1715cabdff1aSopenharmony_ci        "1:                                         \n\t"                   \
1716cabdff1aSopenharmony_ci        MMI_ULDC1($f2, %[src], 0x00)                                        \
1717cabdff1aSopenharmony_ci        MMI_ULDC1($f4, %[src], 0x08)                                        \
1718cabdff1aSopenharmony_ci        MMI_ULDC1($f6, %[src], 0x02)                                        \
1719cabdff1aSopenharmony_ci        MMI_ULDC1($f8, %[src], 0x0a)                                        \
1720cabdff1aSopenharmony_ci        MMI_ULDC1($f0, %[src], 0x06)                                        \
1721cabdff1aSopenharmony_ci        "paddh      $f2,    $f2,    $f0             \n\t"                   \
1722cabdff1aSopenharmony_ci        MMI_ULDC1($f0, %[src], 0x0e)                                        \
1723cabdff1aSopenharmony_ci        "paddh      $f4,    $f4,    $f0             \n\t"                   \
1724cabdff1aSopenharmony_ci        MMI_ULDC1($f0, %[src], 0x04)                                        \
1725cabdff1aSopenharmony_ci        "paddh      $f6,    $f6,    $f0             \n\t"                   \
1726cabdff1aSopenharmony_ci        MMI_ULDC1($f0, %[src], 0x0b)                                        \
1727cabdff1aSopenharmony_ci        "paddh      $f8,    $f8,    $f0             \n\t"                   \
1728cabdff1aSopenharmony_ci        "pmullh     $f6,    $f6,    %[ff_pw_9]      \n\t"                   \
1729cabdff1aSopenharmony_ci        "pmullh     $f8,    $f8,    %[ff_pw_9]      \n\t"                   \
1730cabdff1aSopenharmony_ci        "psubh      $f6,    $f6,    $f2             \n\t"                   \
1731cabdff1aSopenharmony_ci        "psubh      $f8,    $f8,    $f4             \n\t"                   \
1732cabdff1aSopenharmony_ci        "li         $8,     0x07                    \n\t"                   \
1733cabdff1aSopenharmony_ci        "mtc1       $8,     $f16                    \n\t"                   \
1734cabdff1aSopenharmony_ci        NORMALIZE_MMI("$f16")                                               \
1735cabdff1aSopenharmony_ci        /* Remove bias */                                                   \
1736cabdff1aSopenharmony_ci        "paddh      $f6,    $f6,    %[ff_pw_128]    \n\t"                   \
1737cabdff1aSopenharmony_ci        "paddh      $f8,    $f8,    %[ff_pw_128]    \n\t"                   \
1738cabdff1aSopenharmony_ci        TRANSFER_DO_PACK(OP)                                                \
1739cabdff1aSopenharmony_ci        "addiu      %[h],   %[h],  -0x01            \n\t"                   \
1740cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src], %[src], 0x18            \n\t"                   \
1741cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \
1742cabdff1aSopenharmony_ci        "bnez       %[h],   1b                      \n\t"                   \
1743cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64            RESTRICT_ASM_ADDRT                  \
1744cabdff1aSopenharmony_ci          [h]"+r"(h),                                                       \
1745cabdff1aSopenharmony_ci          [src]"+r"(src),               [dst]"+r"(dst)                      \
1746cabdff1aSopenharmony_ci        : [stride]"r"(stride),          [rnd]"m"(rnd),                      \
1747cabdff1aSopenharmony_ci          [ff_pw_9]"f"(ff_pw_9.f),      [ff_pw_128]"f"(ff_pw_128.f)         \
1748cabdff1aSopenharmony_ci        : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14",                  \
1749cabdff1aSopenharmony_ci          "$f16", "memory"                                                  \
1750cabdff1aSopenharmony_ci    );                                                                      \
1751cabdff1aSopenharmony_ci}
1752cabdff1aSopenharmony_ci
1753cabdff1aSopenharmony_ciVC1_HOR_16B_SHIFT2(OP_PUT, put_)
1754cabdff1aSopenharmony_ciVC1_HOR_16B_SHIFT2(OP_AVG, avg_)
1755cabdff1aSopenharmony_ci
1756cabdff1aSopenharmony_ci/**
1757cabdff1aSopenharmony_ci * Purely vertical or horizontal 1/2 shift interpolation.
1758cabdff1aSopenharmony_ci * Sacrify $f12 for *9 factor.
1759cabdff1aSopenharmony_ci */
1760cabdff1aSopenharmony_ci#define VC1_SHIFT2(OP, OPNAME)\
1761cabdff1aSopenharmony_cistatic void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src,      \
1762cabdff1aSopenharmony_ci                                     mips_reg stride, int rnd,              \
1763cabdff1aSopenharmony_ci                                     mips_reg offset)                       \
1764cabdff1aSopenharmony_ci{                                                                           \
1765cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;                                                      \
1766cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;                                                      \
1767cabdff1aSopenharmony_ci                                                                            \
1768cabdff1aSopenharmony_ci    rnd = 8 - rnd;                                                          \
1769cabdff1aSopenharmony_ci                                                                            \
1770cabdff1aSopenharmony_ci    __asm__ volatile(                                                       \
1771cabdff1aSopenharmony_ci        "pxor       $f0,    $f0,    $f0             \n\t"                   \
1772cabdff1aSopenharmony_ci        "li         $10,    0x08                    \n\t"                   \
1773cabdff1aSopenharmony_ci        LOAD_ROUNDER_MMI("%[rnd]")                                          \
1774cabdff1aSopenharmony_ci        "1:                                         \n\t"                   \
1775cabdff1aSopenharmony_ci        MMI_ULWC1($f6, %[src], 0x00)                                        \
1776cabdff1aSopenharmony_ci        MMI_ULWC1($f8, %[src], 0x04)                                        \
1777cabdff1aSopenharmony_ci        PTR_ADDU   "$9,     %[src], %[offset]       \n\t"                   \
1778cabdff1aSopenharmony_ci        MMI_ULWC1($f2, $9, 0x00)                                            \
1779cabdff1aSopenharmony_ci        MMI_ULWC1($f4, $9, 0x04)                                            \
1780cabdff1aSopenharmony_ci        PTR_ADDU   "%[src], %[src], %[offset]       \n\t"                   \
1781cabdff1aSopenharmony_ci        "punpcklbh  $f6,    $f6,    $f0             \n\t"                   \
1782cabdff1aSopenharmony_ci        "punpcklbh  $f8,    $f8,    $f0             \n\t"                   \
1783cabdff1aSopenharmony_ci        "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \
1784cabdff1aSopenharmony_ci        "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \
1785cabdff1aSopenharmony_ci        "paddh      $f6,    $f6,    $f2             \n\t"                   \
1786cabdff1aSopenharmony_ci        "paddh      $f8,    $f8,    $f4             \n\t"                   \
1787cabdff1aSopenharmony_ci        PTR_ADDU   "$9,     %[src], %[offset_x2n]   \n\t"                   \
1788cabdff1aSopenharmony_ci        MMI_ULWC1($f2, $9, 0x00)                                            \
1789cabdff1aSopenharmony_ci        MMI_ULWC1($f4, $9, 0x04)                                            \
1790cabdff1aSopenharmony_ci        "pmullh     $f6,    $f6,    %[ff_pw_9]      \n\t" /* 0,9,9,0*/      \
1791cabdff1aSopenharmony_ci        "pmullh     $f8,    $f8,    %[ff_pw_9]      \n\t" /* 0,9,9,0*/      \
1792cabdff1aSopenharmony_ci        "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \
1793cabdff1aSopenharmony_ci        "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \
1794cabdff1aSopenharmony_ci        "psubh      $f6,    $f6,    $f2             \n\t" /*-1,9,9,0*/      \
1795cabdff1aSopenharmony_ci        "psubh      $f8,    $f8,    $f4             \n\t" /*-1,9,9,0*/      \
1796cabdff1aSopenharmony_ci        PTR_ADDU   "$9,     %[src], %[offset]       \n\t"                   \
1797cabdff1aSopenharmony_ci        MMI_ULWC1($f2, $9, 0x00)                                            \
1798cabdff1aSopenharmony_ci        MMI_ULWC1($f4, $9, 0x04)                                            \
1799cabdff1aSopenharmony_ci        "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \
1800cabdff1aSopenharmony_ci        "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \
1801cabdff1aSopenharmony_ci        "psubh      $f6,    $f6,    $f2             \n\t" /*-1,9,9,-1*/     \
1802cabdff1aSopenharmony_ci        "psubh      $f8,    $f8,    $f4             \n\t" /*-1,9,9,-1*/     \
1803cabdff1aSopenharmony_ci        "li         $8,     0x04                    \n\t"                   \
1804cabdff1aSopenharmony_ci        "mtc1       $8,     $f16                    \n\t"                   \
1805cabdff1aSopenharmony_ci        NORMALIZE_MMI("$f16")                                               \
1806cabdff1aSopenharmony_ci        "packushb   $f6,    $f6,    $f8             \n\t"                   \
1807cabdff1aSopenharmony_ci        OP((%[dst]), $f6)                                                   \
1808cabdff1aSopenharmony_ci        "sdc1       $f6,    0x00(%[dst])            \n\t"                   \
1809cabdff1aSopenharmony_ci        "addiu      $10,    $10,   -0x01            \n\t"                   \
1810cabdff1aSopenharmony_ci        PTR_ADDU   "%[src], %[src], %[stride1]      \n\t"                   \
1811cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \
1812cabdff1aSopenharmony_ci        "bnez       $10,    1b                      \n\t"                   \
1813cabdff1aSopenharmony_ci        : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \
1814cabdff1aSopenharmony_ci          [src]"+r"(src),               [dst]"+r"(dst)                      \
1815cabdff1aSopenharmony_ci        : [offset]"r"(offset),          [offset_x2n]"r"(-2*offset),         \
1816cabdff1aSopenharmony_ci          [stride]"r"(stride),          [rnd]"m"(rnd),                      \
1817cabdff1aSopenharmony_ci          [stride1]"r"(stride-offset),                                      \
1818cabdff1aSopenharmony_ci          [ff_pw_9]"f"(ff_pw_9.f)                                           \
1819cabdff1aSopenharmony_ci        : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",     \
1820cabdff1aSopenharmony_ci          "$f14", "$f16", "memory"                                          \
1821cabdff1aSopenharmony_ci    );                                                                      \
1822cabdff1aSopenharmony_ci}
1823cabdff1aSopenharmony_ci
1824cabdff1aSopenharmony_ciVC1_SHIFT2(OP_PUT, put_)
1825cabdff1aSopenharmony_ciVC1_SHIFT2(OP_AVG, avg_)
1826cabdff1aSopenharmony_ci
/**
 * Core of the 1/4 and 3/4 shift bicubic interpolation.
 *
 * Accumulates -3*tap(A1) + 18*tap(A2) + 53*tap(A3) - 4*tap(A4) for eight
 * pixels: the low four in $f6, the high four in $f8.  Rounding and
 * shifting are left to the caller.
 *
 * Scratch: $8, $9, $f2, $f4, $f16 ($f16 is left holding the value 2).
 * When UNPACK is DO_UNPACK, $f0 must be zero.
 *
 * @param UNPACK  Macro unpacking arguments from 8 to 16bits (can be empty).
 * @param LOAD    "MMI_ULWC1" for packed 8-bit data, or "MMI_ULDC1" if the
 *                data read is already unpacked.
 * @param M       "1" for MMI_ULWC1, "2" for MMI_ULDC1 (scales the offset of
 *                the second load, M*4).
 * @param A1      Stride address of 1st tap (beware of unpacked/packed).
 * @param A2      Stride address of 2nd tap
 * @param A3      Stride address of 3rd tap
 * @param A4      Stride address of 4th tap
 */
#define MSPEL_FILTER13_CORE(UNPACK, LOAD, M, A1, A2, A3, A4)                \
    /* tap 1: *3 (subtracted below) */                                      \
    PTR_ADDU   "$9,     %[src], "#A1"           \n\t"                       \
    LOAD($f2, $9, M*0)                                                      \
    LOAD($f4, $9, M*4)                                                      \
    UNPACK("$f2")                                                           \
    UNPACK("$f4")                                                           \
    "pmullh     $f2,    $f2,    %[ff_pw_3]      \n\t"                       \
    "pmullh     $f4,    $f4,    %[ff_pw_3]      \n\t"                       \
    /* tap 2: *18 */                                                        \
    PTR_ADDU   "$9,     %[src], "#A2"           \n\t"                       \
    LOAD($f6, $9, M*0)                                                      \
    LOAD($f8, $9, M*4)                                                      \
    UNPACK("$f6")                                                           \
    UNPACK("$f8")                                                           \
    "pmullh     $f6,    $f6,    %[ff_pw_18]     \n\t" /* *18 */             \
    "pmullh     $f8,    $f8,    %[ff_pw_18]     \n\t" /* *18 */             \
    "psubh      $f6,    $f6,    $f2             \n\t" /* *18, -3 */         \
    "psubh      $f8,    $f8,    $f4             \n\t" /* *18, -3 */         \
    /* tap 4: *4 via shift left by 2, subtracted */                         \
    PTR_ADDU   "$9,     %[src], "#A4"           \n\t"                       \
    LOAD($f2, $9, M*0)                                                      \
    LOAD($f4, $9, M*4)                                                      \
    UNPACK("$f2")                                                           \
    UNPACK("$f4")                                                           \
    "li         $8,     0x02                    \n\t"                       \
    "mtc1       $8,     $f16                    \n\t"                       \
    "psllh      $f2,    $f2,    $f16            \n\t" /* 4* */              \
    "psllh      $f4,    $f4,    $f16            \n\t" /* 4* */              \
    "psubh      $f6,    $f6,    $f2             \n\t" /* -4,18,-3 */        \
    "psubh      $f8,    $f8,    $f4             \n\t" /* -4,18,-3 */        \
    /* tap 3: *53 */                                                        \
    PTR_ADDU   "$9,     %[src], "#A3"           \n\t"                       \
    LOAD($f2, $9, M*0)                                                      \
    LOAD($f4, $9, M*4)                                                      \
    UNPACK("$f2")                                                           \
    UNPACK("$f4")                                                           \
    "pmullh     $f2,    $f2,    %[ff_pw_53]     \n\t" /* *53 */             \
    "pmullh     $f4,    $f4,    %[ff_pw_53]     \n\t" /* *53 */             \
    "paddh      $f6,    $f6,    $f2             \n\t" /* -4,53,18,-3 */     \
    "paddh      $f8,    $f8,    $f4             \n\t" /* -4,53,18,-3 */
1875cabdff1aSopenharmony_ci
/**
 * Macro to build the vertical 16bits version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 are used as the
 * offset operand of PTR_ADDU relative to %[src]; the row offsets made
 * available to them are %[stride_x1] (src_stride), %[stride_x2]
 * (2*src_stride) and %[stride_x3] (3*src_stride).
 *
 * Each generated function writes 8 rows of 12 int16 values (0x18 bytes per
 * row): columns 0-7 through MSPEL_FILTER13_CORE, columns 8-11 through the
 * hand-expanded tail.  Both parts add the same rounder ($f14) and must be
 * normalised with the same %[shift], otherwise one row would mix two
 * different scalings.
 *
 * Generated function parameters:
 *   dst         16-bit output buffer
 *   src         8-bit source; the function starts one row above it
 *   src_stride  source line size in bytes
 *   rnd         rounder added before the shift
 *   shift       right-shift applied to every result
 *
 * @param  NAME   Either 1 or 3
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)                        \
static void                                                                 \
vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src,          \
                                 mips_reg src_stride,                       \
                                 int rnd, int64_t shift)                    \
{                                                                           \
    int h = 8;                                                              \
    union mmi_intfloat64 shift_u;                                           \
    DECLARE_VAR_LOW32;                                                      \
    DECLARE_VAR_ADDRT;                                                      \
    shift_u.i = shift;                                                      \
                                                                            \
    src -= src_stride;                                                      \
                                                                            \
    __asm__ volatile(                                                       \
        "pxor       $f0,    $f0,    $f0             \n\t"                   \
        LOAD_ROUNDER_MMI("%[rnd]")                                          \
        ".p2align 3                                 \n\t"                   \
        "1:                                         \n\t"                   \
        MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4)        \
        NORMALIZE_MMI("%[shift]")                                           \
        TRANSFER_DONT_PACK(OP_PUT)                                          \
        /* Last 3 (in fact 4) bytes on the line */                          \
        PTR_ADDU   "$9,     %[src], "#A1"           \n\t"                   \
        MMI_ULWC1($f2, $9, 0x08)                                            \
        DO_UNPACK("$f2")                                                    \
        "mov.d      $f6,    $f2                     \n\t"                   \
        "paddh      $f2,    $f2,    $f2             \n\t"                   \
        "paddh      $f2,    $f2,    $f6             \n\t" /* 3* */          \
        PTR_ADDU   "$9,     %[src], "#A2"           \n\t"                   \
        MMI_ULWC1($f6, $9, 0x08)                                            \
        DO_UNPACK("$f6")                                                    \
        "pmullh     $f6,    $f6,    %[ff_pw_18]     \n\t" /* *18 */         \
        "psubh      $f6,    $f6,    $f2             \n\t" /* *18,-3 */      \
        PTR_ADDU   "$9,     %[src], "#A3"           \n\t"                   \
        MMI_ULWC1($f2, $9, 0x08)                                            \
        DO_UNPACK("$f2")                                                    \
        "pmullh     $f2,    $f2,    %[ff_pw_53]     \n\t" /* *53 */         \
        "paddh      $f6,    $f6,    $f2             \n\t" /* *53,18,-3 */   \
        PTR_ADDU   "$9,     %[src], "#A4"           \n\t"                   \
        MMI_ULWC1($f2, $9, 0x08)                                            \
        DO_UNPACK("$f2")                                                    \
        "li         $8,     0x02                    \n\t"                   \
        "mtc1       $8,     $f16                    \n\t"                   \
        "psllh      $f2,    $f2,    $f16            \n\t" /* 4* */          \
        "psubh      $f6,    $f6,    $f2             \n\t"                   \
        "paddh      $f6,    $f6,    $f14            \n\t"                   \
        /* same normalisation as columns 0-7 */                             \
        "psrah      $f6,    $f6,    %[shift]        \n\t"                   \
        "sdc1       $f6,    0x10(%[dst])            \n\t"                   \
        "addiu      %[h],   %[h],  -0x01            \n\t"                   \
        PTR_ADDU   "%[src], %[src], %[stride_x1]    \n\t"                   \
        PTR_ADDIU  "%[dst], %[dst], 0x18            \n\t"                   \
        "bnez       %[h],   1b                      \n\t"                   \
        : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \
          [h]"+r"(h),                                                       \
          [src]"+r"(src),               [dst]"+r"(dst)                      \
        : [stride_x1]"r"(src_stride),   [stride_x2]"r"(2*src_stride),       \
          [stride_x3]"r"(3*src_stride),                                     \
          [rnd]"m"(rnd),                [shift]"f"(shift_u.f),              \
          [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \
          [ff_pw_3]"f"(ff_pw_3.f)                                           \
        : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \
          "$f14", "$f16", "memory"                                          \
    );                                                                      \
}
1951cabdff1aSopenharmony_ci
/**
 * Macro to build the horizontal 16-bit version of vc1_put_shift[13], i.e.
 * the second pass used when both a vertical and a horizontal filter apply.
 *
 * The generated function reads rows of int16_t intermediates that lie
 * 0x18 bytes (12 samples) apart and writes 8 rows to dst, advancing dst by
 * stride bytes per row.
 *
 * Here the tap distance is one 16-bit sample, so A1 to A4 are plain byte
 * offsets (0, 2, 4, 6 in the instantiations) rather than asm operands.
 *
 * @param  NAME    Suffix of the generated function name (shift1 or shift3)
 * @param  OP      Store operation handed to TRANSFER_DO_PACK
 * @param  OPNAME  Prefix of the generated function name (put_ or avg_)
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)            \
static void                                                                 \
OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride,       \
                                       const int16_t *src, int rnd)         \
{                                                                           \
    int h = 8;                                                              \
    DECLARE_VAR_ALL64;                                                      \
    DECLARE_VAR_ADDRT;                                                      \
                                                                            \
    /* The first tap sits one sample before the output position. */         \
    src -= 1;                                                               \
    /* The bias is added back below, after the shift by 7, as ff_pw_128. */ \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */                            \
                                                                            \
    __asm__ volatile(                                                       \
        "pxor       $f0,    $f0,    $f0             \n\t"                   \
        LOAD_ROUNDER_MMI("%[rnd]")                                          \
        ".p2align 3                                 \n\t"                   \
        "1:                                         \n\t"                   \
        MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4)      \
        "li         $8,     0x07                    \n\t"                   \
        "mtc1       $8,     $f16                    \n\t"                   \
        NORMALIZE_MMI("$f16")                                               \
        /* Remove bias */                                                   \
        "paddh      $f6,    $f6,    %[ff_pw_128]    \n\t"                   \
        "paddh      $f8,    $f8,    %[ff_pw_128]    \n\t"                   \
        TRANSFER_DO_PACK(OP)                                                \
        "addiu      %[h],   %[h],  -0x01            \n\t"                   \
        /* Constant step: immediate form, as in the vertical pass. */       \
        PTR_ADDIU  "%[src], %[src], 0x18            \n\t"                   \
        PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \
        "bnez       %[h],   1b                      \n\t"                   \
        : RESTRICT_ASM_ALL64            RESTRICT_ASM_ADDRT                  \
          [h]"+r"(h),                                                       \
          [src]"+r"(src),               [dst]"+r"(dst)                      \
        : [stride]"r"(stride),          [rnd]"m"(rnd),                      \
          [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \
          [ff_pw_3]"f"(ff_pw_3.f),      [ff_pw_128]"f"(ff_pw_128.f)         \
        : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \
          "$f14", "$f16", "memory"                                          \
    );                                                                      \
}
1998cabdff1aSopenharmony_ci
/**
 * Macro to build the 8-bit, any direction, version of vc1_put_shift[13].
 *
 * The generated function filters 8 rows in a single pass. The distance
 * between two filter taps is the offset argument: the dispatcher passes the
 * line stride for a vertical filter and 1 for a horizontal one.
 *
 * A1 to A4 must be spelled with the named asm operands %[offset_x1]
 * (offset), %[offset_x2] (2*offset) and %[offset_x3] (3*offset), or with
 * $0 for a zero displacement.
 *
 * @param  NAME    Suffix of the generated function name (shift1 or shift3)
 * @param  OP      Store operation handed to TRANSFER_DO_PACK
 * @param  OPNAME  Prefix of the generated function name (put_ or avg_)
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)                 \
static void                                                                 \
OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src,             \
                              mips_reg stride, int rnd, mips_reg offset)    \
{                                                                           \
    DECLARE_VAR_LOW32;                                                      \
    DECLARE_VAR_ADDRT;                                                      \
    int rows_left = 8;                                                      \
                                                                            \
    /* The first tap sits one tap distance before the output position. */   \
    src = src - offset;                                                     \
    rnd = 32 - rnd;                                                         \
                                                                            \
    __asm__ volatile (                                                      \
        "pxor       $f0,    $f0,    $f0             \n\t"                   \
        LOAD_ROUNDER_MMI("%[rnd]")                                          \
        ".p2align 3                                 \n\t"                   \
        "1:                                         \n\t"                   \
        MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4)        \
        /* Shift count 6 for the normalization step */                      \
        "li         $8,     0x06                    \n\t"                   \
        "mtc1       $8,     $f16                    \n\t"                   \
        NORMALIZE_MMI("$f16")                                               \
        TRANSFER_DO_PACK(OP)                                                \
        /* Next row: source and destination share the same stride */        \
        "addiu      %[h],   %[h],      -0x01        \n\t"                   \
        PTR_ADDU   "%[src], %[src],     %[stride]   \n\t"                   \
        PTR_ADDU   "%[dst], %[dst],     %[stride]   \n\t"                   \
        "bnez       %[h],   1b                      \n\t"                   \
        : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \
          [h]"+r"(rows_left),                                               \
          [src]"+r"(src),               [dst]"+r"(dst)                      \
        : [offset_x1]"r"(offset),       [offset_x2]"r"(2*offset),           \
          [offset_x3]"r"(3*offset),     [stride]"r"(stride),                \
          [rnd]"m"(rnd),                                                    \
          [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \
          [ff_pw_3]"f"(ff_pw_3.f)                                           \
        : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \
          "$f14", "$f16", "memory"                                          \
    );                                                                      \
}
2045cabdff1aSopenharmony_ci
2046cabdff1aSopenharmony_ci
2047cabdff1aSopenharmony_ci/** 1/4 shift bicubic interpolation */
2048cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift1, %[offset_x3], %[offset_x2], %[offset_x1], $0, OP_PUT, put_)
2049cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift1, %[offset_x3], %[offset_x2], %[offset_x1], $0, OP_AVG, avg_)
2050cabdff1aSopenharmony_ciMSPEL_FILTER13_VER_16B(shift1, %[stride_x3], %[stride_x2], %[stride_x1], $0)
2051cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift1, 6, 4, 2, 0, OP_PUT, put_)
2052cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift1, 6, 4, 2, 0, OP_AVG, avg_)
2053cabdff1aSopenharmony_ci
2054cabdff1aSopenharmony_ci/** 3/4 shift bicubic interpolation */
2055cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_PUT, put_)
2056cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_AVG, avg_)
2057cabdff1aSopenharmony_ciMSPEL_FILTER13_VER_16B(shift3, $0, %[stride_x1], %[stride_x2], %[stride_x3])
2058cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift3, 0, 2, 4, 6, OP_PUT, put_)
2059cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift3, 0, 2, 4, 6, OP_AVG, avg_)
2060cabdff1aSopenharmony_ci
2061cabdff1aSopenharmony_citypedef void (*vc1_mspel_mc_filter_ver_16bits)
2062cabdff1aSopenharmony_ci             (int16_t *dst, const uint8_t *src, mips_reg src_stride, int rnd,
2063cabdff1aSopenharmony_ci              int64_t shift);
2064cabdff1aSopenharmony_citypedef void (*vc1_mspel_mc_filter_hor_16bits)
2065cabdff1aSopenharmony_ci             (uint8_t *dst, mips_reg dst_stride, const int16_t *src, int rnd);
2066cabdff1aSopenharmony_citypedef void (*vc1_mspel_mc_filter_8bits)
2067cabdff1aSopenharmony_ci             (uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd,
2068cabdff1aSopenharmony_ci              mips_reg offset);
2069cabdff1aSopenharmony_ci
2070cabdff1aSopenharmony_ci/**
2071cabdff1aSopenharmony_ci * Interpolate fractional pel values by applying proper vertical then
2072cabdff1aSopenharmony_ci * horizontal filter.
2073cabdff1aSopenharmony_ci *
2074cabdff1aSopenharmony_ci * @param  dst     Destination buffer for interpolated pels.
2075cabdff1aSopenharmony_ci * @param  src     Source buffer.
2076cabdff1aSopenharmony_ci * @param  stride  Stride for both src and dst buffers.
2077cabdff1aSopenharmony_ci * @param  hmode   Horizontal filter (expressed in quarter pixels shift).
2078cabdff1aSopenharmony_ci * @param  hmode   Vertical filter.
2079cabdff1aSopenharmony_ci * @param  rnd     Rounding bias.
2080cabdff1aSopenharmony_ci */
2081cabdff1aSopenharmony_ci#define VC1_MSPEL_MC(OP)                                                    \
2082cabdff1aSopenharmony_cistatic void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
2083cabdff1aSopenharmony_ci                               int hmode, int vmode, int rnd)               \
2084cabdff1aSopenharmony_ci{                                                                           \
2085cabdff1aSopenharmony_ci    static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
2086cabdff1aSopenharmony_ci         { NULL, vc1_put_ver_16b_shift1_mmi,                                \
2087cabdff1aSopenharmony_ci                 vc1_put_ver_16b_shift2_mmi,                                \
2088cabdff1aSopenharmony_ci                 vc1_put_ver_16b_shift3_mmi };                              \
2089cabdff1aSopenharmony_ci    static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
2090cabdff1aSopenharmony_ci         { NULL, OP ## vc1_hor_16b_shift1_mmi,                              \
2091cabdff1aSopenharmony_ci                 OP ## vc1_hor_16b_shift2_mmi,                              \
2092cabdff1aSopenharmony_ci                 OP ## vc1_hor_16b_shift3_mmi };                            \
2093cabdff1aSopenharmony_ci    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =          \
2094cabdff1aSopenharmony_ci         { NULL, OP ## vc1_shift1_mmi,                                      \
2095cabdff1aSopenharmony_ci                 OP ## vc1_shift2_mmi,                                      \
2096cabdff1aSopenharmony_ci                 OP ## vc1_shift3_mmi };                                    \
2097cabdff1aSopenharmony_ci                                                                            \
2098cabdff1aSopenharmony_ci    if (vmode) { /* Vertical filter to apply */                             \
2099cabdff1aSopenharmony_ci        if (hmode) { /* Horizontal filter to apply, output to tmp */        \
2100cabdff1aSopenharmony_ci            static const int shift_value[] = { 0, 5, 1, 5 };                \
2101cabdff1aSopenharmony_ci            int    shift = (shift_value[hmode]+shift_value[vmode])>>1;      \
2102cabdff1aSopenharmony_ci            int    r;                                                       \
2103cabdff1aSopenharmony_ci            LOCAL_ALIGNED(16, int16_t, tmp, [12*8]);                        \
2104cabdff1aSopenharmony_ci                                                                            \
2105cabdff1aSopenharmony_ci            r = (1<<(shift-1)) + rnd-1;                                     \
2106cabdff1aSopenharmony_ci            vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);  \
2107cabdff1aSopenharmony_ci                                                                            \
2108cabdff1aSopenharmony_ci            vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);    \
2109cabdff1aSopenharmony_ci            return;                                                         \
2110cabdff1aSopenharmony_ci        }                                                                   \
2111cabdff1aSopenharmony_ci        else { /* No horizontal filter, output 8 lines to dst */            \
2112cabdff1aSopenharmony_ci            vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);    \
2113cabdff1aSopenharmony_ci            return;                                                         \
2114cabdff1aSopenharmony_ci        }                                                                   \
2115cabdff1aSopenharmony_ci    }                                                                       \
2116cabdff1aSopenharmony_ci                                                                            \
2117cabdff1aSopenharmony_ci    /* Horizontal mode with no vertical mode */                             \
2118cabdff1aSopenharmony_ci    vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);                   \
2119cabdff1aSopenharmony_ci}                                                                           \
2120cabdff1aSopenharmony_cistatic void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src,         \
2121cabdff1aSopenharmony_ci                                  int stride, int hmode, int vmode, int rnd)\
2122cabdff1aSopenharmony_ci{                                                                           \
2123cabdff1aSopenharmony_ci    OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd);        \
2124cabdff1aSopenharmony_ci    OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd);        \
2125cabdff1aSopenharmony_ci    dst += 8*stride; src += 8*stride;                                       \
2126cabdff1aSopenharmony_ci    OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd);        \
2127cabdff1aSopenharmony_ci    OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd);        \
2128cabdff1aSopenharmony_ci}
2129cabdff1aSopenharmony_ci
2130cabdff1aSopenharmony_ciVC1_MSPEL_MC(put_)
2131cabdff1aSopenharmony_ciVC1_MSPEL_MC(avg_)
2132cabdff1aSopenharmony_ci
/**
 * Helper for DECLARE_FUNCTION: emits the exported 8x8 and 16x16 wrappers
 * of one operation (OP is put_ or avg_) for horizontal shift a and
 * vertical shift b.
 */
#define DECLARE_FUNCTION_OP(OP, a, b)                                       \
void ff_ ## OP ## vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst,              \
                                                 const uint8_t *src,        \
                                                 ptrdiff_t stride,          \
                                                 int rnd)                   \
{                                                                           \
     OP ## vc1_mspel_mc(dst, src, stride, a, b, rnd);                       \
}                                                                           \
void ff_ ## OP ## vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst,           \
                                                    const uint8_t *src,     \
                                                    ptrdiff_t stride,       \
                                                    int rnd)                \
{                                                                           \
     OP ## vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                    \
}

/**
 * Macro to ease bicubic filter interpolation functions declarations.
 * For one (a, b) = (horizontal, vertical) quarter-pel shift pair it defines
 * ff_{put,avg}_vc1_mspel_mc<a><b>_mmi and their _16_mmi counterparts.
 */
#define DECLARE_FUNCTION(a, b)                                              \
    DECLARE_FUNCTION_OP(put_, a, b)                                         \
    DECLARE_FUNCTION_OP(avg_, a, b)

/* Every shift pair except (0, 0), which needs no interpolation filter */
DECLARE_FUNCTION(0, 1)
DECLARE_FUNCTION(0, 2)
DECLARE_FUNCTION(0, 3)

DECLARE_FUNCTION(1, 0)
DECLARE_FUNCTION(1, 1)
DECLARE_FUNCTION(1, 2)
DECLARE_FUNCTION(1, 3)

DECLARE_FUNCTION(2, 0)
DECLARE_FUNCTION(2, 1)
DECLARE_FUNCTION(2, 2)
DECLARE_FUNCTION(2, 3)

DECLARE_FUNCTION(3, 0)
DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION(3, 2)
DECLARE_FUNCTION(3, 3)
2182cabdff1aSopenharmony_ci
/*
 * 8-pixel chroma kernel, split into stages that CHROMA_MC_8_MMI strings
 * together in order.
 *
 * On entry ftmp1..ftmp4 hold the four 8-byte source vectors, A..D the
 * per-lane weights and ftmp9 the shift count; the callers visible in this
 * file clear ftmp0 beforehand. The packed result is left in ftmp1.
 */

/* Widen each vector against ftmp0: high half to ftmp5..8, low half in place */
#define CHROMA_MC_8_UNPACK_MMI                                              \
        "punpckhbh  %[ftmp5],   %[ftmp1],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t"   \
        "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                \n\t"   \
        "punpckhbh  %[ftmp7],   %[ftmp3],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                \n\t"   \
        "punpckhbh  %[ftmp8],   %[ftmp4],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                \n\t"

/* Multiply both halves of each vector by its weight */
#define CHROMA_MC_8_WEIGHT_MMI                                              \
        "pmullh     %[ftmp1],   %[ftmp1],   %[A]                    \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp5],   %[A]                    \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],   %[B]                    \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp6],   %[B]                    \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],   %[C]                    \n\t"   \
        "pmullh     %[ftmp7],   %[ftmp7],   %[C]                    \n\t"   \
        "pmullh     %[ftmp4],   %[ftmp4],   %[D]                    \n\t"   \
        "pmullh     %[ftmp8],   %[ftmp8],   %[D]                    \n\t"

/* Sum the four products and ff_pw_28: low half in ftmp1, high in ftmp5 */
#define CHROMA_MC_8_SUM_MMI                                                 \
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]                \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp4]                \n\t"   \
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]                \n\t"   \
        "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_28]             \n\t"   \
                                                                            \
        "paddh      %[ftmp5],   %[ftmp5],   %[ftmp6]                \n\t"   \
        "paddh      %[ftmp7],   %[ftmp7],   %[ftmp8]                \n\t"   \
        "paddh      %[ftmp5],   %[ftmp5],   %[ftmp7]                \n\t"   \
        "paddh      %[ftmp5],   %[ftmp5],   %[ff_pw_28]             \n\t"

/* Shift both halves right by ftmp9 and pack them into ftmp1 */
#define CHROMA_MC_8_PACK_MMI                                                \
        "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp9]                \n\t"   \
        "psrlh      %[ftmp5],   %[ftmp5],   %[ftmp9]                \n\t"   \
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp5]                \n\t"

#define CHROMA_MC_8_MMI                                                     \
        CHROMA_MC_8_UNPACK_MMI                                              \
        CHROMA_MC_8_WEIGHT_MMI                                              \
        CHROMA_MC_8_SUM_MMI                                                 \
        CHROMA_MC_8_PACK_MMI
2215cabdff1aSopenharmony_ci
2216cabdff1aSopenharmony_ci
/*
 * 4-pixel chroma kernel, split into stages that CHROMA_MC_4_MMI strings
 * together in order.
 *
 * On entry ftmp1..ftmp4 hold the four source vectors (only the low halves
 * are used), A..D the per-lane weights and ftmp5 the shift count; the
 * caller visible in this file clears ftmp0 beforehand. The packed result
 * is left in ftmp1.
 */

/* Widen the low half of each vector in place against ftmp0 */
#define CHROMA_MC_4_UNPACK_MMI                                              \
        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                \n\t"   \
        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                \n\t"

/* Multiply each vector by its weight */
#define CHROMA_MC_4_WEIGHT_MMI                                              \
        "pmullh     %[ftmp1],   %[ftmp1],   %[A]                    \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],   %[B]                    \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],   %[C]                    \n\t"   \
        "pmullh     %[ftmp4],   %[ftmp4],   %[D]                    \n\t"

/* Sum the four products and ff_pw_28 into ftmp1 */
#define CHROMA_MC_4_SUM_MMI                                                 \
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]                \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp4]                \n\t"   \
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]                \n\t"   \
        "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_28]             \n\t"

/* Shift right by ftmp5 and pack into ftmp1, with ftmp0 as the upper input */
#define CHROMA_MC_4_PACK_MMI                                                \
        "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp5]                \n\t"   \
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t"

#define CHROMA_MC_4_MMI                                                     \
        CHROMA_MC_4_UNPACK_MMI                                              \
        CHROMA_MC_4_WEIGHT_MMI                                              \
        CHROMA_MC_4_SUM_MMI                                                 \
        CHROMA_MC_4_PACK_MMI
2235cabdff1aSopenharmony_ci
2236cabdff1aSopenharmony_ci
2237cabdff1aSopenharmony_civoid ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
2238cabdff1aSopenharmony_ci                                      uint8_t *src /* align 1 */,
2239cabdff1aSopenharmony_ci                                      ptrdiff_t stride, int h, int x, int y)
2240cabdff1aSopenharmony_ci{
2241cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D;
2242cabdff1aSopenharmony_ci    double ftmp[10];
2243cabdff1aSopenharmony_ci    uint32_t tmp[1];
2244cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
2245cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;
2246cabdff1aSopenharmony_ci    A.i = (8 - x) * (8 - y);
2247cabdff1aSopenharmony_ci    B.i =     (x) * (8 - y);
2248cabdff1aSopenharmony_ci    C.i = (8 - x) *     (y);
2249cabdff1aSopenharmony_ci    D.i =     (x) *     (y);
2250cabdff1aSopenharmony_ci
2251cabdff1aSopenharmony_ci    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2252cabdff1aSopenharmony_ci
2253cabdff1aSopenharmony_ci    __asm__ volatile(
2254cabdff1aSopenharmony_ci        "li         %[tmp0],    0x06                                    \n\t"
2255cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
2256cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
2257cabdff1aSopenharmony_ci        "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t"
2258cabdff1aSopenharmony_ci        "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t"
2259cabdff1aSopenharmony_ci        "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t"
2260cabdff1aSopenharmony_ci        "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t"
2261cabdff1aSopenharmony_ci
2262cabdff1aSopenharmony_ci        "1:                                                             \n\t"
2263cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp1], %[src], 0x00)
2264cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp2], %[src], 0x01)
2265cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t"
2266cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)
2267cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)
2268cabdff1aSopenharmony_ci
2269cabdff1aSopenharmony_ci        CHROMA_MC_8_MMI
2270cabdff1aSopenharmony_ci
2271cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[dst], 0x00)
2272cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],      -0x01                        \n\t"
2273cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t"
2274cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                      \n\t"
2275cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
2276cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
2277cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
2278cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
2279cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
2280cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2281cabdff1aSopenharmony_ci          RESTRICT_ASM_ADDRT
2282cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2283cabdff1aSopenharmony_ci          [src]"+&r"(src),              [dst]"+&r"(dst),
2284cabdff1aSopenharmony_ci          [h]"+&r"(h)
2285cabdff1aSopenharmony_ci        : [stride]"r"((mips_reg)stride),
2286cabdff1aSopenharmony_ci          [A]"f"(A.f),                  [B]"f"(B.f),
2287cabdff1aSopenharmony_ci          [C]"f"(C.f),                  [D]"f"(D.f),
2288cabdff1aSopenharmony_ci          [ff_pw_28]"f"(ff_pw_28.f)
2289cabdff1aSopenharmony_ci        : "memory"
2290cabdff1aSopenharmony_ci    );
2291cabdff1aSopenharmony_ci}
2292cabdff1aSopenharmony_ci
2293cabdff1aSopenharmony_civoid ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
2294cabdff1aSopenharmony_ci                                      uint8_t *src /* align 1 */,
2295cabdff1aSopenharmony_ci                                      ptrdiff_t stride, int h, int x, int y)
2296cabdff1aSopenharmony_ci{
2297cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D;
2298cabdff1aSopenharmony_ci    double ftmp[6];
2299cabdff1aSopenharmony_ci    uint32_t tmp[1];
2300cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
2301cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;
2302cabdff1aSopenharmony_ci    A.i = (8 - x) * (8 - y);
2303cabdff1aSopenharmony_ci    B.i =     (x) * (8 - y);
2304cabdff1aSopenharmony_ci    C.i = (8 - x) *     (y);
2305cabdff1aSopenharmony_ci    D.i =     (x) *     (y);
2306cabdff1aSopenharmony_ci
2307cabdff1aSopenharmony_ci    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2308cabdff1aSopenharmony_ci
2309cabdff1aSopenharmony_ci    __asm__ volatile(
2310cabdff1aSopenharmony_ci        "li         %[tmp0],    0x06                                    \n\t"
2311cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
2312cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp5]                                \n\t"
2313cabdff1aSopenharmony_ci        "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t"
2314cabdff1aSopenharmony_ci        "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t"
2315cabdff1aSopenharmony_ci        "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t"
2316cabdff1aSopenharmony_ci        "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t"
2317cabdff1aSopenharmony_ci
2318cabdff1aSopenharmony_ci        "1:                                                             \n\t"
2319cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp1], %[src], 0x00)
2320cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp2], %[src], 0x01)
2321cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t"
2322cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp3], %[src], 0x00)
2323cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp4], %[src], 0x01)
2324cabdff1aSopenharmony_ci
2325cabdff1aSopenharmony_ci        CHROMA_MC_4_MMI
2326cabdff1aSopenharmony_ci
2327cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dst], 0x00)
2328cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],      -0x01                        \n\t"
2329cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t"
2330cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                      \n\t"
2331cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
2332cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
2333cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
2334cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2335cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
2336cabdff1aSopenharmony_ci          RESTRICT_ASM_ADDRT
2337cabdff1aSopenharmony_ci          [src]"+&r"(src),              [dst]"+&r"(dst),
2338cabdff1aSopenharmony_ci          [h]"+&r"(h)
2339cabdff1aSopenharmony_ci        : [stride]"r"((mips_reg)stride),
2340cabdff1aSopenharmony_ci          [A]"f"(A.f),                  [B]"f"(B.f),
2341cabdff1aSopenharmony_ci          [C]"f"(C.f),                  [D]"f"(D.f),
2342cabdff1aSopenharmony_ci          [ff_pw_28]"f"(ff_pw_28.f)
2343cabdff1aSopenharmony_ci        : "memory"
2344cabdff1aSopenharmony_ci    );
2345cabdff1aSopenharmony_ci}
2346cabdff1aSopenharmony_ci
2347cabdff1aSopenharmony_civoid ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
2348cabdff1aSopenharmony_ci                                      uint8_t *src /* align 1 */,
2349cabdff1aSopenharmony_ci                                      ptrdiff_t stride, int h, int x, int y)
2350cabdff1aSopenharmony_ci{
2351cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D;
2352cabdff1aSopenharmony_ci    double ftmp[10];
2353cabdff1aSopenharmony_ci    uint32_t tmp[1];
2354cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
2355cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;
2356cabdff1aSopenharmony_ci    A.i = (8 - x) * (8 - y);
2357cabdff1aSopenharmony_ci    B.i =     (x) * (8 - y);
2358cabdff1aSopenharmony_ci    C.i = (8 - x) *     (y);
2359cabdff1aSopenharmony_ci    D.i =     (x) *     (y);
2360cabdff1aSopenharmony_ci
2361cabdff1aSopenharmony_ci    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2362cabdff1aSopenharmony_ci
2363cabdff1aSopenharmony_ci    __asm__ volatile(
2364cabdff1aSopenharmony_ci        "li         %[tmp0],    0x06                                    \n\t"
2365cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
2366cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
2367cabdff1aSopenharmony_ci        "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t"
2368cabdff1aSopenharmony_ci        "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t"
2369cabdff1aSopenharmony_ci        "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t"
2370cabdff1aSopenharmony_ci        "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t"
2371cabdff1aSopenharmony_ci
2372cabdff1aSopenharmony_ci        "1:                                                             \n\t"
2373cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp1], %[src], 0x00)
2374cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp2], %[src], 0x01)
2375cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t"
2376cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp3], %[src], 0x00)
2377cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp4], %[src], 0x01)
2378cabdff1aSopenharmony_ci
2379cabdff1aSopenharmony_ci        CHROMA_MC_8_MMI
2380cabdff1aSopenharmony_ci
2381cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[dst], 0x00)
2382cabdff1aSopenharmony_ci        "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]                    \n\t"
2383cabdff1aSopenharmony_ci
2384cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[dst], 0x00)
2385cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],      -0x01                        \n\t"
2386cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t"
2387cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                      \n\t"
2388cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
2389cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
2390cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
2391cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
2392cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
2393cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2394cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2395cabdff1aSopenharmony_ci          RESTRICT_ASM_ADDRT
2396cabdff1aSopenharmony_ci          [src]"+&r"(src),              [dst]"+&r"(dst),
2397cabdff1aSopenharmony_ci          [h]"+&r"(h)
2398cabdff1aSopenharmony_ci        : [stride]"r"((mips_reg)stride),
2399cabdff1aSopenharmony_ci          [A]"f"(A.f),                 [B]"f"(B.f),
2400cabdff1aSopenharmony_ci          [C]"f"(C.f),                 [D]"f"(D.f),
2401cabdff1aSopenharmony_ci          [ff_pw_28]"f"(ff_pw_28.f)
2402cabdff1aSopenharmony_ci        : "memory"
2403cabdff1aSopenharmony_ci    );
2404cabdff1aSopenharmony_ci}
2405cabdff1aSopenharmony_ci
2406cabdff1aSopenharmony_civoid ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
2407cabdff1aSopenharmony_ci                                      uint8_t *src /* align 1 */,
2408cabdff1aSopenharmony_ci                                      ptrdiff_t stride, int h, int x, int y)
2409cabdff1aSopenharmony_ci{
2410cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D;
2411cabdff1aSopenharmony_ci    double ftmp[6];
2412cabdff1aSopenharmony_ci    uint32_t tmp[1];
2413cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
2414cabdff1aSopenharmony_ci    DECLARE_VAR_ADDRT;
2415cabdff1aSopenharmony_ci    A.i = (8 - x) * (8 - y);
2416cabdff1aSopenharmony_ci    B.i = (x) * (8 - y);
2417cabdff1aSopenharmony_ci    C.i = (8 - x) * (y);
2418cabdff1aSopenharmony_ci    D.i = (x) * (y);
2419cabdff1aSopenharmony_ci
2420cabdff1aSopenharmony_ci    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2421cabdff1aSopenharmony_ci
2422cabdff1aSopenharmony_ci    __asm__ volatile(
2423cabdff1aSopenharmony_ci        "li         %[tmp0],    0x06                                    \n\t"
2424cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t"
2425cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp5]                                \n\t"
2426cabdff1aSopenharmony_ci        "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t"
2427cabdff1aSopenharmony_ci        "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t"
2428cabdff1aSopenharmony_ci        "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t"
2429cabdff1aSopenharmony_ci        "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t"
2430cabdff1aSopenharmony_ci
2431cabdff1aSopenharmony_ci        "1:                                                             \n\t"
2432cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp1], %[src], 0x00)
2433cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp2], %[src], 0x01)
2434cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t"
2435cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp3], %[src], 0x00)
2436cabdff1aSopenharmony_ci        MMI_ULWC1(%[ftmp4], %[src], 0x01)
2437cabdff1aSopenharmony_ci
2438cabdff1aSopenharmony_ci        CHROMA_MC_4_MMI
2439cabdff1aSopenharmony_ci
2440cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[dst], 0x00)
2441cabdff1aSopenharmony_ci        "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]                    \n\t"
2442cabdff1aSopenharmony_ci
2443cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dst], 0x00)
2444cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],      -0x01                        \n\t"
2445cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t"
2446cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                      \n\t"
2447cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
2448cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
2449cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
2450cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2451cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
2452cabdff1aSopenharmony_ci          RESTRICT_ASM_ADDRT
2453cabdff1aSopenharmony_ci          [src]"+&r"(src),              [dst]"+&r"(dst),
2454cabdff1aSopenharmony_ci          [h]"+&r"(h)
2455cabdff1aSopenharmony_ci        : [stride]"r"((mips_reg)stride),
2456cabdff1aSopenharmony_ci          [A]"f"(A.f),                  [B]"f"(B.f),
2457cabdff1aSopenharmony_ci          [C]"f"(C.f),                  [D]"f"(D.f),
2458cabdff1aSopenharmony_ci          [ff_pw_28]"f"(ff_pw_28.f)
2459cabdff1aSopenharmony_ci        : "memory"
2460cabdff1aSopenharmony_ci    );
2461cabdff1aSopenharmony_ci}
2462