/*
 * Loongson SIMD optimized h264chroma
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci#include "h264chroma_mips.h"
26cabdff1aSopenharmony_ci#include "constants.h"
27cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h"
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_civoid ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
30cabdff1aSopenharmony_ci        int h, int x, int y)
31cabdff1aSopenharmony_ci{
32cabdff1aSopenharmony_ci    double ftmp[12];
33cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D, E;
34cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
35cabdff1aSopenharmony_ci
36cabdff1aSopenharmony_ci    A.i = 64;
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_ci    if (!(x || y)) {
39cabdff1aSopenharmony_ci        /* x=0, y=0, A.i=64 */
40cabdff1aSopenharmony_ci        __asm__ volatile (
41cabdff1aSopenharmony_ci            "1:                                                        \n\t"
42cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp0], %[src], 0x00)
43cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
44cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
45cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
46cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x00)
47cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
48cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp3], %[src], 0x00)
49cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
50cabdff1aSopenharmony_ci
51cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x04              \n\t"
52cabdff1aSopenharmony_ci
53cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp0], %[dst], 0x00)
54cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
55cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
56cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
57cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp2], %[dst], 0x00)
58cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
59cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp3], %[dst], 0x00)
60cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
61cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                 \n\t"
62cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
63cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
64cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
65cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
66cabdff1aSopenharmony_ci              [h]"+&r"(h)
67cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride)
68cabdff1aSopenharmony_ci            : "memory"
69cabdff1aSopenharmony_ci        );
70cabdff1aSopenharmony_ci    } else if (x && y) {
71cabdff1aSopenharmony_ci        /* x!=0, y!=0 */
72cabdff1aSopenharmony_ci        D.i = x * y;
73cabdff1aSopenharmony_ci        B.i = (x << 3) - D.i;
74cabdff1aSopenharmony_ci        C.i = (y << 3) - D.i;
75cabdff1aSopenharmony_ci        A.i = 64 - D.i - B.i - C.i;
76cabdff1aSopenharmony_ci
77cabdff1aSopenharmony_ci        __asm__ volatile (
78cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
79cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
80cabdff1aSopenharmony_ci            "pshufh     %[B],       %[B],           %[ftmp0]           \n\t"
81cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp9]                           \n\t"
82cabdff1aSopenharmony_ci            "pshufh     %[C],       %[C],           %[ftmp0]           \n\t"
83cabdff1aSopenharmony_ci            "pshufh     %[D],       %[D],           %[ftmp0]           \n\t"
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_ci            "1:                                                        \n\t"
86cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
87cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x01)
88cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
89cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp3], %[src], 0x00)
90cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp4], %[src], 0x01)
91cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
92cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp10], %[src], 0x00)
93cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp11], %[src], 0x01)
94cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x02              \n\t"
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
97cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
98cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
99cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
100cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
101cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
102cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
103cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
104cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
105cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
106cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
107cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
108cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
109cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
110cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
111cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
112cabdff1aSopenharmony_ci            "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]           \n\t"
113cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
114cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
115cabdff1aSopenharmony_ci            "paddh      %[ftmp6],   %[ftmp6],       %[ftmp8]           \n\t"
116cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]           \n\t"
117cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]           \n\t"
118cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
119cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
120cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
121cabdff1aSopenharmony_ci            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
122cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
123cabdff1aSopenharmony_ci
124cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
125cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
126cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
127cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
128cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
129cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
130cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
131cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
132cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
133cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
134cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp10],      %[ftmp0]           \n\t"
135cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp10],      %[ftmp0]           \n\t"
136cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp11],      %[ftmp0]           \n\t"
137cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp11],      %[ftmp0]           \n\t"
138cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
139cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
140cabdff1aSopenharmony_ci            "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]           \n\t"
141cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
142cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
143cabdff1aSopenharmony_ci            "paddh      %[ftmp6],   %[ftmp6],       %[ftmp8]           \n\t"
144cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
145cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
146cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
147cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
148cabdff1aSopenharmony_ci            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp9]           \n\t"
149cabdff1aSopenharmony_ci            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp9]           \n\t"
150cabdff1aSopenharmony_ci            "packushb   %[ftmp3],   %[ftmp3],       %[ftmp4]           \n\t"
151cabdff1aSopenharmony_ci
152cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
153cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
154cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp3], %[dst], 0x00)
155cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
156cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                 \n\t"
157cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
158cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
159cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
160cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
161cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
162cabdff1aSopenharmony_ci              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
163cabdff1aSopenharmony_ci              [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
164cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
165cabdff1aSopenharmony_ci              [h]"+&r"(h)
166cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
167cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [B]"f"(B.f),
168cabdff1aSopenharmony_ci              [C]"f"(C.f),                  [D]"f"(D.f),
169cabdff1aSopenharmony_ci              [tmp0]"r"(0x06)
170cabdff1aSopenharmony_ci            : "memory"
171cabdff1aSopenharmony_ci        );
172cabdff1aSopenharmony_ci    } else if (x) {
173cabdff1aSopenharmony_ci        /* x!=0, y==0 */
174cabdff1aSopenharmony_ci        E.i = x << 3;
175cabdff1aSopenharmony_ci        A.i = 64 - E.i;
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci        __asm__ volatile (
178cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
179cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
180cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
181cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ci            "1:                                                        \n\t"
184cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
185cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x01)
186cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01              \n\t"
187cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
188cabdff1aSopenharmony_ci
189cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
190cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
191cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
192cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
193cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
194cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
195cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
196cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
197cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
198cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
199cabdff1aSopenharmony_ci
200cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
201cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
202cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
203cabdff1aSopenharmony_ci            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
204cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
205cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
206cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
207cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                 \n\t"
208cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
209cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
210cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
211cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
212cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
213cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
214cabdff1aSopenharmony_ci              [h]"+&r"(h)
215cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),
216cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
217cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [E]"f"(E.f)
218cabdff1aSopenharmony_ci            : "memory"
219cabdff1aSopenharmony_ci        );
220cabdff1aSopenharmony_ci    } else {
221cabdff1aSopenharmony_ci        /* x==0, y!=0 */
222cabdff1aSopenharmony_ci        E.i = y << 3;
223cabdff1aSopenharmony_ci        A.i = 64 - E.i;
224cabdff1aSopenharmony_ci
225cabdff1aSopenharmony_ci        __asm__ volatile (
226cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
227cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
228cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
229cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
230cabdff1aSopenharmony_ci
231cabdff1aSopenharmony_ci            "1:                                                        \n\t"
232cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
233cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
234cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x00)
235cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
236cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp8], %[src], 0x00)
237cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x02              \n\t"
238cabdff1aSopenharmony_ci
239cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
240cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
241cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
242cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
243cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
244cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
245cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
246cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
247cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
248cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
249cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
250cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
251cabdff1aSopenharmony_ci            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp7]           \n\t"
252cabdff1aSopenharmony_ci            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp7]           \n\t"
253cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp3],       %[ftmp4]           \n\t"
254cabdff1aSopenharmony_ci
255cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp2],       %[ftmp0]           \n\t"
256cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp4],   %[ftmp2],       %[ftmp0]           \n\t"
257cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp8],       %[ftmp0]           \n\t"
258cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp8],       %[ftmp0]           \n\t"
259cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
260cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
261cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
262cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
263cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
264cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
265cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
266cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
267cabdff1aSopenharmony_ci            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp7]           \n\t"
268cabdff1aSopenharmony_ci            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp7]           \n\t"
269cabdff1aSopenharmony_ci            "packushb   %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
270cabdff1aSopenharmony_ci
271cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
272cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
273cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp2], %[dst], 0x00)
274cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
275cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                 \n\t"
276cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
277cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
278cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
279cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
280cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
281cabdff1aSopenharmony_ci              [ftmp8]"=&f"(ftmp[8]),
282cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
283cabdff1aSopenharmony_ci              [h]"+&r"(h)
284cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),
285cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [A]"f"(A.f),
286cabdff1aSopenharmony_ci              [E]"f"(E.f),                  [tmp0]"r"(0x06)
287cabdff1aSopenharmony_ci            : "memory"
288cabdff1aSopenharmony_ci        );
289cabdff1aSopenharmony_ci    }
290cabdff1aSopenharmony_ci}
291cabdff1aSopenharmony_ci
292cabdff1aSopenharmony_civoid ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
293cabdff1aSopenharmony_ci        int h, int x, int y)
294cabdff1aSopenharmony_ci{
295cabdff1aSopenharmony_ci    double ftmp[10];
296cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D, E;
297cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
298cabdff1aSopenharmony_ci
299cabdff1aSopenharmony_ci    A.i = 64;
300cabdff1aSopenharmony_ci
301cabdff1aSopenharmony_ci    if(!(x || y)){
302cabdff1aSopenharmony_ci        /* x=0, y=0, A.i=64 */
303cabdff1aSopenharmony_ci        __asm__ volatile (
304cabdff1aSopenharmony_ci            "1:                                                         \n\t"
305cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp0], %[src], 0x00)
306cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
307cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
308cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
309cabdff1aSopenharmony_ci            MMI_LDC1(%[ftmp2], %[dst], 0x00)
310cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
311cabdff1aSopenharmony_ci            MMI_LDC1(%[ftmp3], %[dst], 0x00)
312cabdff1aSopenharmony_ci            PTR_SUBU   "%[dst],     %[dst],         %[stride]           \n\t"
313cabdff1aSopenharmony_ci            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
314cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
315cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp0], %[dst], 0x00)
316cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
317cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
318cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
319cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x02               \n\t"
320cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
321cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
322cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
323cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
324cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
325cabdff1aSopenharmony_ci              [h]"+&r"(h)
326cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride)
327cabdff1aSopenharmony_ci            : "memory"
328cabdff1aSopenharmony_ci        );
329cabdff1aSopenharmony_ci    } else if (x && y) {
330cabdff1aSopenharmony_ci        /* x!=0, y!=0 */
331cabdff1aSopenharmony_ci        D.i = x * y;
332cabdff1aSopenharmony_ci        B.i = (x << 3) - D.i;
333cabdff1aSopenharmony_ci        C.i = (y << 3) - D.i;
334cabdff1aSopenharmony_ci        A.i = 64 - D.i - B.i - C.i;
335cabdff1aSopenharmony_ci        __asm__ volatile (
336cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
337cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
338cabdff1aSopenharmony_ci            "pshufh     %[B],       %[B],           %[ftmp0]       \n\t"
339cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp9]                       \n\t"
340cabdff1aSopenharmony_ci            "pshufh     %[C],       %[C],           %[ftmp0]       \n\t"
341cabdff1aSopenharmony_ci            "pshufh     %[D],       %[D],           %[ftmp0]       \n\t"
342cabdff1aSopenharmony_ci
343cabdff1aSopenharmony_ci            "1:                                                    \n\t"
344cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
345cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x01)
346cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
347cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp3], %[src], 0x00)
348cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp4], %[src], 0x01)
349cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01          \n\t"
350cabdff1aSopenharmony_ci
351cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]       \n\t"
352cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]       \n\t"
353cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]       \n\t"
354cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]       \n\t"
355cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[A]           \n\t"
356cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[B]           \n\t"
357cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]       \n\t"
358cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[A]           \n\t"
359cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[B]           \n\t"
360cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]       \n\t"
361cabdff1aSopenharmony_ci
362cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]       \n\t"
363cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]       \n\t"
364cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]       \n\t"
365cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]       \n\t"
366cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[C]           \n\t"
367cabdff1aSopenharmony_ci            "pmullh     %[ftmp7],   %[ftmp7],       %[D]           \n\t"
368cabdff1aSopenharmony_ci            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]       \n\t"
369cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[C]           \n\t"
370cabdff1aSopenharmony_ci            "pmullh     %[ftmp8],   %[ftmp8],       %[D]           \n\t"
371cabdff1aSopenharmony_ci            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]       \n\t"
372cabdff1aSopenharmony_ci
373cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]       \n\t"
374cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]       \n\t"
375cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
376cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
377cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]       \n\t"
378cabdff1aSopenharmony_ci            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]       \n\t"
379cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
380cabdff1aSopenharmony_ci            MMI_LDC1(%[ftmp2], %[dst], 0x00)
381cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
382cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
383cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
384cabdff1aSopenharmony_ci            "bnez       %[h],       1b                             \n\t"
385cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
386cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
387cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
388cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
389cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
390cabdff1aSopenharmony_ci              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
391cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
392cabdff1aSopenharmony_ci              [h]"+&r"(h)
393cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
394cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [B]"f"(B.f),
395cabdff1aSopenharmony_ci              [C]"f"(C.f),                  [D]"f"(D.f),
396cabdff1aSopenharmony_ci              [tmp0]"r"(0x06)
397cabdff1aSopenharmony_ci            : "memory"
398cabdff1aSopenharmony_ci        );
399cabdff1aSopenharmony_ci    } else if (x) {
400cabdff1aSopenharmony_ci        /* x!=0, y==0 */
401cabdff1aSopenharmony_ci        E.i = x << 3;
402cabdff1aSopenharmony_ci        A.i = 64 - E.i;
403cabdff1aSopenharmony_ci        __asm__ volatile (
404cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
405cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
406cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
407cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
408cabdff1aSopenharmony_ci
409cabdff1aSopenharmony_ci            "1:                                                    \n\t"
410cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
411cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x01)
412cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
413cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01          \n\t"
414cabdff1aSopenharmony_ci
415cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
416cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
417cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
418cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
419cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
420cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
421cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
422cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
423cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
424cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
425cabdff1aSopenharmony_ci
426cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
427cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
428cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
429cabdff1aSopenharmony_ci            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
430cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
431cabdff1aSopenharmony_ci            MMI_LDC1(%[ftmp2], %[dst], 0x00)
432cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
433cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
434cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
435cabdff1aSopenharmony_ci            "bnez       %[h],       1b                             \n\t"
436cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
437cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
438cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
439cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
440cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
441cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
442cabdff1aSopenharmony_ci              [h]"+&r"(h)
443cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),
444cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
445cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [E]"f"(E.f)
446cabdff1aSopenharmony_ci            : "memory"
447cabdff1aSopenharmony_ci        );
448cabdff1aSopenharmony_ci    } else {
449cabdff1aSopenharmony_ci        /* x==0, y!=0 */
450cabdff1aSopenharmony_ci        E.i = y << 3;
451cabdff1aSopenharmony_ci        A.i = 64 - E.i;
452cabdff1aSopenharmony_ci        __asm__ volatile (
453cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
454cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
455cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
456cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
457cabdff1aSopenharmony_ci
458cabdff1aSopenharmony_ci            "1:                                                    \n\t"
459cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src], 0x00)
460cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
461cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src], 0x00)
462cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01          \n\t"
463cabdff1aSopenharmony_ci
464cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
465cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
466cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
467cabdff1aSopenharmony_ci            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
468cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
469cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
470cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
471cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
472cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
473cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
474cabdff1aSopenharmony_ci
475cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]  \n\t"
476cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]  \n\t"
477cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
478cabdff1aSopenharmony_ci            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
479cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
480cabdff1aSopenharmony_ci            MMI_LDC1(%[ftmp2], %[dst], 0x00)
481cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
482cabdff1aSopenharmony_ci            MMI_SDC1(%[ftmp1], %[dst], 0x00)
483cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
484cabdff1aSopenharmony_ci            "bnez       %[h],       1b                             \n\t"
485cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
486cabdff1aSopenharmony_ci              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
487cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
488cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
489cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
490cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
491cabdff1aSopenharmony_ci              [h]"+&r"(h)
492cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),
493cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
494cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [E]"f"(E.f)
495cabdff1aSopenharmony_ci            : "memory"
496cabdff1aSopenharmony_ci        );
497cabdff1aSopenharmony_ci    }
498cabdff1aSopenharmony_ci}
499cabdff1aSopenharmony_ci
/**
 * H.264 chroma motion compensation for a 4-pixel-wide block, "put" variant
 * (the interpolated result overwrites dst), written with Loongson MMI
 * inline assembly.
 *
 * The general (x != 0 and y != 0) path evaluates, for each output pixel,
 *   (A*src[i] + B*src[i+1] + C*src[i+stride] + D*src[i+stride+1]
 *    + ff_pw_32) >> 6
 * with A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y (A+B+C+D == 64).
 * When D == 0 a two-tap form with weights A and E = B + C is used, and when
 * both D and E are zero (only possible for x == 0 and y == 0) the rows are
 * copied unchanged.
 *
 * @param dst    destination; one MMI_SWC1 store per row (presumably a 32-bit
 *               word, i.e. 4 pixels - confirm against mmiutils.h)
 * @param src    source; read with MMI_ULWC1 (presumably an unaligned 32-bit
 *               load - confirm against mmiutils.h). The interpolating paths
 *               also read one pixel to the right and/or one row below.
 * @param stride byte offset between consecutive rows, added to both dst and
 *               src
 * @param h      number of rows to produce. Every loop decrements h and only
 *               exits when it is exactly zero, so h must be > 0, and even in
 *               the plain-copy path, which handles two rows per iteration.
 * @param x      horizontal fractional offset (presumably 0..7 - confirm
 *               against the caller)
 * @param y      vertical fractional offset (presumably 0..7 - confirm
 *               against the caller)
 *
 * NOTE(review): A, B, C, D and E are passed as input-only "f" operands but
 * are overwritten in place by pshufh. GCC's extended-asm rules say input-only
 * operands must not be modified; nothing reads them after the asm here, but
 * consider declaring them as read-write ("+f") operands - TODO confirm.
 */
500cabdff1aSopenharmony_civoid ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
501cabdff1aSopenharmony_ci        int h, int x, int y)
502cabdff1aSopenharmony_ci{
503cabdff1aSopenharmony_ci    double ftmp[8];
504cabdff1aSopenharmony_ci    mips_reg addr[1];
505cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D, E;
506cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* Bilinear weights; E is the single second-tap weight of the 1-D cases. */
507cabdff1aSopenharmony_ci    A.i = (8 - x) * (8 - y);
508cabdff1aSopenharmony_ci    B.i = x * (8 - y);
509cabdff1aSopenharmony_ci    C.i = (8 - x) * y;
510cabdff1aSopenharmony_ci    D.i = x * y;
511cabdff1aSopenharmony_ci    E.i = B.i + C.i;
512cabdff1aSopenharmony_ci
513cabdff1aSopenharmony_ci    if (D.i) {
        /* x != 0 and y != 0: full four-tap interpolation, one row per pass. */
514cabdff1aSopenharmony_ci        __asm__ volatile (
            /* ftmp0 = 0: zero operand for byte unpacking and pshufh selector */
515cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
            /* replicate each weight's low halfword into all 16-bit lanes
             * (pshufh with an all-zero selector); ftmp7 = 6 is the shift count */
516cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
517cabdff1aSopenharmony_ci            "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
518cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
519cabdff1aSopenharmony_ci            "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
520cabdff1aSopenharmony_ci            "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
521cabdff1aSopenharmony_ci
522cabdff1aSopenharmony_ci            "1:                                                         \n\t"
            /* ftmp1/ftmp2: current row at offsets 0 and +1;
             * ftmp3/ftmp4: next row at offsets 0 and +1.
             * src is left pointing at the next row for the following pass. */
523cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
524cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp2], %[src], 0x01)
525cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
526cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp3], %[src], 0x00)
527cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp4], %[src], 0x01)
528cabdff1aSopenharmony_ci
            /* ftmp1 = A * row0[i] + B * row0[i+1], bytes widened to halfwords */
529cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
530cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
531cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
532cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
533cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
            /* ftmp2 = C * row1[i] + D * row1[i+1] */
534cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
535cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
536cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
537cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
538cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
            /* sum both rows, add the ff_pw_32 bias, shift right by 6 and pack
             * the halfwords back to bytes (upper half packed from zero) */
539cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
540cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
541cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
542cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
543cabdff1aSopenharmony_ci
            /* one row done: store it, step dst, loop until h reaches zero */
544cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01               \n\t"
545cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
546cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
547cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
548cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
549cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
550cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
551cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
552cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
553cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
554cabdff1aSopenharmony_ci              [h]"+&r"(h)
555cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
556cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [B]"f"(B.f),
557cabdff1aSopenharmony_ci              [C]"f"(C.f),                  [D]"f"(D.f),
558cabdff1aSopenharmony_ci              [tmp0]"r"(0x06)
559cabdff1aSopenharmony_ci            : "memory"
560cabdff1aSopenharmony_ci        );
561cabdff1aSopenharmony_ci    } else if (E.i) {
        /*
         * Exactly one of x, y is non-zero: two-tap filter with weights A and E.
         * C != 0 means y != 0, so the second tap is the pixel one row below
         * (step = stride); otherwise it is the pixel to the right (step = 1).
         * NOTE(review): step is an int, so a stride outside int range would be
         * truncated here - confirm strides always fit.
         */
562cabdff1aSopenharmony_ci        const int step = C.i ? stride : 1;
563cabdff1aSopenharmony_ci        __asm__ volatile (
            /* ftmp0 = 0; replicate A and E across lanes; ftmp5 = 6 (shift) */
564cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
565cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
566cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
567cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
568cabdff1aSopenharmony_ci
569cabdff1aSopenharmony_ci            "1:                                                         \n\t"
            /* ftmp1 = pixels at src, ftmp2 = pixels at src + step */
570cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
571cabdff1aSopenharmony_ci            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
572cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
573cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
574cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01               \n\t"
            /* (A * p0 + E * p1 + ff_pw_32) >> 6, then pack back to bytes */
575cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
576cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
577cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
578cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
579cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
580cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
581cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
582cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
583cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
584cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
585cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
586cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
587cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
588cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
589cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
590cabdff1aSopenharmony_ci              [addr0]"=&r"(addr[0]),
591cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
592cabdff1aSopenharmony_ci              [h]"+&r"(h)
593cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
594cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
595cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [E]"f"(E.f)
596cabdff1aSopenharmony_ci            : "memory"
597cabdff1aSopenharmony_ci        );
598cabdff1aSopenharmony_ci    } else {
        /*
         * D == 0 and E == 0, i.e. x == 0 and y == 0: no filtering, rows are
         * copied as-is. Two rows are handled per iteration (h -= 2), so the
         * loop only terminates for an even, non-zero h.
         */
599cabdff1aSopenharmony_ci        __asm__ volatile (
600cabdff1aSopenharmony_ci            "1:                                                         \n\t"
601cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp0], %[src], 0x00)
602cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
603cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
604cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
605cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x02               \n\t"
606cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp0], %[dst], 0x00)
607cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
608cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
609cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
610cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
611cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
612cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
613cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
614cabdff1aSopenharmony_ci              [h]"+&r"(h)
615cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride)
616cabdff1aSopenharmony_ci            : "memory"
617cabdff1aSopenharmony_ci        );
618cabdff1aSopenharmony_ci    }
619cabdff1aSopenharmony_ci}
620cabdff1aSopenharmony_ci
/**
 * H.264 chroma motion compensation for a 4-pixel-wide block, "avg" variant:
 * the interpolated row is combined with the row already in dst using pavgb
 * (byte-wise average) before being stored. Written with Loongson MMI inline
 * assembly.
 *
 * The prediction itself is, in the general (x != 0 and y != 0) path,
 *   (A*src[i] + B*src[i+1] + C*src[i+stride] + D*src[i+stride+1]
 *    + ff_pw_32) >> 6
 * with A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y (A+B+C+D == 64).
 * When D == 0 a two-tap form with weights A and E = B + C is used, and when
 * both D and E are zero (only possible for x == 0 and y == 0) the source
 * pixels are averaged with dst without any filtering.
 *
 * @param dst    destination; each row is read with MMI_LWC1 and written with
 *               MMI_SWC1 (presumably aligned 32-bit accesses, i.e. 4 pixels -
 *               confirm against mmiutils.h)
 * @param src    source; read with MMI_ULWC1 (presumably an unaligned 32-bit
 *               load - confirm against mmiutils.h). The interpolating paths
 *               also read one pixel to the right and/or one row below.
 * @param stride byte offset between consecutive rows, added to both dst and
 *               src
 * @param h      number of rows to produce. Every loop decrements h and only
 *               exits when it is exactly zero, so h must be > 0, and even in
 *               the unfiltered path, which handles two rows per iteration.
 * @param x      horizontal fractional offset (presumably 0..7 - confirm
 *               against the caller)
 * @param y      vertical fractional offset (presumably 0..7 - confirm
 *               against the caller)
 *
 * NOTE(review): A, B, C, D and E are passed as input-only "f" operands but
 * are overwritten in place by pshufh. GCC's extended-asm rules say input-only
 * operands must not be modified; nothing reads them after the asm here, but
 * consider declaring them as read-write ("+f") operands - TODO confirm.
 */
621cabdff1aSopenharmony_civoid ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
622cabdff1aSopenharmony_ci        int h, int x, int y)
623cabdff1aSopenharmony_ci{
624cabdff1aSopenharmony_ci    double ftmp[8];
625cabdff1aSopenharmony_ci    mips_reg addr[1];
626cabdff1aSopenharmony_ci    union mmi_intfloat64 A, B, C, D, E;
627cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* Bilinear weights; E is the single second-tap weight of the 1-D cases. */
628cabdff1aSopenharmony_ci    A.i = (8 - x) *(8 - y);
629cabdff1aSopenharmony_ci    B.i = x * (8 - y);
630cabdff1aSopenharmony_ci    C.i = (8 - x) * y;
631cabdff1aSopenharmony_ci    D.i = x * y;
632cabdff1aSopenharmony_ci    E.i = B.i + C.i;
633cabdff1aSopenharmony_ci
634cabdff1aSopenharmony_ci    if (D.i) {
        /* x != 0 and y != 0: full four-tap interpolation, one row per pass. */
635cabdff1aSopenharmony_ci        __asm__ volatile (
            /* ftmp0 = 0: zero operand for byte unpacking and pshufh selector */
636cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
            /* replicate each weight's low halfword into all 16-bit lanes
             * (pshufh with an all-zero selector); ftmp7 = 6 is the shift count */
637cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
638cabdff1aSopenharmony_ci            "pshufh     %[B],       %[B],           %[ftmp0]            \n\t"
639cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
640cabdff1aSopenharmony_ci            "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
641cabdff1aSopenharmony_ci            "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
642cabdff1aSopenharmony_ci
643cabdff1aSopenharmony_ci            "1:                                                         \n\t"
            /* ftmp1/ftmp2: current row at offsets 0 and +1;
             * ftmp3/ftmp4: next row at offsets 0 and +1.
             * src is left pointing at the next row for the following pass. */
644cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
645cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp2], %[src], 0x01)
646cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
647cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp3], %[src], 0x00)
648cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp4], %[src], 0x01)
649cabdff1aSopenharmony_ci
            /* ftmp1 = A * row0[i] + B * row0[i+1], bytes widened to halfwords */
650cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
651cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
652cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
653cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
654cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
            /* ftmp2 = C * row1[i] + D * row1[i+1] */
655cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
656cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
657cabdff1aSopenharmony_ci            "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
658cabdff1aSopenharmony_ci            "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
659cabdff1aSopenharmony_ci            "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
            /* sum both rows, add the ff_pw_32 bias, shift right by 6 and pack
             * the halfwords back to bytes (upper half packed from zero) */
660cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
661cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
662cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
663cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
            /* avg step: combine the prediction with the existing dst row */
664cabdff1aSopenharmony_ci            MMI_LWC1(%[ftmp2], %[dst], 0x00)
665cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
666cabdff1aSopenharmony_ci
            /* one row done: store it, step dst, loop until h reaches zero */
667cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01               \n\t"
668cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
669cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
670cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
671cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
672cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
673cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
674cabdff1aSopenharmony_ci              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
675cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
676cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
677cabdff1aSopenharmony_ci              [h]"+&r"(h)
678cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
679cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [B]"f"(B.f),
680cabdff1aSopenharmony_ci              [C]"f"(C.f),                  [D]"f"(D.f),
681cabdff1aSopenharmony_ci              [tmp0]"r"(0x06)
682cabdff1aSopenharmony_ci            : "memory"
683cabdff1aSopenharmony_ci        );
684cabdff1aSopenharmony_ci    } else if (E.i) {
        /*
         * Exactly one of x, y is non-zero: two-tap filter with weights A and E.
         * C != 0 means y != 0, so the second tap is the pixel one row below
         * (step = stride); otherwise it is the pixel to the right (step = 1).
         * NOTE(review): step is an int, so a stride outside int range would be
         * truncated here - confirm strides always fit.
         */
685cabdff1aSopenharmony_ci        const int step = C.i ? stride : 1;
686cabdff1aSopenharmony_ci        __asm__ volatile (
            /* ftmp0 = 0; replicate A and E across lanes; ftmp5 = 6 (shift) */
687cabdff1aSopenharmony_ci            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
688cabdff1aSopenharmony_ci            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
689cabdff1aSopenharmony_ci            "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
690cabdff1aSopenharmony_ci            "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
691cabdff1aSopenharmony_ci
692cabdff1aSopenharmony_ci            "1:                                                         \n\t"
            /* ftmp1 = pixels at src, ftmp2 = pixels at src + step */
693cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
694cabdff1aSopenharmony_ci            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
695cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
696cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
697cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x01               \n\t"
            /* (A * p0 + E * p1 + ff_pw_32) >> 6, then pack back to bytes */
698cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
699cabdff1aSopenharmony_ci            "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
700cabdff1aSopenharmony_ci            "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
701cabdff1aSopenharmony_ci            "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
702cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
703cabdff1aSopenharmony_ci            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
704cabdff1aSopenharmony_ci            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
705cabdff1aSopenharmony_ci            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
            /* avg step: combine the prediction with the existing dst row */
706cabdff1aSopenharmony_ci            MMI_LWC1(%[ftmp2], %[dst], 0x00)
707cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
708cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
709cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
710cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
711cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
712cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
713cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
714cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
715cabdff1aSopenharmony_ci              [addr0]"=&r"(addr[0]),
716cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
717cabdff1aSopenharmony_ci              [h]"+&r"(h)
718cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
719cabdff1aSopenharmony_ci              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
720cabdff1aSopenharmony_ci              [A]"f"(A.f),                  [E]"f"(E.f)
721cabdff1aSopenharmony_ci            : "memory"
722cabdff1aSopenharmony_ci        );
723cabdff1aSopenharmony_ci    } else {
        /*
         * D == 0 and E == 0, i.e. x == 0 and y == 0: no filtering, the source
         * rows are averaged directly with dst. Two rows are handled per
         * iteration (h -= 2), so the loop only terminates for an even,
         * non-zero h.
         */
724cabdff1aSopenharmony_ci        __asm__ volatile (
725cabdff1aSopenharmony_ci            "1:                                                         \n\t"
            /* ftmp0/ftmp1 = two consecutive source rows */
726cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp0], %[src], 0x00)
727cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
728cabdff1aSopenharmony_ci            MMI_ULWC1(%[ftmp1], %[src], 0x00)
729cabdff1aSopenharmony_ci            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
730cabdff1aSopenharmony_ci            "addi       %[h],       %[h],           -0x02               \n\t"
            /* first row: average with dst and store */
731cabdff1aSopenharmony_ci            MMI_LWC1(%[ftmp2], %[dst], 0x00)
732cabdff1aSopenharmony_ci            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
733cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp0], %[dst], 0x00)
734cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
            /* second row: average with dst and store */
735cabdff1aSopenharmony_ci            MMI_LWC1(%[ftmp3], %[dst], 0x00)
736cabdff1aSopenharmony_ci            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
737cabdff1aSopenharmony_ci            MMI_SWC1(%[ftmp1], %[dst], 0x00)
738cabdff1aSopenharmony_ci            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
739cabdff1aSopenharmony_ci            "bnez       %[h],       1b                                  \n\t"
740cabdff1aSopenharmony_ci            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
741cabdff1aSopenharmony_ci              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
742cabdff1aSopenharmony_ci              [dst]"+&r"(dst),              [src]"+&r"(src),
743cabdff1aSopenharmony_ci              RESTRICT_ASM_LOW32
744cabdff1aSopenharmony_ci              [h]"+&r"(h)
745cabdff1aSopenharmony_ci            : [stride]"r"((mips_reg)stride)
746cabdff1aSopenharmony_ci            : "memory"
747cabdff1aSopenharmony_ci        );
748cabdff1aSopenharmony_ci    }
749cabdff1aSopenharmony_ci}
750