1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Loongson SIMD optimized vp8dsp
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * Copyright (c) 2016 Loongson Technology Corporation Limited
5cabdff1aSopenharmony_ci * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * This file is part of FFmpeg.
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17cabdff1aSopenharmony_ci * Lesser General Public License for more details.
18cabdff1aSopenharmony_ci *
19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22cabdff1aSopenharmony_ci */
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include "vp8dsp_mips.h"
25cabdff1aSopenharmony_ci#include "constants.h"
26cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
27cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h"
28cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
29cabdff1aSopenharmony_ci
/*
 * Scratch operands needed by the helper macros in this file.
 *
 * Each DECLARE_* macro expands to a local variable declaration, and the
 * matching RESTRICT_ASM_* macro expands to a named, early-clobber ("=&")
 * output operand for an extended asm statement: "f" places db_1 / db_2 in
 * floating-point registers, "r" places it_1 in a general-purpose register.
 * A function whose asm body references %[db_1], %[db_2] or %[it_1] has to
 * use the corresponding pair.
 *
 * MMI_PCMPGTUB and MMI_BTOH use %[db_1] and %[db_2] as temporaries.
 * NOTE(review): %[it_1] is not referenced in this part of the file;
 * presumably it is consumed by helpers defined elsewhere -- confirm.
 */
#define DECLARE_DOUBLE_1            double db_1
#define DECLARE_DOUBLE_2            double db_2
#define DECLARE_UINT32_T            uint32_t  it_1
#define RESTRICT_ASM_DOUBLE_1       [db_1]"=&f"(db_1)
#define RESTRICT_ASM_DOUBLE_2       [db_2]"=&f"(db_2)
#define RESTRICT_ASM_UINT32_T       [it_1]"=&r"(it_1)
36cabdff1aSopenharmony_ci
/*
 * Per-byte unsigned "greater than": each byte lane of dst becomes all ones
 * where src1 > src2 and zero otherwise.
 *
 * The result is composed from an equality compare and an unsigned maximum:
 *   db_1 = (src1 == src2)
 *   db_2 = (max(src1, src2) == src1)      i.e. src1 >= src2
 *   dst  = db_2 ^ db_1                    i.e. src1 >  src2
 *
 * Clobbers %[db_1] and %[db_2], so the enclosing asm statement must provide
 * both operands and must not pass either of them as src1 or src2.
 * dst is written only by the final instruction, so it may be the same
 * register as src1 or src2.
 */
#define MMI_PCMPGTUB(dst, src1, src2)                                       \
        "pcmpeqb    %[db_1],    "#src1",        "#src2"             \n\t"   \
        "pmaxub     %[db_2],    "#src1",        "#src2"             \n\t"   \
        "pcmpeqb    %[db_2],    %[db_2],        "#src1"             \n\t"   \
        "pxor       "#dst",     %[db_2],        %[db_1]             \n\t"
42cabdff1aSopenharmony_ci
/*
 * Sign-extend the eight signed bytes of src to 16-bit halfwords.
 *
 *   db_1  = 0
 *   db_2  = (0 > src) per signed byte, i.e. 0xFF for negative bytes
 *   dst_r = low  four bytes of src interleaved with their sign bytes
 *   dst_l = high four bytes of src interleaved with their sign bytes
 *
 * Clobbers %[db_1] and %[db_2]; the enclosing asm statement must provide
 * both operands. dst_r is written before src is read for dst_l, so dst_r
 * must not be the same register as src (dst_l may be).
 */
#define MMI_BTOH(dst_l, dst_r, src)                                         \
        "pxor       %[db_1],    %[db_1],        %[db_1]             \n\t"   \
        "pcmpgtb    %[db_2],    %[db_1],        "#src"              \n\t"   \
        "punpcklbh  "#dst_r",   "#src",         %[db_2]             \n\t"   \
        "punpckhbh  "#dst_l",   "#src",         %[db_2]             \n\t"
48cabdff1aSopenharmony_ci
/*
 * Asm body of the VP8 macroblock-edge loop filter, working on eight byte
 * lanes at a time.
 *
 * Named asm operands the enclosing statement has to supply:
 *   p3, p2, p1, p0, q0, q1, q2, q3
 *       pixel vectors on both sides of the edge; p3 and q3 are only read,
 *       p2..q2 are overwritten with the filtered result
 *   thresh, e, i
 *       integer registers; the low byte of each is replicated into all
 *       eight byte lanes before it is compared
 *   hev, mask
 *       per-byte decision masks computed here (hev is left inverted on exit)
 *   ftmp0..ftmp7, tmp0
 *       scratch floating-point / integer registers, all clobbered
 *   db_1, db_2
 *       scratch registers used by MMI_PCMPGTUB and MMI_BTOH
 *
 * PSRLB_MMI and PSRAB_MMI are defined outside this part of the file.
 * NOTE(review): the shift counts 0x09 and 0x0B handed to them presumably
 * produce per-byte shifts by 1 and 3 -- confirm against their definitions.
 */
#define MMI_VP8_LOOP_FILTER                                                 \
        /* Calculation of hev */                                            \
        /* ftmp3 = low byte of thresh copied to every byte lane */          \
        "dmtc1      %[thresh],  %[ftmp3]                            \n\t"   \
        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        /* hev = max(|p1 - p0|, |q1 - q0|) > thresh, per byte */            \
        "pasubub    %[ftmp0],   %[p1],          %[p0]               \n\t"   \
        "pasubub    %[ftmp1],   %[q1],          %[q0]               \n\t"   \
        "pmaxub     %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"   \
        MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3])                            \
        /* Calculation of mask */                                           \
        /* ftmp1 = saturating 2 * |p0 - q0| */                              \
        "pasubub    %[ftmp1],   %[p0],          %[q0]               \n\t"   \
        "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
        /* ftmp2 = |p1 - q1| shifted right per byte via PSRLB_MMI */        \
        "pasubub    %[ftmp2],   %[p1],          %[q1]               \n\t"   \
        "li         %[tmp0],    0x09                                \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp3]                            \n\t"   \
        PSRLB_MMI(%[ftmp2],  %[ftmp3],  %[ftmp4],  %[ftmp5],  %[ftmp2])     \
        /* mask = (ftmp1 + ftmp2) > e, with e broadcast like thresh */      \
        "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
        "dmtc1      %[e],       %[ftmp3]                            \n\t"   \
        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3])                           \
        /* fold in the six neighbouring-pixel differences (unsigned max) */ \
        "pmaxub     %[mask],    %[mask],        %[ftmp0]            \n\t"   \
        "pasubub    %[ftmp1],   %[p3],          %[p2]               \n\t"   \
        "pasubub    %[ftmp2],   %[p2],          %[p1]               \n\t"   \
        "pmaxub     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
        "pmaxub     %[mask],    %[mask],        %[ftmp1]            \n\t"   \
        "pasubub    %[ftmp1],   %[q3],          %[q2]               \n\t"   \
        "pasubub    %[ftmp2],   %[q2],          %[q1]               \n\t"   \
        "pmaxub     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"   \
        "pmaxub     %[mask],    %[mask],        %[ftmp1]            \n\t"   \
        /* mask = ~(mask > i): all ones where no limit was exceeded */      \
        "dmtc1      %[i],       %[ftmp3]                            \n\t"   \
        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3])                            \
        "pcmpeqw    %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"   \
        "pxor       %[mask],    %[mask],        %[ftmp3]            \n\t"   \
        /* VP8_MBFILTER */                                                  \
        /* ftmp7 = 0x80 per byte; XOR maps unsigned pixels to signed */     \
        "li         %[tmp0],    0x80808080                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp7]                            \n\t"   \
        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"   \
        "pxor       %[p2],      %[p2],          %[ftmp7]            \n\t"   \
        "pxor       %[p1],      %[p1],          %[ftmp7]            \n\t"   \
        "pxor       %[p0],      %[p0],          %[ftmp7]            \n\t"   \
        "pxor       %[q0],      %[q0],          %[ftmp7]            \n\t"   \
        "pxor       %[q1],      %[q1],          %[ftmp7]            \n\t"   \
        "pxor       %[q2],      %[q2],          %[ftmp7]            \n\t"   \
        /* w = clamp(sat(p1 - q1) + 3 * (q0 - p0)) in 16-bit lanes */       \
        "psubsb     %[ftmp4],   %[p1],          %[q1]               \n\t"   \
        "psubb      %[ftmp5],   %[q0],          %[p0]               \n\t"   \
        MMI_BTOH(%[ftmp1],  %[ftmp0],  %[ftmp5])                            \
        MMI_BTOH(%[ftmp3],  %[ftmp2],  %[ftmp4])                            \
        /* Right part */                                                    \
        "paddh      %[ftmp5],   %[ftmp0],       %[ftmp0]            \n\t"   \
        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"   \
        "paddh      %[ftmp0],   %[ftmp2],       %[ftmp0]            \n\t"   \
        /* Left part */                                                     \
        "paddh      %[ftmp5],   %[ftmp1],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"   \
        "paddh      %[ftmp1],   %[ftmp3],       %[ftmp1]            \n\t"   \
        /* Combine left and right part */                                   \
        "packsshb   %[ftmp1],   %[ftmp0],       %[ftmp1]            \n\t"   \
        /* ftmp1 = w & mask; ftmp2 = w & mask & hev */                      \
        "pand       %[ftmp1],   %[ftmp1],       %[mask]             \n\t"   \
        "pand       %[ftmp2],   %[ftmp1],       %[hev]              \n\t"   \
        /* ftmp3 = PSRAB_MMI(sat(ftmp2 + 4)); ftmp4 likewise with + 3 */    \
        "li         %[tmp0],    0x04040404                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
        "paddsb     %[ftmp3],   %[ftmp2],       %[ftmp0]            \n\t"   \
        "li         %[tmp0],    0x0B                                \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp4]                            \n\t"   \
        PSRAB_MMI(%[ftmp3],  %[ftmp4],  %[ftmp5],  %[ftmp6],  %[ftmp3])     \
        "li         %[tmp0],    0x03030303                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
        "paddsb     %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"   \
        "li         %[tmp0],    0x0B                                \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp2]                            \n\t"   \
        PSRAB_MMI(%[ftmp4],  %[ftmp2],  %[ftmp5],  %[ftmp6],  %[ftmp4])     \
        /* q0 -= ftmp3; p0 += ftmp4 (signed saturating) */                  \
        "psubsb     %[q0],      %[q0],          %[ftmp3]            \n\t"   \
        "paddsb     %[p0],      %[p0],          %[ftmp4]            \n\t"   \
        /* filt_val &= ~hev */                                              \
        "pcmpeqw    %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
        "pxor       %[hev],     %[hev],         %[ftmp0]            \n\t"   \
        "pand       %[ftmp1],   %[ftmp1],       %[hev]              \n\t"   \
        /* widen remaining w: ftmp5 = high four lanes, ftmp6 = low four */  \
        MMI_BTOH(%[ftmp5],  %[ftmp6],  %[ftmp1])                            \
        /* ftmp2 = shift 7, ftmp1 = 27 per halfword, ftmp0 = 63 rounding */ \
        "li         %[tmp0],    0x07                                \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp2]                            \n\t"   \
        "li         %[tmp0],    0x001b001b                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
        "li         %[tmp0],    0x003f003f                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp0]                            \n\t"   \
        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"   \
        /* Right part */                                                    \
        "pmullh     %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
        /* Left part */                                                     \
        "pmullh     %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
        /* Combine left and right part */                                   \
        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
        /* a = (27 * w + 63) >> 7; q0 -= a; p0 += a; undo the 0x80 XOR */   \
        "psubsb     %[q0],      %[q0],          %[ftmp4]            \n\t"   \
        "pxor       %[q0],      %[q0],          %[ftmp7]            \n\t"   \
        "paddsb     %[p0],      %[p0],          %[ftmp4]            \n\t"   \
        "pxor       %[p0],      %[p0],          %[ftmp7]            \n\t"   \
        /* same with weight 18 (0x12) for q1 / p1 */                        \
        "li         %[tmp0],    0x00120012                          \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp1]            \n\t"   \
        /* Right part */                                                    \
        "pmullh     %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
        /* Left part */                                                     \
        "pmullh     %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
        /* Combine left and right part */                                   \
        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
        "psubsb     %[q1],      %[q1],          %[ftmp4]            \n\t"   \
        "pxor       %[q1],      %[q1],          %[ftmp7]            \n\t"   \
        "paddsb     %[p1],      %[p1],          %[ftmp4]            \n\t"   \
        "pxor       %[p1],      %[p1],          %[ftmp7]            \n\t"   \
        /* weight 9 for q2 / p2, formed as (w << 3) + w */                  \
        "li         %[tmp0],    0x03                                \n\t"   \
        "dmtc1      %[tmp0],    %[ftmp1]                            \n\t"   \
        /* Right part */                                                    \
        "psllh      %[ftmp3],   %[ftmp6],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"   \
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
        /* Left part */                                                     \
        "psllh      %[ftmp4],   %[ftmp5],       %[ftmp1]            \n\t"   \
        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"   \
        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"   \
        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"   \
        /* Combine left and right part */                                   \
        "packsshb   %[ftmp4],   %[ftmp3],       %[ftmp4]            \n\t"   \
        "psubsb     %[q2],      %[q2],          %[ftmp4]            \n\t"   \
        "pxor       %[q2],      %[q2],          %[ftmp7]            \n\t"   \
        "paddsb     %[p2],      %[p2],          %[ftmp4]            \n\t"   \
        "pxor       %[p2],      %[p2],          %[ftmp7]            \n\t"
192cabdff1aSopenharmony_ci
/*
 * Asm body of a horizontal 6-tap filter producing one row of four pixels.
 *
 * src and dst are the asm operands holding the source and destination
 * addresses. For each of the four 16-bit lanes x the sequence evaluates,
 * with saturating halfword adds and subtracts:
 *
 *   sum = filter2 * src[x]     - filter1 * src[x - 1] + filter0 * src[x - 2]
 *       + filter3 * src[x + 1] - filter4 * src[x + 2] + filter5 * src[x + 3]
 *   out = (sum + ff_pw_64) >> ftmp4
 *
 * and then packs the halfwords to unsigned-saturated bytes and stores them
 * at dst.
 *
 * Required operands: filter0..filter5 and ff_pw_64 (packed halfwords),
 * %[ftmp0] (interleaved as the upper byte when widening each pixel, and as
 * the second pack source) and %[ftmp4] (shift count).
 * NOTE(review): ftmp0 is presumably zero and ftmp4 presumably 7, both set
 * up by the enclosing code -- confirm at the call sites.
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3] and %[ftmp5].
 * MMI_ULWC1 / MMI_SWC1 are defined outside this part of the file.
 */
#define PUT_VP8_EPEL4_H6_MMI(src, dst)                                      \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter2]          \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, -0x01)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, -0x02)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter0]          \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter3]          \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, 0x02)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, 0x03)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter5]          \n\t"   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
                                                                            \
        MMI_SWC1(%[ftmp1], dst, 0x00)
228cabdff1aSopenharmony_ci
229cabdff1aSopenharmony_ci
/*
 * Asm body of a horizontal 4-tap filter producing one row of four pixels.
 *
 * src and dst are the asm operands holding the source and destination
 * addresses. For each of the four 16-bit lanes x the sequence evaluates:
 *
 *   sum = filter2 * src[x]     - filter1 * src[x - 1]
 *       + filter3 * src[x + 1] - filter4 * src[x + 2]
 *   out = (sum + ff_pw_64) >> ftmp4
 *
 * and then packs the halfwords to unsigned-saturated bytes and stores them
 * at dst.
 *
 * Required operands: filter1..filter4 and ff_pw_64 (packed halfwords),
 * %[ftmp0] (interleaved as the upper byte when widening each pixel, and as
 * the second pack source) and %[ftmp4] (shift count).
 * NOTE(review): ftmp0 is presumably zero and ftmp4 presumably 7, both set
 * up by the enclosing code -- confirm at the call sites.
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3] and %[ftmp5].
 * MMI_ULWC1 / MMI_SWC1 are defined outside this part of the file.
 *
 * NOTE(review): the filter4 term is subtracted with non-saturating psubh,
 * whereas the matching step in PUT_VP8_EPEL4_H6_MMI and
 * PUT_VP8_EPEL4_V4_MMI uses psubsh -- confirm this is intentional.
 */
#define PUT_VP8_EPEL4_H4_MMI(src, dst)                                      \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter2]          \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, -0x01)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter3]          \n\t"   \
                                                                            \
        MMI_ULWC1(%[ftmp1], src, 0x02)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
        MMI_SWC1(%[ftmp1], dst, 0x00)
256cabdff1aSopenharmony_ci
257cabdff1aSopenharmony_ci
/*
 * Asm body of a vertical 6-tap filter producing one row of four pixels.
 *
 * src and dst are the asm operands holding the source and destination
 * addresses, srcstride the operand holding the row pitch, and src1 a
 * scratch address register that is overwritten while stepping to the
 * neighbouring rows (src itself is not modified). With S = srcstride, each
 * of the four 16-bit lanes x evaluates, using saturating halfword adds and
 * subtracts:
 *
 *   sum = filter2 * src[x]     - filter1 * src[x - S] + filter0 * src[x - 2*S]
 *       + filter3 * src[x + S] - filter4 * src[x + 2*S]
 *       + filter5 * src[x + 3*S]
 *   out = (sum + ff_pw_64) >> ftmp4
 *
 * and then packs the halfwords to unsigned-saturated bytes and stores them
 * at dst.
 *
 * Required operands: filter0..filter5 and ff_pw_64 (packed halfwords),
 * %[ftmp0] (interleaved as the upper byte when widening each pixel, and as
 * the second pack source) and %[ftmp4] (shift count).
 * NOTE(review): ftmp0 is presumably zero and ftmp4 presumably 7, both set
 * up by the enclosing code -- confirm at the call sites.
 * Clobbers src1, %[ftmp1], %[ftmp2], %[ftmp3] and %[ftmp5].
 * MMI_ULWC1, MMI_SWC1, PTR_ADDU and PTR_SUBU are defined outside this part
 * of the file.
 */
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)                     \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter2]          \n\t"   \
                                                                            \
        PTR_SUBU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        PTR_SUBU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter0]          \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        PTR_ADDU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter3]          \n\t"   \
                                                                            \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter5]          \n\t"   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
                                                                            \
        MMI_SWC1(%[ftmp1], dst, 0x00)
299cabdff1aSopenharmony_ci
300cabdff1aSopenharmony_ci
/*
 * Asm body of a vertical 4-tap filter producing one row of four pixels.
 *
 * src and dst are the asm operands holding the source and destination
 * addresses, srcstride the operand holding the row pitch, and src1 a
 * scratch address register that is overwritten while stepping to the
 * neighbouring rows (src itself is not modified). With S = srcstride, each
 * of the four 16-bit lanes x evaluates, using saturating halfword adds and
 * subtracts:
 *
 *   sum = filter2 * src[x]     - filter1 * src[x - S]
 *       + filter3 * src[x + S] - filter4 * src[x + 2*S]
 *   out = (sum + ff_pw_64) >> ftmp4
 *
 * and then packs the halfwords to unsigned-saturated bytes and stores them
 * at dst.
 *
 * Required operands: filter1..filter4 and ff_pw_64 (packed halfwords),
 * %[ftmp0] (interleaved as the upper byte when widening each pixel, and as
 * the second pack source) and %[ftmp4] (shift count).
 * NOTE(review): ftmp0 is presumably zero and ftmp4 presumably 7, both set
 * up by the enclosing code -- confirm at the call sites.
 * Clobbers src1, %[ftmp1], %[ftmp2], %[ftmp3] and %[ftmp5].
 * MMI_ULWC1, MMI_SWC1, PTR_ADDU and PTR_SUBU are defined outside this part
 * of the file.
 */
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)                     \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter2]          \n\t"   \
                                                                            \
        PTR_SUBU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        PTR_ADDU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[filter3]          \n\t"   \
                                                                            \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"   \
                                                                            \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
                                                                            \
        MMI_SWC1(%[ftmp1], dst, 0x00)
330cabdff1aSopenharmony_ci
331cabdff1aSopenharmony_ci
/*
 * Horizontal 6-tap VP8 sub-pixel filter for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds/subtracts,
 *
 *     (filter2 * src[x]     - filter1 * src[x - 1] + filter0 * src[x - 2] +
 *      filter3 * src[x + 1] - filter4 * src[x + 2] + filter5 * src[x + 3] +
 *      ff_pw_64) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src: asm operand with the address of the current source pixel; 8 bytes are
 *      loaded (unaligned) at byte offsets -2 .. +3 from it.
 * dst: asm operand with the destination address (written via MMI_SDC1).
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp8],
 * %[filter0]..%[filter5] and %[ff_pw_64].
 * NOTE(review): %[ftmp0] is assumed to be all-zero (it is the unpack partner
 * that widens bytes to halfwords) and %[ftmp4] is assumed to hold the shift
 * count (presumably 7); both are set up outside this macro -- confirm there.
 * Overwrites ftmp1-ftmp3 and ftmp5-ftmp8.
 */
#define PUT_VP8_EPEL8_H6_MMI(src, dst)                                      \
        /* ftmp5/ftmp6 = filter2 * src[0] (low / high four pixels) */       \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter2]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter2]          \n\t"   \
                                                                            \
        /* ... - filter1 * src[-1] */                                       \
        MMI_ULDC1(%[ftmp1], src, -0x01)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* ... + filter0 * src[-2]; first partial sum kept in ftmp7/ftmp8 */\
        MMI_ULDC1(%[ftmp1], src, -0x02)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter0]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter0]          \n\t"   \
        "paddsh     %[ftmp7],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp8],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* second partial sum restarts in ftmp5/ftmp6: filter3 * src[+1] */ \
        MMI_ULDC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter3]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter3]          \n\t"   \
                                                                            \
        /* ... - filter4 * src[+2] */                                       \
        MMI_ULDC1(%[ftmp1], src, 0x02)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* ... + filter5 * src[+3] */                                       \
        MMI_ULDC1(%[ftmp1], src, 0x03)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter5]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter5]          \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* combine both partial sums */                                     \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_64]         \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
                                                                            \
        MMI_SDC1(%[ftmp1], dst, 0x00)
387cabdff1aSopenharmony_ci
388cabdff1aSopenharmony_ci
/*
 * Horizontal 4-tap VP8 sub-pixel filter for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds/subtracts,
 *
 *     (filter2 * src[x]     - filter1 * src[x - 1] +
 *      filter3 * src[x + 1] - filter4 * src[x + 2] + ff_pw_64) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src: asm operand with the address of the current source pixel; 8 bytes are
 *      loaded (unaligned) at byte offsets -1 .. +2 from it.
 * dst: asm operand with the destination address (written via MMI_SDC1).
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp8],
 * %[filter1]..%[filter4] and %[ff_pw_64].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 7); both are set up outside this
 * macro -- confirm there.
 * Overwrites ftmp1-ftmp3 and ftmp5-ftmp8.
 */
#define PUT_VP8_EPEL8_H4_MMI(src, dst)                                      \
        /* ftmp5/ftmp6 = filter2 * src[0] (low / high four pixels) */       \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter2]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter2]          \n\t"   \
                                                                            \
        /* ... - filter1 * src[-1]; first partial sum kept in ftmp7/ftmp8 */\
        MMI_ULDC1(%[ftmp1], src, -0x01)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp7],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp8],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* second partial sum restarts in ftmp5/ftmp6: filter3 * src[+1] */ \
        MMI_ULDC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter3]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter3]          \n\t"   \
                                                                            \
        /* ... - filter4 * src[+2] */                                       \
        MMI_ULDC1(%[ftmp1], src, 0x02)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* combine both partial sums */                                     \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_64]         \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
        MMI_SDC1(%[ftmp1], dst, 0x00)
428cabdff1aSopenharmony_ci
429cabdff1aSopenharmony_ci
/*
 * Vertical 6-tap VP8 sub-pixel filter for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds/subtracts,
 *
 *     (filter2 * src[x]                 - filter1 * src[x - srcstride] +
 *      filter0 * src[x - 2 * srcstride] + filter3 * src[x + srcstride] -
 *      filter4 * src[x + 2 * srcstride] + filter5 * src[x + 3 * srcstride] +
 *      ff_pw_64) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src:       asm operand with the address of the current source row.
 * src1:      asm operand used as a scratch address register; it is
 *            overwritten with src -/+ multiples of srcstride.
 * dst:       asm operand with the destination address (MMI_SDC1 store).
 * srcstride: asm operand with the source row stride in bytes.
 * The three address arguments are stringified into PTR_SUBU/PTR_ADDU
 * operand text, so pass them exactly as they must appear in the asm.
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp8],
 * %[filter0]..%[filter5] and %[ff_pw_64].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 7); both are set up outside this
 * macro -- confirm there.
 * Overwrites src1, ftmp1-ftmp3 and ftmp5-ftmp8.
 */
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)                     \
        /* ftmp5/ftmp6 = filter2 * row 0 (low / high four pixels) */        \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter2]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter2]          \n\t"   \
                                                                            \
        /* ... - filter1 * row -1 */                                        \
        PTR_SUBU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* ... + filter0 * row -2; first partial sum kept in ftmp7/ftmp8 */ \
        PTR_SUBU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter0]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter0]          \n\t"   \
        "paddsh     %[ftmp7],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp8],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* second partial sum restarts in ftmp5/ftmp6: filter3 * row +1 */  \
        PTR_ADDU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter3]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter3]          \n\t"   \
                                                                            \
        /* ... - filter4 * row +2 */                                        \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* ... + filter5 * row +3 */                                        \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter5]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter5]          \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* combine both partial sums */                                     \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_64]         \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
                                                                            \
        MMI_SDC1(%[ftmp1], dst, 0x00)
490cabdff1aSopenharmony_ci
491cabdff1aSopenharmony_ci
/*
 * Vertical 4-tap VP8 sub-pixel filter for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds/subtracts,
 *
 *     (filter2 * src[x]             - filter1 * src[x - srcstride] +
 *      filter3 * src[x + srcstride] - filter4 * src[x + 2 * srcstride] +
 *      ff_pw_64) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src:       asm operand with the address of the current source row.
 * src1:      asm operand used as a scratch address register; it is
 *            overwritten with src -/+ multiples of srcstride.
 * dst:       asm operand with the destination address (MMI_SDC1 store).
 * srcstride: asm operand with the source row stride in bytes.
 * The three address arguments are stringified into PTR_SUBU/PTR_ADDU
 * operand text, so pass them exactly as they must appear in the asm.
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp8],
 * %[filter1]..%[filter4] and %[ff_pw_64].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 7); both are set up outside this
 * macro -- confirm there.
 * Overwrites src1, ftmp1-ftmp3 and ftmp5-ftmp8.
 */
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)                     \
        /* ftmp5/ftmp6 = filter2 * row 0 (low / high four pixels) */        \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter2]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter2]          \n\t"   \
                                                                            \
        /* ... - filter1 * row -1; first partial sum kept in ftmp7/ftmp8 */ \
        PTR_SUBU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter1]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter1]          \n\t"   \
        "psubsh     %[ftmp7],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp8],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* second partial sum restarts in ftmp5/ftmp6: filter3 * row +1 */  \
        PTR_ADDU   ""#src1",    "#src",         "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[filter3]          \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[filter3]          \n\t"   \
                                                                            \
        /* ... - filter4 * row +2 */                                        \
        PTR_ADDU   ""#src1",    "#src1",        "#srcstride"        \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[filter4]          \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[filter4]          \n\t"   \
        "psubsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "psubsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* combine both partial sums */                                     \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_64]         \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_64]         \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
                                                                            \
        MMI_SDC1(%[ftmp1], dst, 0x00)
534cabdff1aSopenharmony_ci
535cabdff1aSopenharmony_ci
/*
 * Horizontal bilinear interpolation for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds,
 *
 *     (a * src[x] + b * src[x + 1] + ff_pw_4) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src: asm operand with the source address; 8 bytes are loaded (unaligned)
 *      at byte offsets 0 and +1.
 * dst: asm operand with the destination address (MMI_SDC1 store).
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp6], %[a], %[b]
 * and %[ff_pw_4].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 3); both are set up outside this
 * macro -- confirm there.
 * Overwrites ftmp1-ftmp3, ftmp5 and ftmp6.
 */
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)                                   \
        /* ftmp5/ftmp6 = a * src[0] (low / high four pixels) */             \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[a]                \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[a]                \n\t"   \
                                                                            \
        /* ... + b * src[+1] */                                             \
        MMI_ULDC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[b]                \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[b]                \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_4]          \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_4]          \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
        MMI_SDC1(%[ftmp1], dst, 0x00)
558cabdff1aSopenharmony_ci
559cabdff1aSopenharmony_ci
/*
 * Horizontal bilinear interpolation for one row of 4 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 4 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds,
 *
 *     (a * src[x] + b * src[x + 1] + ff_pw_4) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores the
 * low word at dst.
 *
 * src: asm operand with the source address; 4 bytes are loaded (unaligned)
 *      at byte offsets 0 and +1.
 * dst: asm operand with the destination address (MMI_SWC1 store).
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp4], %[a], %[b]
 * and %[ff_pw_4].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 3); both are set up outside this
 * macro -- confirm there.
 * Overwrites ftmp1-ftmp3.
 */
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)                                   \
        /* ftmp3 = a * src[0] */                                            \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[a]                \n\t"   \
                                                                            \
        /* ... + b * src[+1] */                                             \
        MMI_ULWC1(%[ftmp1], src, 0x01)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[b]                \n\t"   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_4]          \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
        MMI_SWC1(%[ftmp1], dst, 0x00)
575cabdff1aSopenharmony_ci
576cabdff1aSopenharmony_ci
/*
 * Vertical bilinear interpolation for one row of 8 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 8 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds,
 *
 *     (c * src[x] + d * src[x + sstride] + ff_pw_4) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores them
 * at dst.
 *
 * src:     asm operand with the address of the current source row.
 * src1:    asm operand used as a scratch address register; it is
 *          overwritten with src + sstride.
 * dst:     asm operand with the destination address (MMI_SDC1 store).
 * sstride: asm operand with the source row stride in bytes.
 * src1, src and sstride are stringified into the PTR_ADDU operand text, so
 * pass them exactly as they must appear in the asm.
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp6], %[c], %[d]
 * and %[ff_pw_4].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 3); both are set up outside this
 * macro -- confirm there.
 * Overwrites src1, ftmp1-ftmp3, ftmp5 and ftmp6.
 */
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)                    \
        /* ftmp5/ftmp6 = c * row 0 (low / high four pixels) */              \
        MMI_ULDC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp5],   %[ftmp2],       %[c]                \n\t"   \
        "pmullh     %[ftmp6],   %[ftmp3],       %[c]                \n\t"   \
                                                                            \
        /* ... + d * row +1 */                                              \
        PTR_ADDU   ""#src1",    "#src",         "#sstride"          \n\t"   \
        MMI_ULDC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[d]                \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp3],       %[d]                \n\t"   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp5],   %[ftmp5],       %[ff_pw_4]          \n\t"   \
        "paddsh     %[ftmp6],   %[ftmp6],       %[ff_pw_4]          \n\t"   \
        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"   \
        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"   \
        MMI_SDC1(%[ftmp1], dst, 0x00)
600cabdff1aSopenharmony_ci
601cabdff1aSopenharmony_ci
/*
 * Vertical bilinear interpolation for one row of 4 pixels
 * (inline-asm fragment for Loongson MMI; expands to asm template text).
 *
 * For each of the 4 output pixels x the sequence evaluates, on 16-bit lanes
 * with saturating adds,
 *
 *     (c * src[x] + d * src[x + sstride] + ff_pw_4) >> ftmp4
 *
 * then packs the results to bytes with unsigned saturation and stores the
 * low word at dst.
 *
 * src:     asm operand with the address of the current source row.
 * src1:    asm operand used as a scratch address register; it is
 *          overwritten with src + sstride.
 * dst:     asm operand with the destination address (MMI_SWC1 store).
 * sstride: asm operand with the source row stride in bytes.
 * src1, src and sstride are stringified into the PTR_ADDU operand text, so
 * pass them exactly as they must appear in the asm.
 *
 * The expanding asm statement must supply %[ftmp0]..%[ftmp4], %[c], %[d]
 * and %[ff_pw_4].
 * NOTE(review): %[ftmp0] is assumed to be all-zero and %[ftmp4] is assumed
 * to hold the shift count (presumably 3); both are set up outside this
 * macro -- confirm there.
 * Overwrites src1 and ftmp1-ftmp3.
 */
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)                    \
        /* ftmp3 = c * row 0 */                                             \
        MMI_ULWC1(%[ftmp1], src, 0x00)                                      \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp3],   %[ftmp2],       %[c]                \n\t"   \
                                                                            \
        /* ... + d * row +1 */                                              \
        PTR_ADDU   ""#src1",    "#src",         "#sstride"          \n\t"   \
        MMI_ULWC1(%[ftmp1], src1, 0x00)                                     \
        "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"   \
        "pmullh     %[ftmp2],   %[ftmp2],       %[d]                \n\t"   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"   \
                                                                            \
        /* round, shift, pack to bytes */                                   \
        "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_4]          \n\t"   \
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"   \
                                                                            \
        "packushb   %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t"   \
        MMI_SWC1(%[ftmp1], dst, 0x00)
618cabdff1aSopenharmony_ci
619cabdff1aSopenharmony_ci
620cabdff1aSopenharmony_ciDECLARE_ALIGNED(8, static const uint64_t, fourtap_subpel_filters[7][6]) = {
621cabdff1aSopenharmony_ci   {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
622cabdff1aSopenharmony_ci    0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
623cabdff1aSopenharmony_ci
624cabdff1aSopenharmony_ci   {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
625cabdff1aSopenharmony_ci    0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
626cabdff1aSopenharmony_ci
627cabdff1aSopenharmony_ci   {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
628cabdff1aSopenharmony_ci    0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
629cabdff1aSopenharmony_ci
630cabdff1aSopenharmony_ci   {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
631cabdff1aSopenharmony_ci    0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
632cabdff1aSopenharmony_ci
633cabdff1aSopenharmony_ci   {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
634cabdff1aSopenharmony_ci    0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
635cabdff1aSopenharmony_ci
636cabdff1aSopenharmony_ci   {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
637cabdff1aSopenharmony_ci    0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
638cabdff1aSopenharmony_ci
639cabdff1aSopenharmony_ci   {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
640cabdff1aSopenharmony_ci    0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
641cabdff1aSopenharmony_ci};
642cabdff1aSopenharmony_ci
/*
 * Scalar reference definitions, intentionally compiled out.
 * They are kept only as a readable statement of the arithmetic: the 6-tap
 * and 4-tap filter formulas (note the subtracted F[1] and F[4] terms and the
 * +64 >> 7 rounding), the byte-valued coefficient table, and the two
 * fixed-point multiply helpers.
 */
#if 0
#define FILTER_6TAP(src, F, stride)                                           \
    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
        F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] -             \
        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride)                                           \
    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
        F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]

static const uint8_t subpel_filters[7][6] = {
    { 0,  6, 123,  12,  1, 0 },
    { 2, 11, 108,  36,  8, 1 },
    { 0,  9,  93,  50,  6, 0 },
    { 3, 16,  77,  77, 16, 3 },
    { 0,  6,  50,  93,  9, 0 },
    { 1,  8,  36, 108, 11, 2 },
    { 0,  1,  12, 123,  6, 0 },
};

#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
#define MUL_35468(a)  (((a) * 35468) >> 16)
#endif
666cabdff1aSopenharmony_ci
667cabdff1aSopenharmony_ci#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
/*
 * Filters one sample position, modifying only p[-stride] and p[0].
 *
 * Reads the four samples p1, p0, q0, q1 at offsets -2, -1, 0 and +1 strides
 * from p. This variant includes the clip_int8(p1 - q1) term in the filter
 * value and leaves p1 and q1 untouched.
 *
 * p      - pointer to the q0 sample; p[-2 * stride] .. p[1 * stride] are read
 * stride - distance in bytes between successive samples
 */
668cabdff1aSopenharmony_cistatic av_always_inline void vp8_filter_common_is4tap(uint8_t *p,
669cabdff1aSopenharmony_ci        ptrdiff_t stride)
670cabdff1aSopenharmony_ci{
671cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
672cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
673cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
674cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
675cabdff1aSopenharmony_ci    int a, f1, f2;
    /* Lookup table used both by clip_int8() and for the final stores. */
676cabdff1aSopenharmony_ci    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
677cabdff1aSopenharmony_ci
    /* Filter value: 3 * (q0 - p0) + clip(p1 - q1), clipped once more. */
678cabdff1aSopenharmony_ci    a = 3 * (q0 - p0);
679cabdff1aSopenharmony_ci    a += clip_int8(p1 - q1);
680cabdff1aSopenharmony_ci    a = clip_int8(a);
681cabdff1aSopenharmony_ci
682cabdff1aSopenharmony_ci    // We deviate from the spec here with c(a+3) >> 3
683cabdff1aSopenharmony_ci    // since that's what libvpx does.
    /* Both corrections are capped at 127 before the shift by 3. */
684cabdff1aSopenharmony_ci    f1 = FFMIN(a + 4, 127) >> 3;
685cabdff1aSopenharmony_ci    f2 = FFMIN(a + 3, 127) >> 3;
686cabdff1aSopenharmony_ci
687cabdff1aSopenharmony_ci    // Despite what the spec says, we do need to clamp here to
688cabdff1aSopenharmony_ci    // be bitexact with libvpx.
    /* f2 is added to p0 and f1 is subtracted from q0, each through cm. */
689cabdff1aSopenharmony_ci    p[-1 * stride] = cm[p0 + f2];
690cabdff1aSopenharmony_ci    p[ 0 * stride] = cm[q0 - f1];
691cabdff1aSopenharmony_ci}
692cabdff1aSopenharmony_ci
/*
 * Filters one sample position, modifying p[-2 * stride] .. p[1 * stride].
 *
 * Same structure as vp8_filter_common_is4tap() except that the filter value
 * is built from 3 * (q0 - p0) only (no p1 - q1 term), and the outer samples
 * p1 and q1 are also adjusted, by (f1 + 1) >> 1.
 *
 * p      - pointer to the q0 sample
 * stride - distance in bytes between successive samples
 */
693cabdff1aSopenharmony_cistatic av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p,
694cabdff1aSopenharmony_ci        ptrdiff_t stride)
695cabdff1aSopenharmony_ci{
696cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
697cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
698cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
699cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
700cabdff1aSopenharmony_ci    int a, f1, f2;
    /* Lookup table used both by clip_int8() and for the final stores. */
701cabdff1aSopenharmony_ci    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
702cabdff1aSopenharmony_ci
703cabdff1aSopenharmony_ci    a = 3 * (q0 - p0);
704cabdff1aSopenharmony_ci    a = clip_int8(a);
705cabdff1aSopenharmony_ci
706cabdff1aSopenharmony_ci    // We deviate from the spec here with c(a+3) >> 3
707cabdff1aSopenharmony_ci    // since that's what libvpx does.
708cabdff1aSopenharmony_ci    f1 = FFMIN(a + 4, 127) >> 3;
709cabdff1aSopenharmony_ci    f2 = FFMIN(a + 3, 127) >> 3;
710cabdff1aSopenharmony_ci
711cabdff1aSopenharmony_ci    // Despite what the spec says, we do need to clamp here to
712cabdff1aSopenharmony_ci    // be bitexact with libvpx.
713cabdff1aSopenharmony_ci    p[-1 * stride] = cm[p0 + f2];
714cabdff1aSopenharmony_ci    p[ 0 * stride] = cm[q0 - f1];
    /* `a` is reused here: half of f1, rounded up, applied to p1 and q1. */
715cabdff1aSopenharmony_ci    a              = (f1 + 1) >> 1;
716cabdff1aSopenharmony_ci    p[-2 * stride] = cm[p1 + a];
717cabdff1aSopenharmony_ci    p[ 1 * stride] = cm[q1 - a];
718cabdff1aSopenharmony_ci}
719cabdff1aSopenharmony_ci
/*
 * Threshold test on the four samples around p.
 *
 * p      - pointer to the q0 sample; p[-2 * stride] .. p[1 * stride] are read
 * stride - distance in bytes between successive samples
 * flim   - limit to compare against
 *
 * Returns 1 when 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim,
 * otherwise 0. Nothing is written through p.
 */
720cabdff1aSopenharmony_cistatic av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
721cabdff1aSopenharmony_ci        int flim)
722cabdff1aSopenharmony_ci{
723cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
724cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
725cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
726cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
727cabdff1aSopenharmony_ci
728cabdff1aSopenharmony_ci    return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
729cabdff1aSopenharmony_ci}
730cabdff1aSopenharmony_ci
/*
 * Variance test on the two sample pairs on either side of p.
 *
 * p      - pointer to the q0 sample; p[-2 * stride] .. p[1 * stride] are read
 * stride - distance in bytes between successive samples
 * thresh - threshold for the neighbour differences
 *
 * Returns 1 when FFABS(p1 - p0) or FFABS(q1 - q0) is greater than thresh,
 * otherwise 0. Nothing is written through p.
 */
731cabdff1aSopenharmony_cistatic av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
732cabdff1aSopenharmony_ci{
733cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
734cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
735cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
736cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
737cabdff1aSopenharmony_ci
738cabdff1aSopenharmony_ci    return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
739cabdff1aSopenharmony_ci}
740cabdff1aSopenharmony_ci
/*
 * Filters one sample position, modifying six samples:
 * p[-3 * stride] .. p[2 * stride].
 *
 * A common value w = clip_int8(clip_int8(p1 - q1) + 3 * (q0 - p0)) is scaled
 * by 27, 18 and 9 (each with + 63 and >> 7). The resulting corrections are
 * added to p0, p1, p2 and subtracted from q0, q1, q2 respectively, and every
 * result is stored through the cm table.
 *
 * p      - pointer to the q0 sample
 * stride - distance in bytes between successive samples
 */
741cabdff1aSopenharmony_cistatic av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
742cabdff1aSopenharmony_ci{
743cabdff1aSopenharmony_ci    int a0, a1, a2, w;
744cabdff1aSopenharmony_ci    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
745cabdff1aSopenharmony_ci
746cabdff1aSopenharmony_ci    int av_unused p2 = p[-3 * stride];
747cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
748cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
749cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
750cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
751cabdff1aSopenharmony_ci    int av_unused q2 = p[ 2 * stride];
752cabdff1aSopenharmony_ci
753cabdff1aSopenharmony_ci    w = clip_int8(p1 - q1);
754cabdff1aSopenharmony_ci    w = clip_int8(w + 3 * (q0 - p0));
755cabdff1aSopenharmony_ci
    /* Weights 27/128, 18/128 and 9/128 of w: largest for p0/q0. */
756cabdff1aSopenharmony_ci    a0 = (27 * w + 63) >> 7;
757cabdff1aSopenharmony_ci    a1 = (18 * w + 63) >> 7;
758cabdff1aSopenharmony_ci    a2 =  (9 * w + 63) >> 7;
759cabdff1aSopenharmony_ci
760cabdff1aSopenharmony_ci    p[-3 * stride] = cm[p2 + a2];
761cabdff1aSopenharmony_ci    p[-2 * stride] = cm[p1 + a1];
762cabdff1aSopenharmony_ci    p[-1 * stride] = cm[p0 + a0];
763cabdff1aSopenharmony_ci    p[ 0 * stride] = cm[q0 - a0];
764cabdff1aSopenharmony_ci    p[ 1 * stride] = cm[q1 - a1];
765cabdff1aSopenharmony_ci    p[ 2 * stride] = cm[q2 - a2];
766cabdff1aSopenharmony_ci}
767cabdff1aSopenharmony_ci
/*
 * Threshold test on the eight samples p3..q3 around p.
 *
 * p      - pointer to the q0 sample; p[-4 * stride] .. p[3 * stride] are read
 * stride - distance in bytes between successive samples
 * E      - limit forwarded to vp8_simple_limit()
 * I      - limit for each difference between adjacent samples on one side
 *
 * Returns nonzero only when vp8_simple_limit(p, stride, E) holds and all six
 * adjacent differences (p3-p2, p2-p1, p1-p0, q3-q2, q2-q1, q1-q0) have an
 * FFABS value of at most I. Nothing is written through p.
 */
768cabdff1aSopenharmony_cistatic av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride,
769cabdff1aSopenharmony_ci        int E, int I)
770cabdff1aSopenharmony_ci{
771cabdff1aSopenharmony_ci    int av_unused p3 = p[-4 * stride];
772cabdff1aSopenharmony_ci    int av_unused p2 = p[-3 * stride];
773cabdff1aSopenharmony_ci    int av_unused p1 = p[-2 * stride];
774cabdff1aSopenharmony_ci    int av_unused p0 = p[-1 * stride];
775cabdff1aSopenharmony_ci    int av_unused q0 = p[ 0 * stride];
776cabdff1aSopenharmony_ci    int av_unused q1 = p[ 1 * stride];
777cabdff1aSopenharmony_ci    int av_unused q2 = p[ 2 * stride];
778cabdff1aSopenharmony_ci    int av_unused q3 = p[ 3 * stride];
779cabdff1aSopenharmony_ci
780cabdff1aSopenharmony_ci    return vp8_simple_limit(p, stride, E) &&
781cabdff1aSopenharmony_ci           FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
782cabdff1aSopenharmony_ci           FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I &&
783cabdff1aSopenharmony_ci           FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I;
784cabdff1aSopenharmony_ci}
785cabdff1aSopenharmony_ci
/*
 * Inline-assembly loop filter working on rows above and below dst.
 *
 * Eight registers p3..q3 are loaded from the rows dst - 4 * stride ..
 * dst + 3 * stride (q0 is the row at dst itself). MMI_VP8_LOOP_FILTER, a
 * macro defined outside this chunk, is then expanded, and the six rows
 * p2..q2 are stored back. p3 and q3 are loaded but never stored.
 *
 * dst        - pointer to the q0 row
 * stride     - distance in bytes between rows
 * flim_E     - passed to the asm as operand %[e]
 * flim_I     - passed to the asm as operand %[i]
 * hev_thresh - passed to the asm as operand %[thresh]
 *
 * NOTE(review): %[e], %[i] and %[thresh] do not appear in the asm text
 * visible here; presumably they are consumed inside MMI_VP8_LOOP_FILTER.
 * NOTE(review): MMI_ULDC1/MMI_USDC1 come from mmiutils.h and presumably are
 * unaligned 64-bit load/store helpers, which would make this 8 bytes wide.
 */
786cabdff1aSopenharmony_cistatic av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst,
787cabdff1aSopenharmony_ci        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
788cabdff1aSopenharmony_ci{
789cabdff1aSopenharmony_ci    double ftmp[18];
790cabdff1aSopenharmony_ci    uint32_t tmp[1];
791cabdff1aSopenharmony_ci    DECLARE_DOUBLE_1;
792cabdff1aSopenharmony_ci    DECLARE_DOUBLE_2;
793cabdff1aSopenharmony_ci    DECLARE_UINT32_T;
794cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
795cabdff1aSopenharmony_ci
796cabdff1aSopenharmony_ci    __asm__ volatile(
797cabdff1aSopenharmony_ci        /* Get data from dst */
        /* tmp0 walks upwards from dst for p0..p3, then downwards for q1..q3 */
798cabdff1aSopenharmony_ci        MMI_ULDC1(%[q0], %[dst], 0x0)
799cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
800cabdff1aSopenharmony_ci        MMI_ULDC1(%[p0], %[tmp0], 0x0)
801cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
802cabdff1aSopenharmony_ci        MMI_ULDC1(%[p1], %[tmp0], 0x0)
803cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
804cabdff1aSopenharmony_ci        MMI_ULDC1(%[p2], %[tmp0], 0x0)
805cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
806cabdff1aSopenharmony_ci        MMI_ULDC1(%[p3], %[tmp0], 0x0)
807cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
808cabdff1aSopenharmony_ci        MMI_ULDC1(%[q1], %[tmp0], 0x0)
809cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
810cabdff1aSopenharmony_ci        MMI_ULDC1(%[q2], %[tmp0], 0x0)
811cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
812cabdff1aSopenharmony_ci        MMI_ULDC1(%[q3], %[tmp0], 0x0)
813cabdff1aSopenharmony_ci        MMI_VP8_LOOP_FILTER
814cabdff1aSopenharmony_ci        /* Move to dst */
        /* Only p2..q2 are written back; the p3 and q3 rows stay as loaded */
815cabdff1aSopenharmony_ci        MMI_USDC1(%[q0], %[dst], 0x0)
816cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
817cabdff1aSopenharmony_ci        MMI_USDC1(%[p0], %[tmp0], 0x0)
818cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
819cabdff1aSopenharmony_ci        MMI_USDC1(%[p1], %[tmp0], 0x0)
820cabdff1aSopenharmony_ci        PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
821cabdff1aSopenharmony_ci        MMI_USDC1(%[p2], %[tmp0], 0x0)
822cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
823cabdff1aSopenharmony_ci        MMI_USDC1(%[q1], %[tmp0], 0x0)
824cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
825cabdff1aSopenharmony_ci        MMI_USDC1(%[q2], %[tmp0], 0x0)
826cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64
827cabdff1aSopenharmony_ci          [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
828cabdff1aSopenharmony_ci          [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
829cabdff1aSopenharmony_ci          [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
830cabdff1aSopenharmony_ci          [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
831cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[8]),    [ftmp1]"=&f"(ftmp[9]),
832cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[10]),   [ftmp3]"=&f"(ftmp[11]),
833cabdff1aSopenharmony_ci          [hev]"=&f"(ftmp[12]),     [mask]"=&f"(ftmp[13]),
834cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[14]),   [ftmp5]"=&f"(ftmp[15]),
835cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[16]),   [ftmp7]"=&f"(ftmp[17]),
836cabdff1aSopenharmony_ci          [dst]"+&r"(dst),          [tmp0]"=&r"(tmp[0]),
837cabdff1aSopenharmony_ci          RESTRICT_ASM_DOUBLE_1,    RESTRICT_ASM_DOUBLE_2,
838cabdff1aSopenharmony_ci          RESTRICT_ASM_UINT32_T
839cabdff1aSopenharmony_ci        : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh),
840cabdff1aSopenharmony_ci          [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride)
841cabdff1aSopenharmony_ci        : "memory"
842cabdff1aSopenharmony_ci    );
843cabdff1aSopenharmony_ci}
844cabdff1aSopenharmony_ci
/*
 * Inner loop filter for 8 consecutive byte positions dst[0] .. dst[7], with
 * samples of each position taken `stride` bytes apart.
 *
 * Despite the _mmi suffix this function contains no inline assembly; it
 * calls the scalar helpers once per position.
 *
 * For every position that passes vp8_normal_limit(), hev() selects the
 * filter: vp8_filter_common_is4tap() when it returns nonzero, otherwise
 * vp8_filter_common_isnot4tap(). Positions that fail the limit test are
 * left unchanged.
 *
 * dst        - pointer to the first q0 sample
 * stride     - distance in bytes between the samples of one position
 * flim_E     - E limit for vp8_normal_limit()
 * flim_I     - I limit for vp8_normal_limit()
 * hev_thresh - threshold for hev()
 */
845cabdff1aSopenharmony_cistatic av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst,
846cabdff1aSopenharmony_ci        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
847cabdff1aSopenharmony_ci{
848cabdff1aSopenharmony_ci    int i;
849cabdff1aSopenharmony_ci
850cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++)
851cabdff1aSopenharmony_ci        if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) {
852cabdff1aSopenharmony_ci            int hv = hev(dst + i * 1, stride, hev_thresh);
853cabdff1aSopenharmony_ci            if (hv)
854cabdff1aSopenharmony_ci                vp8_filter_common_is4tap(dst + i * 1, stride);
855cabdff1aSopenharmony_ci            else
856cabdff1aSopenharmony_ci                vp8_filter_common_isnot4tap(dst + i * 1, stride);
857cabdff1aSopenharmony_ci        }
858cabdff1aSopenharmony_ci}
859cabdff1aSopenharmony_ci
/*
 * Inline-assembly loop filter working on columns left and right of dst.
 *
 * Eight rows (dst, dst + stride, ..., dst + 7 * stride) are loaded, each
 * starting 4 bytes before the row pointer. The registers are transposed with
 * TRANSPOSE_8B, MMI_VP8_LOOP_FILTER (defined outside this chunk) is expanded,
 * the registers are transposed back, and all eight rows are stored to the
 * same addresses.
 *
 * Unlike the load sequence, the store sequence advances %[dst] itself; dst is
 * an in/out ("+&r") operand and is a by-value parameter, so the caller's
 * pointer is not affected.
 *
 * dst        - pointer into the first row; bytes dst[-4] onwards are accessed
 * stride     - distance in bytes between rows
 * flim_E     - passed to the asm as operand %[e]
 * flim_I     - passed to the asm as operand %[i]
 * hev_thresh - passed to the asm as operand %[thresh]
 *
 * NOTE(review): %[e], %[i] and %[thresh] do not appear in the asm text
 * visible here; presumably they are consumed inside MMI_VP8_LOOP_FILTER.
 * NOTE(review): MMI_ULDC1/MMI_USDC1 presumably move 8 bytes each, which would
 * make the accessed span dst[-4] .. dst[3] per row; confirm in mmiutils.h.
 */
860cabdff1aSopenharmony_cistatic av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
861cabdff1aSopenharmony_ci        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
862cabdff1aSopenharmony_ci{
863cabdff1aSopenharmony_ci    double ftmp[18];
864cabdff1aSopenharmony_ci    uint32_t tmp[1];
865cabdff1aSopenharmony_ci    DECLARE_DOUBLE_1;
866cabdff1aSopenharmony_ci    DECLARE_DOUBLE_2;
867cabdff1aSopenharmony_ci    DECLARE_UINT32_T;
868cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
869cabdff1aSopenharmony_ci
870cabdff1aSopenharmony_ci    __asm__ volatile(
871cabdff1aSopenharmony_ci        /* Get data from dst */
        /* Before the transpose, p3..q3 hold rows 0..7 rather than columns */
872cabdff1aSopenharmony_ci        MMI_ULDC1(%[p3], %[dst], -0x04)
873cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[dst],           %[stride]     \n\t"
874cabdff1aSopenharmony_ci        MMI_ULDC1(%[p2], %[tmp0], -0x04)
875cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
876cabdff1aSopenharmony_ci        MMI_ULDC1(%[p1], %[tmp0], -0x04)
877cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
878cabdff1aSopenharmony_ci        MMI_ULDC1(%[p0], %[tmp0], -0x04)
879cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
880cabdff1aSopenharmony_ci        MMI_ULDC1(%[q0], %[tmp0], -0x04)
881cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
882cabdff1aSopenharmony_ci        MMI_ULDC1(%[q1], %[tmp0], -0x04)
883cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
884cabdff1aSopenharmony_ci        MMI_ULDC1(%[q2], %[tmp0], -0x04)
885cabdff1aSopenharmony_ci        PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
886cabdff1aSopenharmony_ci        MMI_ULDC1(%[q3], %[tmp0], -0x04)
887cabdff1aSopenharmony_ci        /* Matrix transpose */
888cabdff1aSopenharmony_ci        TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
889cabdff1aSopenharmony_ci                     %[q0], %[q1], %[q2], %[q3],
890cabdff1aSopenharmony_ci                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
891cabdff1aSopenharmony_ci        MMI_VP8_LOOP_FILTER
892cabdff1aSopenharmony_ci        /* Matrix transpose */
893cabdff1aSopenharmony_ci        TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
894cabdff1aSopenharmony_ci                     %[q0], %[q1], %[q2], %[q3],
895cabdff1aSopenharmony_ci                     %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
896cabdff1aSopenharmony_ci        /* Move to dst */
        /* All eight rows are written; %[dst] is advanced one row per store */
897cabdff1aSopenharmony_ci        MMI_USDC1(%[p3], %[dst], -0x04)
898cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
899cabdff1aSopenharmony_ci        MMI_USDC1(%[p2], %[dst], -0x04)
900cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
901cabdff1aSopenharmony_ci        MMI_USDC1(%[p1], %[dst], -0x04)
902cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
903cabdff1aSopenharmony_ci        MMI_USDC1(%[p0], %[dst], -0x04)
904cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
905cabdff1aSopenharmony_ci        MMI_USDC1(%[q0], %[dst], -0x04)
906cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
907cabdff1aSopenharmony_ci        MMI_USDC1(%[q1], %[dst], -0x04)
908cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
909cabdff1aSopenharmony_ci        MMI_USDC1(%[q2], %[dst], -0x04)
910cabdff1aSopenharmony_ci        PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
911cabdff1aSopenharmony_ci        MMI_USDC1(%[q3], %[dst], -0x04)
912cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64
913cabdff1aSopenharmony_ci          [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
914cabdff1aSopenharmony_ci          [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
915cabdff1aSopenharmony_ci          [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
916cabdff1aSopenharmony_ci          [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
917cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[8]),    [ftmp1]"=&f"(ftmp[9]),
918cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[10]),   [ftmp3]"=&f"(ftmp[11]),
919cabdff1aSopenharmony_ci          [hev]"=&f"(ftmp[12]),     [mask]"=&f"(ftmp[13]),
920cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[14]),   [ftmp5]"=&f"(ftmp[15]),
921cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[16]),   [ftmp7]"=&f"(ftmp[17]),
922cabdff1aSopenharmony_ci          [dst]"+&r"(dst),          [tmp0]"=&r"(tmp[0]),
923cabdff1aSopenharmony_ci          RESTRICT_ASM_DOUBLE_1,    RESTRICT_ASM_DOUBLE_2,
924cabdff1aSopenharmony_ci          RESTRICT_ASM_UINT32_T
925cabdff1aSopenharmony_ci        : [e]"r"((mips_reg)flim_E), [thresh]"r"((mips_reg)hev_thresh),
926cabdff1aSopenharmony_ci          [i]"r"((mips_reg)flim_I), [stride]"r"((mips_reg)stride)
927cabdff1aSopenharmony_ci        : "memory"
928cabdff1aSopenharmony_ci    );
929cabdff1aSopenharmony_ci}
930cabdff1aSopenharmony_ci
/*
 * Inner loop filter for 8 rows dst, dst + stride, ..., dst + 7 * stride,
 * with the samples of each row taken 1 byte apart.
 *
 * Counterpart of vp8_v_loop_filter8_inner_mmi() with the roles of `stride`
 * and 1 swapped: `stride` selects the row, and the helpers are called with a
 * sample step of 1. No inline assembly is used here.
 *
 * For every row that passes vp8_normal_limit(), hev() selects the filter:
 * vp8_filter_common_is4tap() when it returns nonzero, otherwise
 * vp8_filter_common_isnot4tap(). Rows that fail the limit test are left
 * unchanged.
 *
 * dst        - pointer to the q0 sample of the first row
 * stride     - distance in bytes between rows
 * flim_E     - E limit for vp8_normal_limit()
 * flim_I     - I limit for vp8_normal_limit()
 * hev_thresh - threshold for hev()
 */
931cabdff1aSopenharmony_cistatic av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst,
932cabdff1aSopenharmony_ci        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
933cabdff1aSopenharmony_ci{
934cabdff1aSopenharmony_ci    int i;
935cabdff1aSopenharmony_ci
936cabdff1aSopenharmony_ci    for (i = 0; i < 8; i++)
937cabdff1aSopenharmony_ci        if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) {
938cabdff1aSopenharmony_ci            int hv = hev(dst + i * stride, 1, hev_thresh);
939cabdff1aSopenharmony_ci            if (hv)
940cabdff1aSopenharmony_ci                vp8_filter_common_is4tap(dst + i * stride, 1);
941cabdff1aSopenharmony_ci            else
942cabdff1aSopenharmony_ci                vp8_filter_common_isnot4tap(dst + i * stride, 1);
943cabdff1aSopenharmony_ci        }
944cabdff1aSopenharmony_ci}
945cabdff1aSopenharmony_ci
/*
 * Transforms the 16 values in dc and scatters the results into element 0 of
 * each of the 16 coefficient arrays block[i][j][], then zeroes dc.
 *
 * The transform has two passes:
 *   1. a pass that combines dc[k], dc[k + 4], dc[k + 8] and dc[k + 12] for
 *      k = 0..3 and writes the results back into dc;
 *   2. a pass over each group dc[4 * i .. 4 * i + 3] whose four outputs, with
 *      + 3 and >> 3 applied, are stored to block[i][0..3][0].
 *
 * The `#if 1` branch performs pass 1 and the final clearing of dc with inline
 * assembly and pass 2 in C. The `#else` branch is an all-C version that is
 * currently compiled out.
 *
 * block - output; only block[i][j][0] is written, for i, j in 0..3
 * dc    - input values; overwritten during the transform and all zero on
 *         return
 *
 * NOTE(review): MMI_LDC1/MMI_SDC1 presumably move 8 bytes (four int16_t) per
 * access, and the aligned variants presumably require dc to be 8-byte
 * aligned; confirm in mmiutils.h.
 * NOTE(review): the asm pass uses paddsh/psubsh, which by Loongson MMI naming
 * are saturating halfword operations, whereas the #else branch adds in int
 * and truncates on the store to int16_t. The two branches would differ for
 * inputs that overflow 16 bits; confirm this is acceptable for the expected
 * input range.
 */
946cabdff1aSopenharmony_civoid ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
947cabdff1aSopenharmony_ci{
948cabdff1aSopenharmony_ci#if 1
949cabdff1aSopenharmony_ci    double ftmp[8];
950cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
951cabdff1aSopenharmony_ci
    /*
     * Pass 1: ftmp0..ftmp3 are loaded from byte offsets 0x00, 0x08, 0x10 and
     * 0x18 of dc. ftmp4/ftmp5 are the sum/difference of the first and last
     * group, ftmp6/ftmp7 the sum/difference of the two middle groups; their
     * combinations are stored back to the same four offsets.
     */
952cabdff1aSopenharmony_ci    __asm__ volatile (
953cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp0], %[dc], 0x00)
954cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[dc], 0x08)
955cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[dc], 0x10)
956cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[dc], 0x18)
957cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp0],       %[ftmp3]            \n\t"
958cabdff1aSopenharmony_ci        "psubsh     %[ftmp5],   %[ftmp0],       %[ftmp3]            \n\t"
959cabdff1aSopenharmony_ci        "paddsh     %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
960cabdff1aSopenharmony_ci        "psubsh     %[ftmp7],   %[ftmp1],       %[ftmp2]            \n\t"
961cabdff1aSopenharmony_ci        "paddsh     %[ftmp0],   %[ftmp4],       %[ftmp6]            \n\t"
962cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
963cabdff1aSopenharmony_ci        "psubsh     %[ftmp2],   %[ftmp4],       %[ftmp6]            \n\t"
964cabdff1aSopenharmony_ci        "psubsh     %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
965cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dc], 0x00)
966cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[dc], 0x08)
967cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp2], %[dc], 0x10)
968cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp3], %[dc], 0x18)
969cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
970cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
971cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
972cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),
973cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
974cabdff1aSopenharmony_ci          [ftmp7]"=&f"(ftmp[7])
975cabdff1aSopenharmony_ci        : [dc]"r"((uint8_t*)dc)
976cabdff1aSopenharmony_ci        : "memory"
977cabdff1aSopenharmony_ci    );
978cabdff1aSopenharmony_ci
    /* Pass 2 in C: each group of four dc values feeds one block[i][*][0]. */
979cabdff1aSopenharmony_ci    block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3;
980cabdff1aSopenharmony_ci    block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3;
981cabdff1aSopenharmony_ci    block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3;
982cabdff1aSopenharmony_ci    block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3;
983cabdff1aSopenharmony_ci
984cabdff1aSopenharmony_ci    block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3;
985cabdff1aSopenharmony_ci    block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3;
986cabdff1aSopenharmony_ci    block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3;
987cabdff1aSopenharmony_ci    block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3;
988cabdff1aSopenharmony_ci
989cabdff1aSopenharmony_ci    block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
990cabdff1aSopenharmony_ci    block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
991cabdff1aSopenharmony_ci    block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
992cabdff1aSopenharmony_ci    block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
993cabdff1aSopenharmony_ci
994cabdff1aSopenharmony_ci    block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
995cabdff1aSopenharmony_ci    block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
996cabdff1aSopenharmony_ci    block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
997cabdff1aSopenharmony_ci    block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
998cabdff1aSopenharmony_ci
    /* Clear dc: ftmp0 is zeroed by xor with itself and stored four times. */
999cabdff1aSopenharmony_ci    __asm__ volatile (
1000cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1001cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dc], 0x00)
1002cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dc], 0x08)
1003cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dc], 0x10)
1004cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dc], 0x18)
1005cabdff1aSopenharmony_ci        : RESTRICT_ASM_ALL64
1006cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0])
1007cabdff1aSopenharmony_ci        : [dc]"r"((uint8_t *)dc)
1008cabdff1aSopenharmony_ci        : "memory"
1009cabdff1aSopenharmony_ci    );
1010cabdff1aSopenharmony_ci#else
    /* All-C version of the same steps; not compiled while `#if 1` is set. */
1011cabdff1aSopenharmony_ci    int t00, t01, t02, t03, t10, t11, t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1012cabdff1aSopenharmony_ci
1013cabdff1aSopenharmony_ci    t00 = dc[0] + dc[12];
1014cabdff1aSopenharmony_ci    t10 = dc[1] + dc[13];
1015cabdff1aSopenharmony_ci    t20 = dc[2] + dc[14];
1016cabdff1aSopenharmony_ci    t30 = dc[3] + dc[15];
1017cabdff1aSopenharmony_ci
1018cabdff1aSopenharmony_ci    t03 = dc[0] - dc[12];
1019cabdff1aSopenharmony_ci    t13 = dc[1] - dc[13];
1020cabdff1aSopenharmony_ci    t23 = dc[2] - dc[14];
1021cabdff1aSopenharmony_ci    t33 = dc[3] - dc[15];
1022cabdff1aSopenharmony_ci
1023cabdff1aSopenharmony_ci    t01 = dc[4] + dc[ 8];
1024cabdff1aSopenharmony_ci    t11 = dc[5] + dc[ 9];
1025cabdff1aSopenharmony_ci    t21 = dc[6] + dc[10];
1026cabdff1aSopenharmony_ci    t31 = dc[7] + dc[11];
1027cabdff1aSopenharmony_ci
1028cabdff1aSopenharmony_ci    t02 = dc[4] - dc[ 8];
1029cabdff1aSopenharmony_ci    t12 = dc[5] - dc[ 9];
1030cabdff1aSopenharmony_ci    t22 = dc[6] - dc[10];
1031cabdff1aSopenharmony_ci    t32 = dc[7] - dc[11];
1032cabdff1aSopenharmony_ci
1033cabdff1aSopenharmony_ci    dc[ 0] = t00 + t01;
1034cabdff1aSopenharmony_ci    dc[ 1] = t10 + t11;
1035cabdff1aSopenharmony_ci    dc[ 2] = t20 + t21;
1036cabdff1aSopenharmony_ci    dc[ 3] = t30 + t31;
1037cabdff1aSopenharmony_ci
1038cabdff1aSopenharmony_ci    dc[ 4] = t03 + t02;
1039cabdff1aSopenharmony_ci    dc[ 5] = t13 + t12;
1040cabdff1aSopenharmony_ci    dc[ 6] = t23 + t22;
1041cabdff1aSopenharmony_ci    dc[ 7] = t33 + t32;
1042cabdff1aSopenharmony_ci
1043cabdff1aSopenharmony_ci    dc[ 8] = t00 - t01;
1044cabdff1aSopenharmony_ci    dc[ 9] = t10 - t11;
1045cabdff1aSopenharmony_ci    dc[10] = t20 - t21;
1046cabdff1aSopenharmony_ci    dc[11] = t30 - t31;
1047cabdff1aSopenharmony_ci
1048cabdff1aSopenharmony_ci    dc[12] = t03 - t02;
1049cabdff1aSopenharmony_ci    dc[13] = t13 - t12;
1050cabdff1aSopenharmony_ci    dc[14] = t23 - t22;
1051cabdff1aSopenharmony_ci    dc[15] = t33 - t32;
1052cabdff1aSopenharmony_ci
1053cabdff1aSopenharmony_ci    block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3;
1054cabdff1aSopenharmony_ci    block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3;
1055cabdff1aSopenharmony_ci    block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3;
1056cabdff1aSopenharmony_ci    block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3;
1057cabdff1aSopenharmony_ci
1058cabdff1aSopenharmony_ci    block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3;
1059cabdff1aSopenharmony_ci    block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3;
1060cabdff1aSopenharmony_ci    block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3;
1061cabdff1aSopenharmony_ci    block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3;
1062cabdff1aSopenharmony_ci
1063cabdff1aSopenharmony_ci    block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1064cabdff1aSopenharmony_ci    block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1065cabdff1aSopenharmony_ci    block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1066cabdff1aSopenharmony_ci    block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1067cabdff1aSopenharmony_ci
1068cabdff1aSopenharmony_ci    block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1069cabdff1aSopenharmony_ci    block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1070cabdff1aSopenharmony_ci    block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1071cabdff1aSopenharmony_ci    block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1072cabdff1aSopenharmony_ci
1073cabdff1aSopenharmony_ci    AV_ZERO64(dc + 0);
1074cabdff1aSopenharmony_ci    AV_ZERO64(dc + 4);
1075cabdff1aSopenharmony_ci    AV_ZERO64(dc + 8);
1076cabdff1aSopenharmony_ci    AV_ZERO64(dc + 12);
1077cabdff1aSopenharmony_ci#endif
1078cabdff1aSopenharmony_ci}
1079cabdff1aSopenharmony_ci
/*
 * Shortcut variant that uses only dc[0].
 *
 * Computes val = (dc[0] + 3) >> 3, clears dc[0], and stores val into element
 * 0 of all 16 coefficient arrays block[i][j][] (i, j in 0..3). dc[1..15] are
 * neither read nor modified. Plain C; no inline assembly is used despite the
 * _mmi suffix.
 *
 * block - output; only block[i][j][0] is written
 * dc    - input; only dc[0] is read, and it is zero on return
 */
1080cabdff1aSopenharmony_civoid ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
1081cabdff1aSopenharmony_ci{
1082cabdff1aSopenharmony_ci    int val = (dc[0] + 3) >> 3;
1083cabdff1aSopenharmony_ci
1084cabdff1aSopenharmony_ci    dc[0] = 0;
1085cabdff1aSopenharmony_ci
1086cabdff1aSopenharmony_ci    block[0][0][0] = val;
1087cabdff1aSopenharmony_ci    block[0][1][0] = val;
1088cabdff1aSopenharmony_ci    block[0][2][0] = val;
1089cabdff1aSopenharmony_ci    block[0][3][0] = val;
1090cabdff1aSopenharmony_ci    block[1][0][0] = val;
1091cabdff1aSopenharmony_ci    block[1][1][0] = val;
1092cabdff1aSopenharmony_ci    block[1][2][0] = val;
1093cabdff1aSopenharmony_ci    block[1][3][0] = val;
1094cabdff1aSopenharmony_ci    block[2][0][0] = val;
1095cabdff1aSopenharmony_ci    block[2][1][0] = val;
1096cabdff1aSopenharmony_ci    block[2][2][0] = val;
1097cabdff1aSopenharmony_ci    block[2][3][0] = val;
1098cabdff1aSopenharmony_ci    block[3][0][0] = val;
1099cabdff1aSopenharmony_ci    block[3][1][0] = val;
1100cabdff1aSopenharmony_ci    block[3][2][0] = val;
1101cabdff1aSopenharmony_ci    block[3][3][0] = val;
1102cabdff1aSopenharmony_ci}
1103cabdff1aSopenharmony_ci
1104cabdff1aSopenharmony_civoid ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
1105cabdff1aSopenharmony_ci{
1106cabdff1aSopenharmony_ci#if 1
1107cabdff1aSopenharmony_ci    double ftmp[12];
1108cabdff1aSopenharmony_ci    uint32_t tmp[1];
1109cabdff1aSopenharmony_ci    union av_intfloat64 ff_ph_4e7b_u;
1110cabdff1aSopenharmony_ci    union av_intfloat64 ff_ph_22a3_u;
1111cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
1112cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
1113cabdff1aSopenharmony_ci    ff_ph_4e7b_u.i = 0x4e7b4e7b4e7b4e7bULL;
1114cabdff1aSopenharmony_ci    ff_ph_22a3_u.i = 0x22a322a322a322a3ULL;
1115cabdff1aSopenharmony_ci
1116cabdff1aSopenharmony_ci    __asm__ volatile (
1117cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1118cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp1], %[block], 0x00)
1119cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp2], %[block], 0x08)
1120cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp3], %[block], 0x10)
1121cabdff1aSopenharmony_ci        MMI_LDC1(%[ftmp4], %[block], 0x18)
1122cabdff1aSopenharmony_ci
1123cabdff1aSopenharmony_ci        "li         %[tmp0],    0x02                                \n\t"
1124cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
1125cabdff1aSopenharmony_ci
1126cabdff1aSopenharmony_ci        // block[0...3] + block[8...11]
1127cabdff1aSopenharmony_ci        "paddh      %[ftmp5],   %[ftmp1],       %[ftmp3]            \n\t"
1128cabdff1aSopenharmony_ci        // block[0...3] - block[8...11]
1129cabdff1aSopenharmony_ci        "psubh      %[ftmp6],   %[ftmp1],       %[ftmp3]            \n\t"
1130cabdff1aSopenharmony_ci        // MUL_35468(block[12...15])
1131cabdff1aSopenharmony_ci        "psllh      %[ftmp9],   %[ftmp4],       %[ftmp11]           \n\t"
1132cabdff1aSopenharmony_ci        "pmulhh     %[ftmp7],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
1133cabdff1aSopenharmony_ci        // MUL_35468(block[4...7])
1134cabdff1aSopenharmony_ci        "psllh      %[ftmp9],   %[ftmp2],       %[ftmp11]           \n\t"
1135cabdff1aSopenharmony_ci        "pmulhh     %[ftmp8],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
1136cabdff1aSopenharmony_ci        // MUL_20091(block[4...7]
1137cabdff1aSopenharmony_ci        "pmulhh     %[ftmp9],   %[ftmp2],       %[ff_ph_4e7b]       \n\t"
1138cabdff1aSopenharmony_ci        "paddh      %[ftmp9],   %[ftmp9],       %[ftmp2]            \n\t"
1139cabdff1aSopenharmony_ci        // MUL_20091(block[12...15])
1140cabdff1aSopenharmony_ci        "pmulhh     %[ftmp10],  %[ftmp4],       %[ff_ph_4e7b]       \n\t"
1141cabdff1aSopenharmony_ci        "paddh      %[ftmp10],  %[ftmp10],      %[ftmp4]            \n\t"
1142cabdff1aSopenharmony_ci
1143cabdff1aSopenharmony_ci        // tmp[0 4  8 12]
1144cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
1145cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
1146cabdff1aSopenharmony_ci        // tmp[1 5  9 13]
1147cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
1148cabdff1aSopenharmony_ci        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp10]           \n\t"
1149cabdff1aSopenharmony_ci        // tmp[2 6 10 14]
1150cabdff1aSopenharmony_ci        "psubh      %[ftmp3],   %[ftmp6],       %[ftmp8]            \n\t"
1151cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp10]           \n\t"
1152cabdff1aSopenharmony_ci        // tmp[3 7 11 15]
1153cabdff1aSopenharmony_ci        "psubh      %[ftmp4],   %[ftmp5],       %[ftmp7]            \n\t"
1154cabdff1aSopenharmony_ci        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp9]            \n\t"
1155cabdff1aSopenharmony_ci
1156cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[block], 0x00)
1157cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[block], 0x08)
1158cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[block], 0x10)
1159cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[block], 0x18)
1160cabdff1aSopenharmony_ci
1161cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
1162cabdff1aSopenharmony_ci                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1163cabdff1aSopenharmony_ci
1164cabdff1aSopenharmony_ci        // t[0 4  8 12]
1165cabdff1aSopenharmony_ci        "paddh      %[ftmp5],   %[ftmp1],       %[ftmp3]            \n\t"
1166cabdff1aSopenharmony_ci        // t[1 5  9 13]
1167cabdff1aSopenharmony_ci        "psubh      %[ftmp6],   %[ftmp1],       %[ftmp3]            \n\t"
1168cabdff1aSopenharmony_ci        // t[2 6 10 14]
1169cabdff1aSopenharmony_ci        "psllh      %[ftmp9],   %[ftmp2],       %[ftmp11]           \n\t"
1170cabdff1aSopenharmony_ci        "pmulhh     %[ftmp9],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
1171cabdff1aSopenharmony_ci        "psubh      %[ftmp7],   %[ftmp9],       %[ftmp4]            \n\t"
1172cabdff1aSopenharmony_ci        "pmulhh     %[ftmp10],  %[ftmp4],       %[ff_ph_4e7b]       \n\t"
1173cabdff1aSopenharmony_ci        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp10]           \n\t"
1174cabdff1aSopenharmony_ci        // t[3 7 11 15]
1175cabdff1aSopenharmony_ci        "psllh      %[ftmp9],   %[ftmp4],       %[ftmp11]           \n\t"
1176cabdff1aSopenharmony_ci        "pmulhh     %[ftmp9],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
1177cabdff1aSopenharmony_ci        "paddh      %[ftmp8],   %[ftmp9],       %[ftmp2]            \n\t"
1178cabdff1aSopenharmony_ci        "pmulhh     %[ftmp10],  %[ftmp2],       %[ff_ph_4e7b]       \n\t"
1179cabdff1aSopenharmony_ci        "paddh      %[ftmp8],   %[ftmp8],       %[ftmp10]           \n\t"
1180cabdff1aSopenharmony_ci
1181cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
1182cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
1183cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp5],       %[ftmp8]            \n\t"
1184cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_4]          \n\t"
1185cabdff1aSopenharmony_ci        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp11]           \n\t"
1186cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp6],       %[ftmp7]            \n\t"
1187cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_4]          \n\t"
1188cabdff1aSopenharmony_ci        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp11]           \n\t"
1189cabdff1aSopenharmony_ci        "psubh      %[ftmp3],   %[ftmp6],       %[ftmp7]            \n\t"
1190cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_4]          \n\t"
1191cabdff1aSopenharmony_ci        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp11]           \n\t"
1192cabdff1aSopenharmony_ci        "psubh      %[ftmp4],   %[ftmp5],       %[ftmp8]            \n\t"
1193cabdff1aSopenharmony_ci        "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_4]          \n\t"
1194cabdff1aSopenharmony_ci        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp11]           \n\t"
1195cabdff1aSopenharmony_ci
1196cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
1197cabdff1aSopenharmony_ci                     %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1198cabdff1aSopenharmony_ci
1199cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1200cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1201cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1202cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1203cabdff1aSopenharmony_ci
1204cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1205cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1206cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]            \n\t"
1207cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]            \n\t"
1208cabdff1aSopenharmony_ci
1209cabdff1aSopenharmony_ci        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1210cabdff1aSopenharmony_ci        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
1211cabdff1aSopenharmony_ci        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1212cabdff1aSopenharmony_ci        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp8]            \n\t"
1213cabdff1aSopenharmony_ci
1214cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1215cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1216cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1217cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1218cabdff1aSopenharmony_ci
1219cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1220cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1221cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1222cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1223cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1224cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1225cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1226cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1227cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1228cabdff1aSopenharmony_ci          [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1229cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1230cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1231cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0])
1232cabdff1aSopenharmony_ci        : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
1233cabdff1aSopenharmony_ci          [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
1234cabdff1aSopenharmony_ci          [block]"r"(block),                [ff_pw_4]"f"(ff_pw_4.f),
1235cabdff1aSopenharmony_ci          [ff_ph_4e7b]"f"(ff_ph_4e7b_u.f),  [ff_ph_22a3]"f"(ff_ph_22a3_u.f)
1236cabdff1aSopenharmony_ci        : "memory"
1237cabdff1aSopenharmony_ci    );
1238cabdff1aSopenharmony_ci#else
1239cabdff1aSopenharmony_ci    int i, t0, t1, t2, t3;
1240cabdff1aSopenharmony_ci    int16_t tmp[16];
1241cabdff1aSopenharmony_ci
1242cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
1243cabdff1aSopenharmony_ci        t0 = block[0 + i] + block[8 + i];
1244cabdff1aSopenharmony_ci        t1 = block[0 + i] - block[8 + i];
1245cabdff1aSopenharmony_ci        t2 = MUL_35468(block[4 + i]) - MUL_20091(block[12 + i]);
1246cabdff1aSopenharmony_ci        t3 = MUL_20091(block[4 + i]) + MUL_35468(block[12 + i]);
1247cabdff1aSopenharmony_ci        block[ 0 + i] = 0;
1248cabdff1aSopenharmony_ci        block[ 4 + i] = 0;
1249cabdff1aSopenharmony_ci        block[ 8 + i] = 0;
1250cabdff1aSopenharmony_ci        block[12 + i] = 0;
1251cabdff1aSopenharmony_ci
1252cabdff1aSopenharmony_ci        tmp[i * 4 + 0] = t0 + t3;
1253cabdff1aSopenharmony_ci        tmp[i * 4 + 1] = t1 + t2;
1254cabdff1aSopenharmony_ci        tmp[i * 4 + 2] = t1 - t2;
1255cabdff1aSopenharmony_ci        tmp[i * 4 + 3] = t0 - t3;
1256cabdff1aSopenharmony_ci    }
1257cabdff1aSopenharmony_ci
1258cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
1259cabdff1aSopenharmony_ci        t0 = tmp[0 + i] + tmp[8 + i];
1260cabdff1aSopenharmony_ci        t1 = tmp[0 + i] - tmp[8 + i];
1261cabdff1aSopenharmony_ci        t2 = MUL_35468(tmp[4 + i]) - MUL_20091(tmp[12 + i]);
1262cabdff1aSopenharmony_ci        t3 = MUL_20091(tmp[4 + i]) + MUL_35468(tmp[12 + i]);
1263cabdff1aSopenharmony_ci
1264cabdff1aSopenharmony_ci        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
1265cabdff1aSopenharmony_ci        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
1266cabdff1aSopenharmony_ci        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
1267cabdff1aSopenharmony_ci        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
1268cabdff1aSopenharmony_ci        dst   += stride;
1269cabdff1aSopenharmony_ci    }
1270cabdff1aSopenharmony_ci#endif
1271cabdff1aSopenharmony_ci}
1272cabdff1aSopenharmony_ci
/**
 * DC-only inverse transform: add one rounded DC value to a 4x4 area of dst.
 *
 * Computes dc = (block[0] + 4) >> 3, clears block[0], then updates the rows
 * starting at dst, dst + stride, dst + 2 * stride and dst + 3 * stride.
 * The disabled #else branch is the scalar reference for the same operation:
 * dst[k] = av_clip_uint8(dst[k] + dc) for four pixels in each of four rows.
 *
 * @param dst    top-left byte of the destination area (read and written)
 * @param block  coefficient block; only block[0] is read, and it is zeroed
 * @param stride byte distance between consecutive rows of dst
 */
1273cabdff1aSopenharmony_civoid ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
1274cabdff1aSopenharmony_ci{
1275cabdff1aSopenharmony_ci#if 1
    // Rounded DC term shared by all sixteen pixels.
1276cabdff1aSopenharmony_ci    int dc = (block[0] + 4) >> 3;
1277cabdff1aSopenharmony_ci    double ftmp[6];
1278cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
1279cabdff1aSopenharmony_ci
    // The coefficient has been consumed; clear it before touching dst.
1280cabdff1aSopenharmony_ci    block[0] = 0;
1281cabdff1aSopenharmony_ci
1282cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (x ^ x); used below as the zero operand for unpack/pack
        // and as the pshufh selector.
1283cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // Move dc from the integer register into ftmp5.
1284cabdff1aSopenharmony_ci        "mtc1       %[dc],      %[ftmp5]                            \n\t"
        // Load one row from each of the four destination lines.
        // NOTE(review): MMI_LWC1 is defined outside this chunk; presumably a
        // 32-bit (4-pixel) load -- confirm in mmiutils.h.
1285cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1286cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1287cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1288cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp4], %[dst3], 0x00)
        // Shuffle with an all-zero selector; presumably replicates the low
        // halfword (dc) into every 16-bit lane of ftmp5.
1289cabdff1aSopenharmony_ci        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
        // Interleave the pixel bytes with zero; presumably widens each pixel
        // to a 16-bit lane so dc can be added without byte overflow.
1290cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1291cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1292cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1293cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
        // Add the dc lanes to each row.
1294cabdff1aSopenharmony_ci        "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1295cabdff1aSopenharmony_ci        "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]            \n\t"
1296cabdff1aSopenharmony_ci        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
1297cabdff1aSopenharmony_ci        "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"
        // Pack back to bytes; presumably with unsigned saturation, which is
        // the counterpart of av_clip_uint8() in the #else reference.
1298cabdff1aSopenharmony_ci        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1299cabdff1aSopenharmony_ci        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1300cabdff1aSopenharmony_ci        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1301cabdff1aSopenharmony_ci        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
        // Write the four updated rows back to dst.
1302cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1303cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1304cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1305cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1306cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1307cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1308cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),
1309cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1310cabdff1aSopenharmony_ci          [ftmp5]"=&f"(ftmp[5])
        // Row addresses are computed in C and passed as four separate inputs.
1311cabdff1aSopenharmony_ci        : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
1312cabdff1aSopenharmony_ci          [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
1313cabdff1aSopenharmony_ci          [dc]"r"(dc)
1314cabdff1aSopenharmony_ci        : "memory"
1315cabdff1aSopenharmony_ci    );
1316cabdff1aSopenharmony_ci#else
    // Scalar reference implementation (compiled out by the "#if 1" above).
1317cabdff1aSopenharmony_ci    int i, dc = (block[0] + 4) >> 3;
1318cabdff1aSopenharmony_ci
1319cabdff1aSopenharmony_ci    block[0] = 0;
1320cabdff1aSopenharmony_ci
1321cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
1322cabdff1aSopenharmony_ci        dst[0] = av_clip_uint8(dst[0] + dc);
1323cabdff1aSopenharmony_ci        dst[1] = av_clip_uint8(dst[1] + dc);
1324cabdff1aSopenharmony_ci        dst[2] = av_clip_uint8(dst[2] + dc);
1325cabdff1aSopenharmony_ci        dst[3] = av_clip_uint8(dst[3] + dc);
1326cabdff1aSopenharmony_ci        dst   += stride;
1327cabdff1aSopenharmony_ci    }
1328cabdff1aSopenharmony_ci#endif
1329cabdff1aSopenharmony_ci}
1330cabdff1aSopenharmony_ci
/**
 * Apply the DC-only add to four blocks placed side by side.
 *
 * Block n is applied at dst + 4 * n (n = 0..3); every call uses the same
 * stride. Calls are issued in ascending block order.
 */
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16],
        ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++)
        ff_vp8_idct_dc_add_mmi(dst + 4 * n, block[n], stride);
}
1339cabdff1aSopenharmony_ci
/**
 * Apply the DC-only add to four blocks arranged as a 2x2 grid.
 *
 * Block n lands (n >> 1) * 4 rows down and (n & 1) * 4 bytes to the right of
 * dst, i.e. blocks 0/1 form the upper pair and blocks 2/3 the lower pair.
 * Calls are issued in ascending block order.
 */
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16],
        ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++) {
        uint8_t *const corner = dst + stride * 4 * (n >> 1) + 4 * (n & 1);

        ff_vp8_idct_dc_add_mmi(corner, block[n], stride);
    }
}
1348cabdff1aSopenharmony_ci
1349cabdff1aSopenharmony_ci// loop filter applied to edges between macroblocks
/**
 * Run vp8_v_loop_filter8_mmi twice: once at dst and once at dst + 8.
 *
 * stride, flim_E, flim_I and hev_thresh are forwarded unchanged to both calls.
 */
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
        int flim_I, int hev_thresh)
{
    int half;

    for (half = 0; half < 2; half++)
        vp8_v_loop_filter8_mmi(dst + 8 * half, stride, flim_E, flim_I,
                               hev_thresh);
}
1356cabdff1aSopenharmony_ci
/**
 * Run vp8_h_loop_filter8_mmi twice: once at dst and once eight rows further
 * down (dst + 8 * stride).
 *
 * stride, flim_E, flim_I and hev_thresh are forwarded unchanged to both calls.
 */
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
        int flim_I, int hev_thresh)
{
    int half;

    for (half = 0; half < 2; half++)
        vp8_h_loop_filter8_mmi(dst + 8 * half * stride, stride, flim_E,
                               flim_I, hev_thresh);
}
1364cabdff1aSopenharmony_ci
/**
 * Run vp8_v_loop_filter8_mmi on dstU and then on dstV with identical
 * stride and threshold arguments.
 */
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++)
        vp8_v_loop_filter8_mmi(plane[p], stride, flim_E, flim_I, hev_thresh);
}
1371cabdff1aSopenharmony_ci
/**
 * Run vp8_h_loop_filter8_mmi on dstU and then on dstV with identical
 * stride and threshold arguments.
 */
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++)
        vp8_h_loop_filter8_mmi(plane[p], stride, flim_E, flim_I, hev_thresh);
}
1378cabdff1aSopenharmony_ci
1379cabdff1aSopenharmony_ci// loop filter applied to inner macroblock edges
/**
 * Filter 16 consecutive byte positions starting at dst, one at a time.
 *
 * A position is left untouched unless vp8_normal_limit() accepts it. For an
 * accepted position, hev() picks the filter: non-zero selects
 * vp8_filter_common_is4tap(), zero selects vp8_filter_common_isnot4tap().
 * All helpers are called with the caller's stride.
 */
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *const edge = dst + col;

        if (!vp8_normal_limit(edge, stride, flim_E, flim_I))
            continue;

        if (hev(edge, stride, hev_thresh))
            vp8_filter_common_is4tap(edge, stride);
        else
            vp8_filter_common_isnot4tap(edge, stride);
    }
}
1394cabdff1aSopenharmony_ci
/**
 * Filter 16 positions spaced one stride apart, starting at dst.
 *
 * A position is left untouched unless vp8_normal_limit() accepts it. For an
 * accepted position, hev() picks the filter: non-zero selects
 * vp8_filter_common_is4tap(), zero selects vp8_filter_common_isnot4tap().
 * All helpers are called with a step of 1 instead of the caller's stride.
 */
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
        int flim_E, int flim_I, int hev_thresh)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *const edge = dst + row * stride;

        if (!vp8_normal_limit(edge, 1, flim_E, flim_I))
            continue;

        if (hev(edge, 1, hev_thresh))
            vp8_filter_common_is4tap(edge, 1);
        else
            vp8_filter_common_isnot4tap(edge, 1);
    }
}
1409cabdff1aSopenharmony_ci
/**
 * Run vp8_v_loop_filter8_inner_mmi on dstU and then on dstV with identical
 * stride and threshold arguments.
 */
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++)
        vp8_v_loop_filter8_inner_mmi(plane[p], stride, flim_E, flim_I,
                                     hev_thresh);
}
1416cabdff1aSopenharmony_ci
/**
 * Run vp8_h_loop_filter8_inner_mmi on dstU and then on dstV with identical
 * stride and threshold arguments.
 */
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
        ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
{
    uint8_t *const plane[2] = { dstU, dstV };
    int p;

    for (p = 0; p < 2; p++)
        vp8_h_loop_filter8_inner_mmi(plane[p], stride, flim_E, flim_I,
                                     hev_thresh);
}
1423cabdff1aSopenharmony_ci
/**
 * Simple loop filter over 16 consecutive byte positions starting at dst.
 *
 * Each position that vp8_simple_limit() accepts is passed to
 * vp8_filter_common_is4tap(); rejected positions are left untouched.
 * Both helpers are called with the caller's stride.
 */
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *const edge = dst + col;

        if (!vp8_simple_limit(edge, stride, flim))
            continue;

        vp8_filter_common_is4tap(edge, stride);
    }
}
1432cabdff1aSopenharmony_ci
/**
 * Simple loop filter over 16 positions spaced one stride apart from dst.
 *
 * Each position that vp8_simple_limit() accepts is passed to
 * vp8_filter_common_is4tap(); rejected positions are left untouched.
 * Both helpers are called with a step of 1 instead of the caller's stride.
 */
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *const edge = dst + row * stride;

        if (!vp8_simple_limit(edge, 1, flim))
            continue;

        vp8_filter_common_is4tap(edge, 1);
    }
}
1441cabdff1aSopenharmony_ci
/**
 * Copy h rows of 16 bytes from src to dst.
 *
 * The assembly loop copies two rows per iteration and only exits when h is
 * exactly zero after subtracting 2, so h must be a positive even number.
 * Bytes 0-7 of each row travel through a floating-point register, bytes 8-15
 * through a general-purpose register. The disabled #else branch is the
 * scalar reference (one 16-byte memcpy per row).
 *
 * @param dst        destination of the first row
 * @param dststride  byte distance between destination rows
 * @param src        source of the first row
 * @param srcstride  byte distance between source rows
 * @param h          number of rows to copy (see the even-count note above)
 * @param x          not referenced in this function
 * @param y          not referenced in this function
 */
1442cabdff1aSopenharmony_civoid ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1443cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int x, int y)
1444cabdff1aSopenharmony_ci{
1445cabdff1aSopenharmony_ci#if 1
1446cabdff1aSopenharmony_ci    double ftmp[2];
1447cabdff1aSopenharmony_ci    uint64_t tmp[2];
1448cabdff1aSopenharmony_ci    mips_reg addr[2];
1449cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
1450cabdff1aSopenharmony_ci
1451cabdff1aSopenharmony_ci    __asm__ volatile (
1452cabdff1aSopenharmony_ci        "1:                                                         \n\t"
        // addr0 = second source row of this iteration.
1453cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[src],         %[srcstride]        \n\t"
        // Row 0: bytes 0-7 into ftmp0, bytes 8-15 into tmp0 (ldl/ldr pair).
        // NOTE(review): MMI_ULDC1 is defined outside this chunk; presumably
        // an unaligned 64-bit load -- confirm in mmiutils.h.
1454cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp0], %[src], 0x00)
1455cabdff1aSopenharmony_ci        "ldl        %[tmp0],    0x0f(%[src])                        \n\t"
1456cabdff1aSopenharmony_ci        "ldr        %[tmp0],    0x08(%[src])                        \n\t"
        // Row 1: same split, read from addr0.
1457cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1458cabdff1aSopenharmony_ci        "ldl        %[tmp1],    0x0f(%[addr0])                      \n\t"
1459cabdff1aSopenharmony_ci        "ldr        %[tmp1],    0x08(%[addr0])                      \n\t"
        // addr1 = second destination row of this iteration.
1460cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr1],   %[dst],         %[dststride]        \n\t"
        // Store row 0.
        // NOTE(review): the low half uses MMI_SDC1 while the loads used the
        // "U" variant; presumably dst is 8-byte aligned -- confirm.
1461cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dst], 0x00)
1462cabdff1aSopenharmony_ci        "sdl        %[tmp0],    0x0f(%[dst])                        \n\t"
1463cabdff1aSopenharmony_ci        "sdr        %[tmp0],    0x08(%[dst])                        \n\t"
        // Two rows are consumed per pass.
1464cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x02               \n\t"
        // Store row 1 and advance src/dst past both rows.
1465cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1466cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[addr0],       %[srcstride]        \n\t"
1467cabdff1aSopenharmony_ci        "sdl        %[tmp1],    0x0f(%[addr1])                      \n\t"
1468cabdff1aSopenharmony_ci        "sdr        %[tmp1],    0x08(%[addr1])                      \n\t"
1469cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[addr1],       %[dststride]        \n\t"
        // Loop until h is exactly zero.
1470cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1471cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1472cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
1473cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1474cabdff1aSopenharmony_ci          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1475cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src),
1476cabdff1aSopenharmony_ci          [h]"+&r"(h)
1477cabdff1aSopenharmony_ci        : [dststride]"r"((mips_reg)dststride),
1478cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride)
1479cabdff1aSopenharmony_ci        : "memory"
1480cabdff1aSopenharmony_ci    );
1481cabdff1aSopenharmony_ci#else
    // Scalar reference implementation (compiled out by the "#if 1" above).
1482cabdff1aSopenharmony_ci    int i;
1483cabdff1aSopenharmony_ci
1484cabdff1aSopenharmony_ci    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1485cabdff1aSopenharmony_ci        memcpy(dst, src, 16);
1486cabdff1aSopenharmony_ci#endif
1487cabdff1aSopenharmony_ci}
1488cabdff1aSopenharmony_ci
/**
 * Copy h rows of 8 bytes from src to dst.
 *
 * The assembly loop copies two rows per iteration and only exits when h is
 * exactly zero after subtracting 2, so h must be a positive even number.
 * The first row of each pair goes through a floating-point register, the
 * second through a general-purpose register. The disabled #else branch is
 * the scalar reference (one 8-byte memcpy per row).
 *
 * @param dst        destination of the first row
 * @param dststride  byte distance between destination rows
 * @param src        source of the first row
 * @param srcstride  byte distance between source rows
 * @param h          number of rows to copy (see the even-count note above)
 * @param x          not referenced in this function
 * @param y          not referenced in this function
 */
1489cabdff1aSopenharmony_civoid ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1490cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int x, int y)
1491cabdff1aSopenharmony_ci{
1492cabdff1aSopenharmony_ci#if 1
1493cabdff1aSopenharmony_ci    double ftmp[1];
1494cabdff1aSopenharmony_ci    uint64_t tmp[1];
1495cabdff1aSopenharmony_ci    mips_reg addr[2];
1496cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
1497cabdff1aSopenharmony_ci
1498cabdff1aSopenharmony_ci    __asm__ volatile (
1499cabdff1aSopenharmony_ci        "1:                                                         \n\t"
        // addr0 = second source row of this iteration.
1500cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[src],         %[srcstride]        \n\t"
        // Row 0 into ftmp0; row 1 into tmp0 via the ldl/ldr pair.
1501cabdff1aSopenharmony_ci        MMI_ULDC1(%[ftmp0], %[src], 0x00)
1502cabdff1aSopenharmony_ci        "ldl        %[tmp0],    0x07(%[addr0])                      \n\t"
1503cabdff1aSopenharmony_ci        "ldr        %[tmp0],    0x00(%[addr0])                      \n\t"
        // addr1 = second destination row of this iteration.
1504cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr1],   %[dst],         %[dststride]        \n\t"
        // Store row 0, count off two rows, then store row 1 via sdl/sdr.
1505cabdff1aSopenharmony_ci        MMI_SDC1(%[ftmp0], %[dst], 0x00)
1506cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x02               \n\t"
1507cabdff1aSopenharmony_ci        "sdl        %[tmp0],    0x07(%[addr1])                      \n\t"
1508cabdff1aSopenharmony_ci        "sdr        %[tmp0],    0x00(%[addr1])                      \n\t"
        // Advance src/dst past both rows and loop until h is exactly zero.
1509cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[addr0],       %[srcstride]        \n\t"
1510cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[addr1],       %[dststride]        \n\t"
1511cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1512cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [tmp0]"=&r"(tmp[0]),
1513cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1514cabdff1aSopenharmony_ci          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1515cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src),
1516cabdff1aSopenharmony_ci          [h]"+&r"(h)
1517cabdff1aSopenharmony_ci        : [dststride]"r"((mips_reg)dststride),
1518cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride)
1519cabdff1aSopenharmony_ci        : "memory"
1520cabdff1aSopenharmony_ci    );
1521cabdff1aSopenharmony_ci#else
    // Scalar reference implementation (compiled out by the "#if 1" above).
1522cabdff1aSopenharmony_ci    int i;
1523cabdff1aSopenharmony_ci
1524cabdff1aSopenharmony_ci    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1525cabdff1aSopenharmony_ci        memcpy(dst, src, 8);
1526cabdff1aSopenharmony_ci#endif
1527cabdff1aSopenharmony_ci}
1528cabdff1aSopenharmony_ci
/**
 * Copy h rows of 4 bytes from src to dst.
 *
 * The assembly loop copies two rows per iteration and only exits when h is
 * exactly zero after subtracting 2, so h must be a positive even number.
 * The first row of each pair goes through a floating-point register, the
 * second through a general-purpose register. The disabled #else branch is
 * the scalar reference (one 4-byte memcpy per row).
 *
 * @param dst        destination of the first row
 * @param dststride  byte distance between destination rows
 * @param src        source of the first row
 * @param srcstride  byte distance between source rows
 * @param h          number of rows to copy (see the even-count note above)
 * @param x          not referenced in this function
 * @param y          not referenced in this function
 */
1529cabdff1aSopenharmony_civoid ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1530cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int x, int y)
1531cabdff1aSopenharmony_ci{
1532cabdff1aSopenharmony_ci#if 1
1533cabdff1aSopenharmony_ci    double ftmp[1];
1534cabdff1aSopenharmony_ci    uint64_t tmp[1];
1535cabdff1aSopenharmony_ci    mips_reg addr[2];
1536cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
1537cabdff1aSopenharmony_ci
1538cabdff1aSopenharmony_ci    __asm__ volatile (
1539cabdff1aSopenharmony_ci        "1:                                                         \n\t"
        // addr0 = second source row of this iteration.
1540cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr0],   %[src],         %[srcstride]        \n\t"
        // Row 0 into ftmp0; row 1 into tmp0 via the lwl/lwr pair.
1541cabdff1aSopenharmony_ci        MMI_LWC1(%[ftmp0], %[src], 0x00)
1542cabdff1aSopenharmony_ci        "lwl        %[tmp0],    0x03(%[addr0])                      \n\t"
1543cabdff1aSopenharmony_ci        "lwr        %[tmp0],    0x00(%[addr0])                      \n\t"
        // addr1 = second destination row of this iteration.
1544cabdff1aSopenharmony_ci        PTR_ADDU   "%[addr1],   %[dst],         %[dststride]        \n\t"
        // Store row 0, count off two rows, then store row 1 via swl/swr.
1545cabdff1aSopenharmony_ci        MMI_SWC1(%[ftmp0], %[dst], 0x00)
1546cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x02               \n\t"
1547cabdff1aSopenharmony_ci        "swl        %[tmp0],    0x03(%[addr1])                      \n\t"
1548cabdff1aSopenharmony_ci        "swr        %[tmp0],    0x00(%[addr1])                      \n\t"
        // Advance src/dst past both rows and loop until h is exactly zero.
1549cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[addr0],       %[srcstride]        \n\t"
1550cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[addr1],       %[dststride]        \n\t"
1551cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1552cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [tmp0]"=&r"(tmp[0]),
1553cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1554cabdff1aSopenharmony_ci          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1555cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src),
1556cabdff1aSopenharmony_ci          [h]"+&r"(h)
1557cabdff1aSopenharmony_ci        : [dststride]"r"((mips_reg)dststride),
1558cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride)
1559cabdff1aSopenharmony_ci        : "memory"
1560cabdff1aSopenharmony_ci    );
1561cabdff1aSopenharmony_ci#else
    // Scalar reference implementation (compiled out by the "#if 1" above).
1562cabdff1aSopenharmony_ci    int i;
1563cabdff1aSopenharmony_ci
1564cabdff1aSopenharmony_ci    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1565cabdff1aSopenharmony_ci        memcpy(dst, src, 4);
1566cabdff1aSopenharmony_ci#endif
1567cabdff1aSopenharmony_ci}
1568cabdff1aSopenharmony_ci
/**
 * Horizontal 4-tap sub-pixel filter for 16-pixel-wide rows.
 *
 * The coefficient set is fourtap_subpel_filters[mx - 1]; entries 1..4 of it
 * are handed to the assembly as floating-point operands. Each loop iteration
 * produces one output row by expanding PUT_VP8_EPEL8_H4_MMI twice: once for
 * columns 0-7 and once for columns 8-15 (src + 8 / dst + 8). The loop only
 * exits when h reaches exactly zero, so h must be positive on entry.
 * The per-pixel formula is spelled out in the comment inside the function;
 * the disabled #else branch is the scalar reference built on FILTER_4TAP.
 *
 * @param dst        destination of the first output row
 * @param dststride  byte distance between destination rows
 * @param src        source of the first row; the formula in the body reads
 *                   from src[-1] up to src[17]
 * @param srcstride  byte distance between source rows
 * @param h          number of rows to produce
 * @param mx         filter selector, used as index mx - 1 into the table
 * @param my         not referenced in this function
 */
1569cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1570cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1571cabdff1aSopenharmony_ci{
1572cabdff1aSopenharmony_ci#if 1
1573cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1574cabdff1aSopenharmony_ci    double ftmp[9];
1575cabdff1aSopenharmony_ci    uint32_t tmp[1];
    // The unions reinterpret the 64-bit integer coefficients as doubles so
    // they can be bound to "f" (floating-point register) asm operands.
1576cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1577cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1578cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1579cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1580cabdff1aSopenharmony_ci    mips_reg src1, dst1;
1581cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
1582cabdff1aSopenharmony_ci    filter1.i = filter[1];
1583cabdff1aSopenharmony_ci    filter2.i = filter[2];
1584cabdff1aSopenharmony_ci    filter3.i = filter[3];
1585cabdff1aSopenharmony_ci    filter4.i = filter[4];
1586cabdff1aSopenharmony_ci
1587cabdff1aSopenharmony_ci    /*
1588cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1589cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1590cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1591cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1592cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
1593cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
1594cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
1595cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];
1596cabdff1aSopenharmony_ci
1597cabdff1aSopenharmony_ci    dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 7] + filter[3] * src[ 9] - filter[4] * src[10] + 64) >> 7];
1598cabdff1aSopenharmony_ci    dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 8] + filter[3] * src[10] - filter[4] * src[11] + 64) >> 7];
1599cabdff1aSopenharmony_ci    dst[10] = cm[(filter[2] * src[10] - filter[1] * src[ 9] + filter[3] * src[11] - filter[4] * src[12] + 64) >> 7];
1600cabdff1aSopenharmony_ci    dst[11] = cm[(filter[2] * src[11] - filter[1] * src[10] + filter[3] * src[12] - filter[4] * src[13] + 64) >> 7];
1601cabdff1aSopenharmony_ci    dst[12] = cm[(filter[2] * src[12] - filter[1] * src[11] + filter[3] * src[13] - filter[4] * src[14] + 64) >> 7];
1602cabdff1aSopenharmony_ci    dst[13] = cm[(filter[2] * src[13] - filter[1] * src[12] + filter[3] * src[14] - filter[4] * src[15] + 64) >> 7];
1603cabdff1aSopenharmony_ci    dst[14] = cm[(filter[2] * src[14] - filter[1] * src[13] + filter[3] * src[15] - filter[4] * src[16] + 64) >> 7];
1604cabdff1aSopenharmony_ci    dst[15] = cm[(filter[2] * src[15] - filter[1] * src[14] + filter[3] * src[16] - filter[4] * src[17] + 64) >> 7];
1605cabdff1aSopenharmony_ci    */
1606cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (x ^ x); ftmp4 = 7.
        // NOTE(review): the 7 presumably serves as the ">> 7" shift count
        // inside PUT_VP8_EPEL8_H4_MMI, which is defined outside this chunk.
1607cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1608cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1609cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1610cabdff1aSopenharmony_ci
1611cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1612cabdff1aSopenharmony_ci        // 0 - 7
1613cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])
        // Second half of the row starts 8 bytes further in both buffers.
1614cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src1],    %[src],         0x08                \n\t"
1615cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst1],    %[dst],         0x08                \n\t"
1616cabdff1aSopenharmony_ci        // 8 - 15
1617cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H4_MMI(%[src1], %[dst1])
1618cabdff1aSopenharmony_ci
        // One row per iteration: count down h and step both pointers.
1619cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1620cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1621cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1622cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1623cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1624cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1625cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1626cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1627cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
1628cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1629cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1630cabdff1aSopenharmony_ci          [dst1]"=&r"(dst1),                [src1]"=&r"(src1),
1631cabdff1aSopenharmony_ci          [h]"+&r"(h),
1632cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // NOTE(review): ff_pw_64 and filter1..filter4 are not named in the
        // instructions visible here; presumably they are consumed inside
        // PUT_VP8_EPEL8_H4_MMI (the "+ 64" rounding term and the taps).
1633cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1634cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1635cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
1636cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
1637cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
1638cabdff1aSopenharmony_ci        : "memory"
1639cabdff1aSopenharmony_ci    );
1640cabdff1aSopenharmony_ci#else
    // Scalar reference implementation (compiled out by the "#if 1" above).
1641cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
1642cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
1643cabdff1aSopenharmony_ci    int x, y;
1644cabdff1aSopenharmony_ci
1645cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
1646cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
1647cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, 1);
1648cabdff1aSopenharmony_ci        dst += dststride;
1649cabdff1aSopenharmony_ci        src += srcstride;
1650cabdff1aSopenharmony_ci    }
1651cabdff1aSopenharmony_ci#endif
1652cabdff1aSopenharmony_ci}
1653cabdff1aSopenharmony_ci
/**
 * Horizontal 4-tap VP8 sub-pixel interpolation for an 8-pixel-wide block,
 * written as Loongson MMI inline assembly.
 *
 * Each loop iteration emits one destination row through
 * PUT_VP8_EPEL8_H4_MMI (defined outside this chunk). The per-pixel
 * reference formula is spelled out in the comment inside the body.
 *
 * @param dst        destination pointer; dststride is added to it after each row
 * @param dststride  step between destination rows
 * @param src        source pointer; srcstride is added to it after each row.
 *                   Per the reference formula, src[-1] .. src[9] are read.
 * @param srcstride  step between source rows
 * @param h          row count. The counter is only tested after it has been
 *                   decremented, so the asm body runs at least once and
 *                   h == 0 on entry is not handled.
 * @param mx         filter selector; row mx - 1 of fourtap_subpel_filters is used
 * @param my         not referenced by this function
 */
1654cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1655cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1656cabdff1aSopenharmony_ci{
    /* The MMI path is always selected; the #else branch below is never compiled. */
1657cabdff1aSopenharmony_ci#if 1
1658cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1659cabdff1aSopenharmony_ci    double ftmp[9];
1660cabdff1aSopenharmony_ci    uint32_t tmp[1];
1661cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1662cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1663cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1664cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1665cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* Taps 1..4 are stored through the integer member so that their 64-bit
     * patterns can be passed to the asm as "f" (FP register) operands. */
1666cabdff1aSopenharmony_ci    filter1.i = filter[1];
1667cabdff1aSopenharmony_ci    filter2.i = filter[2];
1668cabdff1aSopenharmony_ci    filter3.i = filter[3];
1669cabdff1aSopenharmony_ci    filter4.i = filter[4];
1670cabdff1aSopenharmony_ci
1671cabdff1aSopenharmony_ci
1672cabdff1aSopenharmony_ci    /*
1673cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1674cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1675cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1676cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1677cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
1678cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
1679cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
1680cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];
1681cabdff1aSopenharmony_ci    */
    /* Setup before the loop: ftmp0 is zeroed (pxor with itself) and ftmp4
     * receives the constant 7.
     * NOTE(review): presumably the unpack zero and the ">> 7" shift count
     * consumed by PUT_VP8_EPEL8_H4_MMI -- confirm against the macro. */
1682cabdff1aSopenharmony_ci    __asm__ volatile (
1683cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1684cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1685cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1686cabdff1aSopenharmony_ci
1687cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1688cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])
1689cabdff1aSopenharmony_ci
        // next row: --h, src += srcstride, dst += dststride; repeat while h != 0
1690cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1691cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1692cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1693cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1694cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1695cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1696cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1697cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1698cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
1699cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1700cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1701cabdff1aSopenharmony_ci          [h]"+&r"(h),
1702cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
1703cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1704cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1705cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
1706cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
1707cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
1708cabdff1aSopenharmony_ci        : "memory"
1709cabdff1aSopenharmony_ci    );
1710cabdff1aSopenharmony_ci#else
    /* Scalar C version of the same filter (disabled by the "#if 1" above). */
1711cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
1712cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
1713cabdff1aSopenharmony_ci    int x, y;
1714cabdff1aSopenharmony_ci
1715cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
1716cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
1717cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, 1);
1718cabdff1aSopenharmony_ci        dst += dststride;
1719cabdff1aSopenharmony_ci        src += srcstride;
1720cabdff1aSopenharmony_ci    }
1721cabdff1aSopenharmony_ci#endif
1722cabdff1aSopenharmony_ci}
1723cabdff1aSopenharmony_ci
/**
 * Horizontal 4-tap VP8 sub-pixel interpolation for a 4-pixel-wide block,
 * written as Loongson MMI inline assembly.
 *
 * Each loop iteration emits one destination row through
 * PUT_VP8_EPEL4_H4_MMI (defined outside this chunk). The per-pixel
 * reference formula is spelled out in the comment inside the body.
 *
 * @param dst        destination pointer; dststride is added to it after each row
 * @param dststride  step between destination rows
 * @param src        source pointer; srcstride is added to it after each row.
 *                   Per the reference formula, src[-1] .. src[5] are read.
 * @param srcstride  step between source rows
 * @param h          row count. The counter is only tested after it has been
 *                   decremented, so the asm body runs at least once and
 *                   h == 0 on entry is not handled.
 * @param mx         filter selector; row mx - 1 of fourtap_subpel_filters is used
 * @param my         not referenced by this function
 */
1724cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1725cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1726cabdff1aSopenharmony_ci{
    /* The MMI path is always selected; the #else branch below is never compiled. */
1727cabdff1aSopenharmony_ci#if 1
1728cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1729cabdff1aSopenharmony_ci    double ftmp[6];
1730cabdff1aSopenharmony_ci    uint32_t tmp[1];
1731cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1732cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1733cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1734cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1735cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* Taps 1..4 are stored through the integer member so that their 64-bit
     * patterns can be passed to the asm as "f" (FP register) operands. */
1736cabdff1aSopenharmony_ci    filter1.i = filter[1];
1737cabdff1aSopenharmony_ci    filter2.i = filter[2];
1738cabdff1aSopenharmony_ci    filter3.i = filter[3];
1739cabdff1aSopenharmony_ci    filter4.i = filter[4];
1740cabdff1aSopenharmony_ci
1741cabdff1aSopenharmony_ci    /*
1742cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1743cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1744cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1745cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1746cabdff1aSopenharmony_ci    */
    /* Setup before the loop: ftmp0 is zeroed (pxor with itself) and ftmp4
     * receives the constant 7.
     * NOTE(review): presumably the unpack zero and the ">> 7" shift count
     * consumed by PUT_VP8_EPEL4_H4_MMI -- confirm against the macro. */
1747cabdff1aSopenharmony_ci    __asm__ volatile (
1748cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1749cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1750cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1751cabdff1aSopenharmony_ci
1752cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1753cabdff1aSopenharmony_ci        PUT_VP8_EPEL4_H4_MMI(%[src], %[dst])
1754cabdff1aSopenharmony_ci
        // next row: --h, src += srcstride, dst += dststride; repeat while h != 0
1755cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1756cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1757cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1758cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1759cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1760cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1761cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1762cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1763cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1764cabdff1aSopenharmony_ci          [h]"+&r"(h),
1765cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
1766cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1767cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1768cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
1769cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
1770cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
1771cabdff1aSopenharmony_ci        : "memory"
1772cabdff1aSopenharmony_ci    );
1773cabdff1aSopenharmony_ci#else
    /* Scalar C version of the same filter (disabled by the "#if 1" above). */
1774cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
1775cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
1776cabdff1aSopenharmony_ci    int x, y;
1777cabdff1aSopenharmony_ci
1778cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
1779cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
1780cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, 1);
1781cabdff1aSopenharmony_ci        dst += dststride;
1782cabdff1aSopenharmony_ci        src += srcstride;
1783cabdff1aSopenharmony_ci    }
1784cabdff1aSopenharmony_ci#endif
1785cabdff1aSopenharmony_ci}
1786cabdff1aSopenharmony_ci
/**
 * Horizontal 6-tap VP8 sub-pixel interpolation for a 16-pixel-wide block,
 * written as Loongson MMI inline assembly.
 *
 * Each loop iteration emits one destination row as two 8-pixel halves:
 * PUT_VP8_EPEL8_H6_MMI (defined outside this chunk) is expanded once on
 * src/dst and once on src + 8 / dst + 8. The per-pixel reference formula is
 * spelled out in the comment inside the body.
 *
 * @param dst        destination pointer; dststride is added to it after each row
 * @param dststride  step between destination rows
 * @param src        source pointer; srcstride is added to it after each row.
 *                   Per the reference formula, src[-2] .. src[18] are read.
 * @param srcstride  step between source rows
 * @param h          row count. The counter is only tested after it has been
 *                   decremented, so the asm body runs at least once and
 *                   h == 0 on entry is not handled.
 * @param mx         filter selector; row mx - 1 of fourtap_subpel_filters is used
 * @param my         not referenced by this function
 */
1787cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1788cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1789cabdff1aSopenharmony_ci{
    /* The MMI path is always selected; the #else branch below is never compiled. */
1790cabdff1aSopenharmony_ci#if 1
    /* NOTE(review): the table is named "fourtap" but entries 0..5 are read
     * below -- verify that each row really holds six coefficients. */
1791cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1792cabdff1aSopenharmony_ci    double ftmp[9];
1793cabdff1aSopenharmony_ci    uint32_t tmp[1];
1794cabdff1aSopenharmony_ci    mips_reg src1, dst1;
1795cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
1796cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1797cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1798cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1799cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1800cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
1801cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* Taps 0..5 are stored through the integer member so that their 64-bit
     * patterns can be passed to the asm as "f" (FP register) operands. */
1802cabdff1aSopenharmony_ci    filter0.i = filter[0];
1803cabdff1aSopenharmony_ci    filter1.i = filter[1];
1804cabdff1aSopenharmony_ci    filter2.i = filter[2];
1805cabdff1aSopenharmony_ci    filter3.i = filter[3];
1806cabdff1aSopenharmony_ci    filter4.i = filter[4];
1807cabdff1aSopenharmony_ci    filter5.i = filter[5];
1808cabdff1aSopenharmony_ci
1809cabdff1aSopenharmony_ci    /*
1810cabdff1aSopenharmony_ci    dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7];
1811cabdff1aSopenharmony_ci    dst[ 1] = cm[(filter[2]*src[ 1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[ 2] - filter[4]*src[ 3] + filter[5]*src[ 4] + 64) >> 7];
1812cabdff1aSopenharmony_ci    dst[ 2] = cm[(filter[2]*src[ 2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[ 3] - filter[4]*src[ 4] + filter[5]*src[ 5] + 64) >> 7];
1813cabdff1aSopenharmony_ci    dst[ 3] = cm[(filter[2]*src[ 3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[ 4] - filter[4]*src[ 5] + filter[5]*src[ 6] + 64) >> 7];
1814cabdff1aSopenharmony_ci    dst[ 4] = cm[(filter[2]*src[ 4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[ 5] - filter[4]*src[ 6] + filter[5]*src[ 7] + 64) >> 7];
1815cabdff1aSopenharmony_ci    dst[ 5] = cm[(filter[2]*src[ 5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[ 6] - filter[4]*src[ 7] + filter[5]*src[ 8] + 64) >> 7];
1816cabdff1aSopenharmony_ci    dst[ 6] = cm[(filter[2]*src[ 6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[ 7] - filter[4]*src[ 8] + filter[5]*src[ 9] + 64) >> 7];
1817cabdff1aSopenharmony_ci    dst[ 7] = cm[(filter[2]*src[ 7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[ 8] - filter[4]*src[ 9] + filter[5]*src[10] + 64) >> 7];
1818cabdff1aSopenharmony_ci
1819cabdff1aSopenharmony_ci    dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 7] + filter[0]*src[ 6] + filter[3]*src[ 9] - filter[4]*src[10] + filter[5]*src[11] + 64) >> 7];
1820cabdff1aSopenharmony_ci    dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 8] + filter[0]*src[ 7] + filter[3]*src[10] - filter[4]*src[11] + filter[5]*src[12] + 64) >> 7];
1821cabdff1aSopenharmony_ci    dst[10] = cm[(filter[2]*src[10] - filter[1]*src[ 9] + filter[0]*src[ 8] + filter[3]*src[11] - filter[4]*src[12] + filter[5]*src[13] + 64) >> 7];
1822cabdff1aSopenharmony_ci    dst[11] = cm[(filter[2]*src[11] - filter[1]*src[10] + filter[0]*src[ 9] + filter[3]*src[12] - filter[4]*src[13] + filter[5]*src[14] + 64) >> 7];
1823cabdff1aSopenharmony_ci    dst[12] = cm[(filter[2]*src[12] - filter[1]*src[11] + filter[0]*src[10] + filter[3]*src[13] - filter[4]*src[14] + filter[5]*src[15] + 64) >> 7];
1824cabdff1aSopenharmony_ci    dst[13] = cm[(filter[2]*src[13] - filter[1]*src[12] + filter[0]*src[11] + filter[3]*src[14] - filter[4]*src[15] + filter[5]*src[16] + 64) >> 7];
1825cabdff1aSopenharmony_ci    dst[14] = cm[(filter[2]*src[14] - filter[1]*src[13] + filter[0]*src[12] + filter[3]*src[15] - filter[4]*src[16] + filter[5]*src[17] + 64) >> 7];
1826cabdff1aSopenharmony_ci    dst[15] = cm[(filter[2]*src[15] - filter[1]*src[14] + filter[0]*src[13] + filter[3]*src[16] - filter[4]*src[17] + filter[5]*src[18] + 64) >> 7];
1827cabdff1aSopenharmony_ci    */
    /* Setup before the loop: ftmp0 is zeroed (pxor with itself) and ftmp4
     * receives the constant 7.
     * NOTE(review): presumably the unpack zero and the ">> 7" shift count
     * consumed by PUT_VP8_EPEL8_H6_MMI -- confirm against the macro. */
1828cabdff1aSopenharmony_ci    __asm__ volatile (
1829cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1830cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1831cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1832cabdff1aSopenharmony_ci
1833cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1834cabdff1aSopenharmony_ci        // 0 - 7
1835cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H6_MMI(%[src], %[dst])
        // src1/dst1 address the right-hand half of the current row
1836cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src1],    %[src],         0x08                \n\t"
1837cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst1],    %[dst],         0x08                \n\t"
1838cabdff1aSopenharmony_ci        // 8 - 15
1839cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H6_MMI(%[src1], %[dst1])
1840cabdff1aSopenharmony_ci
        // next row: --h, src += srcstride, dst += dststride; repeat while h != 0
1841cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1842cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1843cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1844cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1845cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1846cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1847cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1848cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1849cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
1850cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1851cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1852cabdff1aSopenharmony_ci          [dst1]"=&r"(dst1),                [src1]"=&r"(src1),
1853cabdff1aSopenharmony_ci          [h]"+&r"(h),
1854cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
1855cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1856cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1857cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
1858cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
1859cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
1860cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
1861cabdff1aSopenharmony_ci        : "memory"
1862cabdff1aSopenharmony_ci    );
1863cabdff1aSopenharmony_ci#else
    /* Scalar C version of the same filter (disabled by the "#if 1" above). */
1864cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
1865cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
1866cabdff1aSopenharmony_ci    int x, y;
1867cabdff1aSopenharmony_ci
1868cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
1869cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
1870cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, 1);
1871cabdff1aSopenharmony_ci        dst += dststride;
1872cabdff1aSopenharmony_ci        src += srcstride;
1873cabdff1aSopenharmony_ci    }
1874cabdff1aSopenharmony_ci#endif
1875cabdff1aSopenharmony_ci}
1876cabdff1aSopenharmony_ci
/**
 * Horizontal 6-tap VP8 sub-pixel interpolation for an 8-pixel-wide block,
 * written as Loongson MMI inline assembly.
 *
 * Each loop iteration emits one destination row through
 * PUT_VP8_EPEL8_H6_MMI (defined outside this chunk). The per-pixel
 * reference formula is spelled out in the comment inside the body.
 *
 * @param dst        destination pointer; dststride is added to it after each row
 * @param dststride  step between destination rows
 * @param src        source pointer; srcstride is added to it after each row.
 *                   Per the reference formula, src[-2] .. src[10] are read.
 * @param srcstride  step between source rows
 * @param h          row count. The counter is only tested after it has been
 *                   decremented, so the asm body runs at least once and
 *                   h == 0 on entry is not handled.
 * @param mx         filter selector; row mx - 1 of fourtap_subpel_filters is used
 * @param my         not referenced by this function
 */
1877cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1878cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1879cabdff1aSopenharmony_ci{
    /* The MMI path is always selected; the #else branch below is never compiled. */
1880cabdff1aSopenharmony_ci#if 1
    /* NOTE(review): the table is named "fourtap" but entries 0..5 are read
     * below -- verify that each row really holds six coefficients. */
1881cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1882cabdff1aSopenharmony_ci    double ftmp[9];
1883cabdff1aSopenharmony_ci    uint32_t tmp[1];
1884cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
1885cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1886cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1887cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1888cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1889cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
1890cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* Taps 0..5 are stored through the integer member so that their 64-bit
     * patterns can be passed to the asm as "f" (FP register) operands. */
1891cabdff1aSopenharmony_ci    filter0.i = filter[0];
1892cabdff1aSopenharmony_ci    filter1.i = filter[1];
1893cabdff1aSopenharmony_ci    filter2.i = filter[2];
1894cabdff1aSopenharmony_ci    filter3.i = filter[3];
1895cabdff1aSopenharmony_ci    filter4.i = filter[4];
1896cabdff1aSopenharmony_ci    filter5.i = filter[5];
1897cabdff1aSopenharmony_ci
1898cabdff1aSopenharmony_ci    /*
1899cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
1900cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7];
1901cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7];
1902cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7];
1903cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2]*src[4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[5] - filter[4]*src[6] + filter[5]*src[ 7] + 64) >> 7];
1904cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2]*src[5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[6] - filter[4]*src[7] + filter[5]*src[ 8] + 64) >> 7];
1905cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2]*src[6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[7] - filter[4]*src[8] + filter[5]*src[ 9] + 64) >> 7];
1906cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2]*src[7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[8] - filter[4]*src[9] + filter[5]*src[10] + 64) >> 7];
1907cabdff1aSopenharmony_ci    */
    /* Setup before the loop: ftmp0 is zeroed (pxor with itself) and ftmp4
     * receives the constant 7.
     * NOTE(review): presumably the unpack zero and the ">> 7" shift count
     * consumed by PUT_VP8_EPEL8_H6_MMI -- confirm against the macro. */
1908cabdff1aSopenharmony_ci    __asm__ volatile (
1909cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1910cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1911cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1912cabdff1aSopenharmony_ci
1913cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1914cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_H6_MMI(%[src], %[dst])
1915cabdff1aSopenharmony_ci
        // next row: --h, src += srcstride, dst += dststride; repeat while h != 0
1916cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1917cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1918cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1919cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1920cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1921cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1922cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1923cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1924cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
1925cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1926cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
1927cabdff1aSopenharmony_ci          [h]"+&r"(h),
1928cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
1929cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1930cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1931cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
1932cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
1933cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
1934cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
1935cabdff1aSopenharmony_ci        : "memory"
1936cabdff1aSopenharmony_ci    );
1937cabdff1aSopenharmony_ci#else
    /* Scalar C version of the same filter (disabled by the "#if 1" above). */
1938cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
1939cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
1940cabdff1aSopenharmony_ci    int x, y;
1941cabdff1aSopenharmony_ci
1942cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
1943cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
1944cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, 1);
1945cabdff1aSopenharmony_ci        dst += dststride;
1946cabdff1aSopenharmony_ci        src += srcstride;
1947cabdff1aSopenharmony_ci    }
1948cabdff1aSopenharmony_ci#endif
1949cabdff1aSopenharmony_ci}
1950cabdff1aSopenharmony_ci
/**
 * Horizontal 6-tap VP8 sub-pixel interpolation for a 4-pixel-wide block,
 * written as Loongson MMI inline assembly.
 *
 * Each loop iteration emits one destination row through
 * PUT_VP8_EPEL4_H6_MMI (defined outside this chunk). The per-pixel
 * reference formula is spelled out in the comment inside the body.
 *
 * @param dst        destination pointer; dststride is added to it after each row
 * @param dststride  step between destination rows
 * @param src        source pointer; srcstride is added to it after each row.
 *                   Per the reference formula, src[-2] .. src[6] are read.
 * @param srcstride  step between source rows
 * @param h          row count. The counter is only tested after it has been
 *                   decremented, so the asm body runs at least once and
 *                   h == 0 on entry is not handled.
 * @param mx         filter selector; row mx - 1 of fourtap_subpel_filters is used
 * @param my         not referenced by this function
 */
1951cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1952cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
1953cabdff1aSopenharmony_ci{
    /* The MMI path is always selected; the #else branch below is never compiled. */
1954cabdff1aSopenharmony_ci#if 1
    /* NOTE(review): the table is named "fourtap" but entries 0..5 are read
     * below -- verify that each row really holds six coefficients. */
1955cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[mx - 1];
1956cabdff1aSopenharmony_ci    double ftmp[6];
1957cabdff1aSopenharmony_ci    uint32_t tmp[1];
1958cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
1959cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
1960cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
1961cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
1962cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
1963cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
1964cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* Taps 0..5 are stored through the integer member so that their 64-bit
     * patterns can be passed to the asm as "f" (FP register) operands. */
1965cabdff1aSopenharmony_ci    filter0.i = filter[0];
1966cabdff1aSopenharmony_ci    filter1.i = filter[1];
1967cabdff1aSopenharmony_ci    filter2.i = filter[2];
1968cabdff1aSopenharmony_ci    filter3.i = filter[3];
1969cabdff1aSopenharmony_ci    filter4.i = filter[4];
1970cabdff1aSopenharmony_ci    filter5.i = filter[5];
1971cabdff1aSopenharmony_ci
1972cabdff1aSopenharmony_ci    /*
1973cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
1974cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7];
1975cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7];
1976cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7];
1977cabdff1aSopenharmony_ci    */
    /* Setup before the loop: ftmp0 is zeroed (pxor with itself) and ftmp4
     * receives the constant 7.
     * NOTE(review): presumably the unpack zero and the ">> 7" shift count
     * consumed by PUT_VP8_EPEL4_H6_MMI -- confirm against the macro. */
1978cabdff1aSopenharmony_ci    __asm__ volatile (
1979cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1980cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
1981cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
1982cabdff1aSopenharmony_ci
1983cabdff1aSopenharmony_ci        "1:                                                         \n\t"
1984cabdff1aSopenharmony_ci        PUT_VP8_EPEL4_H6_MMI(%[src], %[dst])
1985cabdff1aSopenharmony_ci
        // next row: --h, src += srcstride, dst += dststride; repeat while h != 0
1986cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
1987cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
1988cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
1989cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
1990cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1991cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1992cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1993cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
1994cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
1995cabdff1aSopenharmony_ci          [h]"+&r"(h),
1996cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
1997cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
1998cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
1999cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2000cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
2001cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
2002cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
2003cabdff1aSopenharmony_ci        : "memory"
2004cabdff1aSopenharmony_ci    );
2005cabdff1aSopenharmony_ci#else
    /* Scalar C version of the same filter (disabled by the "#if 1" above). */
2006cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2007cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2008cabdff1aSopenharmony_ci    int x, y;
2009cabdff1aSopenharmony_ci
2010cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2011cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2012cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, 1);
2013cabdff1aSopenharmony_ci        dst += dststride;
2014cabdff1aSopenharmony_ci        src += srcstride;
2015cabdff1aSopenharmony_ci    }
2016cabdff1aSopenharmony_ci#endif
2017cabdff1aSopenharmony_ci}
2018cabdff1aSopenharmony_ci
2019cabdff1aSopenharmony_civoid ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2020cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2021cabdff1aSopenharmony_ci{
2022cabdff1aSopenharmony_ci#if 1
2023cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
2024cabdff1aSopenharmony_ci    double ftmp[9];
2025cabdff1aSopenharmony_ci    uint32_t tmp[1];
2026cabdff1aSopenharmony_ci    mips_reg src0, src1, dst0;
2027cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2028cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2029cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2030cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
2031cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
2032cabdff1aSopenharmony_ci    filter1.i = filter[1];
2033cabdff1aSopenharmony_ci    filter2.i = filter[2];
2034cabdff1aSopenharmony_ci    filter3.i = filter[3];
2035cabdff1aSopenharmony_ci    filter4.i = filter[4];
2036cabdff1aSopenharmony_ci
2037cabdff1aSopenharmony_ci    /*
2038cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[  srcstride] - filter[4] * src[  2*srcstride] + 64) >> 7];
2039cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2040cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2041cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2042cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7];
2043cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7];
2044cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7];
2045cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7];
2046cabdff1aSopenharmony_ci
2047cabdff1aSopenharmony_ci    dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 8-srcstride] + filter[3] * src[ 8+srcstride] - filter[4] * src[ 8+2*srcstride] + 64) >> 7];
2048cabdff1aSopenharmony_ci    dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 9-srcstride] + filter[3] * src[ 9+srcstride] - filter[4] * src[ 9+2*srcstride] + 64) >> 7];
2049cabdff1aSopenharmony_ci    dst[10] = cm[(filter[2] * src[10] - filter[1] * src[10-srcstride] + filter[3] * src[10+srcstride] - filter[4] * src[10+2*srcstride] + 64) >> 7];
2050cabdff1aSopenharmony_ci    dst[11] = cm[(filter[2] * src[11] - filter[1] * src[11-srcstride] + filter[3] * src[11+srcstride] - filter[4] * src[11+2*srcstride] + 64) >> 7];
2051cabdff1aSopenharmony_ci    dst[12] = cm[(filter[2] * src[12] - filter[1] * src[12-srcstride] + filter[3] * src[12+srcstride] - filter[4] * src[12+2*srcstride] + 64) >> 7];
2052cabdff1aSopenharmony_ci    dst[13] = cm[(filter[2] * src[13] - filter[1] * src[13-srcstride] + filter[3] * src[13+srcstride] - filter[4] * src[13+2*srcstride] + 64) >> 7];
2053cabdff1aSopenharmony_ci    dst[14] = cm[(filter[2] * src[14] - filter[1] * src[14-srcstride] + filter[3] * src[14+srcstride] - filter[4] * src[14+2*srcstride] + 64) >> 7];
2054cabdff1aSopenharmony_ci    dst[15] = cm[(filter[2] * src[15] - filter[1] * src[15-srcstride] + filter[3] * src[15+srcstride] - filter[4] * src[15+2*srcstride] + 64) >> 7];
2055cabdff1aSopenharmony_ci    */
2056cabdff1aSopenharmony_ci    __asm__ volatile (
2057cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
2058cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2059cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2060cabdff1aSopenharmony_ci
2061cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2062cabdff1aSopenharmony_ci        // 0 - 7
2063cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2064cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src0],    %[src],         0x08                \n\t"
2065cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst0],    %[dst],         0x08                \n\t"
2066cabdff1aSopenharmony_ci        // 8 - 15
2067cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V4_MMI(%[src0], %[src1], %[dst], %[srcstride])
2068cabdff1aSopenharmony_ci
2069cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2070cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2071cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2072cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
2073cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2074cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2075cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2076cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2077cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
2078cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2079cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2080cabdff1aSopenharmony_ci          [src0]"=&r"(src0),                [dst0]"=&r"(dst0),
2081cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2082cabdff1aSopenharmony_ci          [h]"+&r"(h),
2083cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
2084cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2085cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2086cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2087cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
2088cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
2089cabdff1aSopenharmony_ci        : "memory"
2090cabdff1aSopenharmony_ci    );
2091cabdff1aSopenharmony_ci#else
2092cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2093cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2094cabdff1aSopenharmony_ci    int x, y;
2095cabdff1aSopenharmony_ci
2096cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2097cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2098cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, srcstride);
2099cabdff1aSopenharmony_ci        dst += dststride;
2100cabdff1aSopenharmony_ci        src += srcstride;
2101cabdff1aSopenharmony_ci    }
2102cabdff1aSopenharmony_ci#endif
2103cabdff1aSopenharmony_ci}
2104cabdff1aSopenharmony_ci
/*
 * 4-tap vertical filter for an 8-pixel-wide block, written as Loongson MMI
 * inline assembly.
 *
 * The filter row is fourtap_subpel_filters[my - 1]; taps 1..4 of that row are
 * passed to the assembly. The value produced for each pixel is described by
 * the reference formula kept in the comment inside the function. The
 * instructions that evaluate it are in PUT_VP8_EPEL8_V4_MMI, which is defined
 * outside this section.
 *
 * dst        destination pointer, advanced by dststride after every row
 * dststride  byte distance between destination rows
 * src        source pointer, advanced by srcstride after every row; the
 *            reference formula reads the rows at -srcstride, 0, +srcstride
 *            and +2*srcstride relative to it
 * srcstride  byte distance between source rows
 * h          number of rows; the loop tests h only after a row has been
 *            processed, so it has to be at least 1
 * mx         not referenced in this function
 * my         selects the filter row as my - 1; no range check is done here
 */
2105cabdff1aSopenharmony_civoid ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2106cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2107cabdff1aSopenharmony_ci{
2108cabdff1aSopenharmony_ci#if 1
    /* Assembly path. The #else branch further down is never compiled while
     * this condition reads "1". */
2109cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
2110cabdff1aSopenharmony_ci    double ftmp[9];
2111cabdff1aSopenharmony_ci    uint32_t tmp[1];
2112cabdff1aSopenharmony_ci    mips_reg src1;
2113cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2114cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2115cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2116cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
    /* NOTE(review): DECLARE_VAR_ALL64 / RESTRICT_ASM_ALL64 are defined outside
     * this section; presumably they declare and bind an extra scratch operand
     * for the load/store helper macros -- confirm in mmiutils.h. */
2117cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* The taps are stored through the integer member and later passed through
     * the .f member as "f" (floating-point register) asm inputs. */
2118cabdff1aSopenharmony_ci    filter1.i = filter[1];
2119cabdff1aSopenharmony_ci    filter2.i = filter[2];
2120cabdff1aSopenharmony_ci    filter3.i = filter[3];
2121cabdff1aSopenharmony_ci    filter4.i = filter[4];
2122cabdff1aSopenharmony_ci
    /* Reference formula for one output row (8 pixels): */
2123cabdff1aSopenharmony_ci    /*
2124cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[  srcstride] - filter[4] * src[  2*srcstride] + 64) >> 7];
2125cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2126cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2127cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2128cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7];
2129cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7];
2130cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7];
2131cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7];
2132cabdff1aSopenharmony_ci    */
2133cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (register XORed with itself)
2134cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // ftmp4 = 7; matches the ">> 7" of the reference formula, presumably
        // the shift count used inside the macro -- confirm in its definition
2135cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2136cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2137cabdff1aSopenharmony_ci
        // One row per iteration; h is decremented and tested at the bottom.
2138cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2139cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2140cabdff1aSopenharmony_ci
2141cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2142cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2143cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2144cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers (early-clobber) plus h/dst/src, which
        // are read and modified by the loop.
2145cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2146cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2147cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2148cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2149cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
2150cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2151cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2152cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2153cabdff1aSopenharmony_ci          [h]"+&r"(h),
2154cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // Inputs: rounding constant, strides and the four taps.
2155cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2156cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2157cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2158cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
2159cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
        // The asm may touch memory that is not listed as an operand.
2160cabdff1aSopenharmony_ci        : "memory"
2161cabdff1aSopenharmony_ci    );
2162cabdff1aSopenharmony_ci#else
    /* Plain C version of the same loop (compiled out, see #if 1 above). */
2163cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2164cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2165cabdff1aSopenharmony_ci    int x, y;
2166cabdff1aSopenharmony_ci
2167cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2168cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2169cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, srcstride);
2170cabdff1aSopenharmony_ci        dst += dststride;
2171cabdff1aSopenharmony_ci        src += srcstride;
2172cabdff1aSopenharmony_ci    }
2173cabdff1aSopenharmony_ci#endif
2174cabdff1aSopenharmony_ci}
2175cabdff1aSopenharmony_ci
/*
 * 4-tap vertical filter for a 4-pixel-wide block, written as Loongson MMI
 * inline assembly.
 *
 * The filter row is fourtap_subpel_filters[my - 1]; taps 1..4 of that row are
 * passed to the assembly. The value produced for each pixel is described by
 * the reference formula kept in the comment inside the function. The
 * instructions that evaluate it are in PUT_VP8_EPEL4_V4_MMI, which is defined
 * outside this section.
 *
 * dst        destination pointer, advanced by dststride after every row
 * dststride  byte distance between destination rows
 * src        source pointer, advanced by srcstride after every row; the
 *            reference formula reads the rows at -srcstride, 0, +srcstride
 *            and +2*srcstride relative to it
 * srcstride  byte distance between source rows
 * h          number of rows; the loop tests h only after a row has been
 *            processed, so it has to be at least 1
 * mx         not referenced in this function
 * my         selects the filter row as my - 1; no range check is done here
 */
2176cabdff1aSopenharmony_civoid ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2177cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2178cabdff1aSopenharmony_ci{
2179cabdff1aSopenharmony_ci#if 1
    /* Assembly path. The #else branch further down is never compiled while
     * this condition reads "1". */
2180cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
    /* Only ftmp0..ftmp5 are bound as operands in this 4-wide variant. */
2181cabdff1aSopenharmony_ci    double ftmp[6];
2182cabdff1aSopenharmony_ci    uint32_t tmp[1];
2183cabdff1aSopenharmony_ci    mips_reg src1;
2184cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2185cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2186cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2187cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
    /* NOTE(review): DECLARE_VAR_LOW32 / RESTRICT_ASM_LOW32 are defined outside
     * this section; presumably they declare and bind an extra scratch operand
     * for the 32-bit load/store helper macros -- confirm in mmiutils.h. */
2188cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* The taps are stored through the integer member and later passed through
     * the .f member as "f" (floating-point register) asm inputs. */
2189cabdff1aSopenharmony_ci    filter1.i = filter[1];
2190cabdff1aSopenharmony_ci    filter2.i = filter[2];
2191cabdff1aSopenharmony_ci    filter3.i = filter[3];
2192cabdff1aSopenharmony_ci    filter4.i = filter[4];
2193cabdff1aSopenharmony_ci
    /* Reference formula for one output row (4 pixels): */
2194cabdff1aSopenharmony_ci    /*
2195cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[  srcstride] - filter[4] * src[  2*srcstride] + 64) >> 7];
2196cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2197cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2198cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2199cabdff1aSopenharmony_ci    */
2200cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (register XORed with itself)
2201cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // ftmp4 = 7; matches the ">> 7" of the reference formula, presumably
        // the shift count used inside the macro -- confirm in its definition
2202cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2203cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2204cabdff1aSopenharmony_ci
        // One row per iteration; h is decremented and tested at the bottom.
2205cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2206cabdff1aSopenharmony_ci        PUT_VP8_EPEL4_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2207cabdff1aSopenharmony_ci
2208cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2209cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2210cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2211cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers (early-clobber) plus h/dst/src, which
        // are read and modified by the loop.
2212cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2213cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2214cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2215cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2216cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
2217cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2218cabdff1aSopenharmony_ci          [h]"+&r"(h),
2219cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // Inputs: rounding constant, strides and the four taps.
2220cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2221cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2222cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2223cabdff1aSopenharmony_ci          [filter1]"f"(filter1.f),          [filter2]"f"(filter2.f),
2224cabdff1aSopenharmony_ci          [filter3]"f"(filter3.f),          [filter4]"f"(filter4.f)
        // The asm may touch memory that is not listed as an operand.
2225cabdff1aSopenharmony_ci        : "memory"
2226cabdff1aSopenharmony_ci    );
2227cabdff1aSopenharmony_ci#else
    /* Plain C version of the same loop (compiled out, see #if 1 above). */
2228cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2229cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2230cabdff1aSopenharmony_ci    int x, y;
2231cabdff1aSopenharmony_ci
2232cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2233cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2234cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(src, filter, srcstride);
2235cabdff1aSopenharmony_ci        dst += dststride;
2236cabdff1aSopenharmony_ci        src += srcstride;
2237cabdff1aSopenharmony_ci    }
2238cabdff1aSopenharmony_ci#endif
2239cabdff1aSopenharmony_ci}
2240cabdff1aSopenharmony_ci
/*
 * 6-tap vertical filter for a 16-pixel-wide block, written as Loongson MMI
 * inline assembly.
 *
 * Each row is handled as two 8-pixel halves: PUT_VP8_EPEL8_V6_MMI is invoked
 * once on src/dst and once on src0/dst0, which are set to src + 8 and dst + 8.
 * The value produced for each pixel is described by the reference formula
 * kept in the comment inside the function. The instructions that evaluate it
 * are in PUT_VP8_EPEL8_V6_MMI, which is defined outside this section.
 *
 * dst        destination pointer, advanced by dststride after every row
 * dststride  byte distance between destination rows
 * src        source pointer, advanced by srcstride after every row; the
 *            reference formula reads the rows from -2*srcstride up to
 *            +3*srcstride relative to it
 * srcstride  byte distance between source rows
 * h          number of rows; the loop tests h only after a row has been
 *            processed, so it has to be at least 1
 * mx         not referenced in this function
 * my         selects the filter row as my - 1; no range check is done here
 */
2241cabdff1aSopenharmony_civoid ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2242cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2243cabdff1aSopenharmony_ci{
2244cabdff1aSopenharmony_ci#if 1
    /* Assembly path. The #else branch further down is never compiled while
     * this condition reads "1". */
    /* NOTE(review): taps 0 and 5 are read below although the table is named
     * fourtap_*; presumably every row holds six entries -- confirm against
     * the table definition. */
2245cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
2246cabdff1aSopenharmony_ci    double ftmp[9];
2247cabdff1aSopenharmony_ci    uint32_t tmp[1];
2248cabdff1aSopenharmony_ci    mips_reg src0, src1, dst0;
2249cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
2250cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2251cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2252cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2253cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
2254cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
    /* NOTE(review): DECLARE_VAR_ALL64 / RESTRICT_ASM_ALL64 are defined outside
     * this section; presumably they declare and bind an extra scratch operand
     * for the load/store helper macros -- confirm in mmiutils.h. */
2255cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* The taps are stored through the integer member and later passed through
     * the .f member as "f" (floating-point register) asm inputs. */
2256cabdff1aSopenharmony_ci    filter0.i = filter[0];
2257cabdff1aSopenharmony_ci    filter1.i = filter[1];
2258cabdff1aSopenharmony_ci    filter2.i = filter[2];
2259cabdff1aSopenharmony_ci    filter3.i = filter[3];
2260cabdff1aSopenharmony_ci    filter4.i = filter[4];
2261cabdff1aSopenharmony_ci    filter5.i = filter[5];
2262cabdff1aSopenharmony_ci
    /* Reference formula for one output row (16 pixels): */
2263cabdff1aSopenharmony_ci    /*
2264cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2265cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2266cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2267cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2268cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7];
2269cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7];
2270cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7];
2271cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7];
2272cabdff1aSopenharmony_ci
2273cabdff1aSopenharmony_ci    dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 8-srcstride] + filter[0]*src[ 8-2*srcstride] + filter[3]*src[ 8+srcstride] - filter[4]*src[ 8+2*srcstride] + filter[5]*src[ 8+3*srcstride] + 64) >> 7];
2274cabdff1aSopenharmony_ci    dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 9-srcstride] + filter[0]*src[ 9-2*srcstride] + filter[3]*src[ 9+srcstride] - filter[4]*src[ 9+2*srcstride] + filter[5]*src[ 9+3*srcstride] + 64) >> 7];
2275cabdff1aSopenharmony_ci    dst[10] = cm[(filter[2]*src[10] - filter[1]*src[10-srcstride] + filter[0]*src[10-2*srcstride] + filter[3]*src[10+srcstride] - filter[4]*src[10+2*srcstride] + filter[5]*src[10+3*srcstride] + 64) >> 7];
2276cabdff1aSopenharmony_ci    dst[11] = cm[(filter[2]*src[11] - filter[1]*src[11-srcstride] + filter[0]*src[11-2*srcstride] + filter[3]*src[11+srcstride] - filter[4]*src[11+2*srcstride] + filter[5]*src[11+3*srcstride] + 64) >> 7];
2277cabdff1aSopenharmony_ci    dst[12] = cm[(filter[2]*src[12] - filter[1]*src[12-srcstride] + filter[0]*src[12-2*srcstride] + filter[3]*src[12+srcstride] - filter[4]*src[12+2*srcstride] + filter[5]*src[12+3*srcstride] + 64) >> 7];
2278cabdff1aSopenharmony_ci    dst[13] = cm[(filter[2]*src[13] - filter[1]*src[13-srcstride] + filter[0]*src[13-2*srcstride] + filter[3]*src[13+srcstride] - filter[4]*src[13+2*srcstride] + filter[5]*src[13+3*srcstride] + 64) >> 7];
2279cabdff1aSopenharmony_ci    dst[14] = cm[(filter[2]*src[14] - filter[1]*src[14-srcstride] + filter[0]*src[14-2*srcstride] + filter[3]*src[14+srcstride] - filter[4]*src[14+2*srcstride] + filter[5]*src[14+3*srcstride] + 64) >> 7];
2280cabdff1aSopenharmony_ci    dst[15] = cm[(filter[2]*src[15] - filter[1]*src[15-srcstride] + filter[0]*src[15-2*srcstride] + filter[3]*src[15+srcstride] - filter[4]*src[15+2*srcstride] + filter[5]*src[15+3*srcstride] + 64) >> 7];
2281cabdff1aSopenharmony_ci    */
2282cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (register XORed with itself)
2283cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // ftmp4 = 7; matches the ">> 7" of the reference formula, presumably
        // the shift count used inside the macro -- confirm in its definition
2284cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2285cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2286cabdff1aSopenharmony_ci
        // One row per iteration; h is decremented and tested at the bottom.
2287cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2288cabdff1aSopenharmony_ci        // 0 - 7
2289cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
        // src0/dst0 address the right-hand half of the row (offset 8)
2290cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src0],    %[src],         0x08                \n\t"
2291cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst0],    %[dst],         0x08                \n\t"
2292cabdff1aSopenharmony_ci        // 8 - 15
2293cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V6_MMI(%[src0], %[src1], %[dst0], %[srcstride])
2294cabdff1aSopenharmony_ci
2295cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2296cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2297cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2298cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers (early-clobber) plus h/dst/src, which
        // are read and modified by the loop.
2299cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2300cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2301cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2302cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2303cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
2304cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2305cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2306cabdff1aSopenharmony_ci          [src0]"=&r"(src0),                [dst0]"=&r"(dst0),
2307cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2308cabdff1aSopenharmony_ci          [h]"+&r"(h),
2309cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // Inputs: rounding constant, strides and the six taps.
2310cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2311cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2312cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2313cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
2314cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
2315cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
        // The asm may touch memory that is not listed as an operand.
2316cabdff1aSopenharmony_ci        : "memory"
2317cabdff1aSopenharmony_ci    );
2318cabdff1aSopenharmony_ci#else
    /* Plain C version of the same loop (compiled out, see #if 1 above). */
2319cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2320cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2321cabdff1aSopenharmony_ci    int x, y;
2322cabdff1aSopenharmony_ci
2323cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2324cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2325cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, srcstride);
2326cabdff1aSopenharmony_ci        dst += dststride;
2327cabdff1aSopenharmony_ci        src += srcstride;
2328cabdff1aSopenharmony_ci    }
2329cabdff1aSopenharmony_ci#endif
2330cabdff1aSopenharmony_ci}
2331cabdff1aSopenharmony_ci
/*
 * 6-tap vertical filter for an 8-pixel-wide block, written as Loongson MMI
 * inline assembly.
 *
 * The filter row is fourtap_subpel_filters[my - 1]; taps 0..5 of that row are
 * passed to the assembly. The value produced for each pixel is described by
 * the reference formula kept in the comment inside the function. The
 * instructions that evaluate it are in PUT_VP8_EPEL8_V6_MMI, which is defined
 * outside this section.
 *
 * dst        destination pointer, advanced by dststride after every row
 * dststride  byte distance between destination rows
 * src        source pointer, advanced by srcstride after every row; the
 *            reference formula reads the rows from -2*srcstride up to
 *            +3*srcstride relative to it
 * srcstride  byte distance between source rows
 * h          number of rows; the loop tests h only after a row has been
 *            processed, so it has to be at least 1
 * mx         not referenced in this function
 * my         selects the filter row as my - 1; no range check is done here
 */
2332cabdff1aSopenharmony_civoid ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2333cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2334cabdff1aSopenharmony_ci{
2335cabdff1aSopenharmony_ci#if 1
    /* Assembly path. The #else branch further down is never compiled while
     * this condition reads "1". */
    /* NOTE(review): taps 0 and 5 are read below although the table is named
     * fourtap_*; presumably every row holds six entries -- confirm against
     * the table definition. */
2336cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
2337cabdff1aSopenharmony_ci    double ftmp[9];
2338cabdff1aSopenharmony_ci    uint32_t tmp[1];
2339cabdff1aSopenharmony_ci    mips_reg src1;
2340cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
2341cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2342cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2343cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2344cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
2345cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
    /* NOTE(review): DECLARE_VAR_ALL64 / RESTRICT_ASM_ALL64 are defined outside
     * this section; presumably they declare and bind an extra scratch operand
     * for the load/store helper macros -- confirm in mmiutils.h. */
2346cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* The taps are stored through the integer member and later passed through
     * the .f member as "f" (floating-point register) asm inputs. */
2347cabdff1aSopenharmony_ci    filter0.i = filter[0];
2348cabdff1aSopenharmony_ci    filter1.i = filter[1];
2349cabdff1aSopenharmony_ci    filter2.i = filter[2];
2350cabdff1aSopenharmony_ci    filter3.i = filter[3];
2351cabdff1aSopenharmony_ci    filter4.i = filter[4];
2352cabdff1aSopenharmony_ci    filter5.i = filter[5];
2353cabdff1aSopenharmony_ci
    /* Reference formula for one output row (8 pixels): */
2354cabdff1aSopenharmony_ci    /*
2355cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2356cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2357cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2358cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2359cabdff1aSopenharmony_ci    dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7];
2360cabdff1aSopenharmony_ci    dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7];
2361cabdff1aSopenharmony_ci    dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7];
2362cabdff1aSopenharmony_ci    dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7];
2363cabdff1aSopenharmony_ci    */
2364cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (register XORed with itself)
2365cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // ftmp4 = 7; matches the ">> 7" of the reference formula, presumably
        // the shift count used inside the macro -- confirm in its definition
2366cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2367cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2368cabdff1aSopenharmony_ci
        // One row per iteration; h is decremented and tested at the bottom.
2369cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2370cabdff1aSopenharmony_ci        PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
2371cabdff1aSopenharmony_ci
2372cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2373cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2374cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2375cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers (early-clobber) plus h/dst/src, which
        // are read and modified by the loop.
2376cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2377cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2378cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2379cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2380cabdff1aSopenharmony_ci          [ftmp8]"=&f"(ftmp[8]),
2381cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2382cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
2383cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2384cabdff1aSopenharmony_ci          [h]"+&r"(h),
2385cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // Inputs: rounding constant, strides and the six taps.
2386cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2387cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2388cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2389cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
2390cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
2391cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
        // The asm may touch memory that is not listed as an operand.
2392cabdff1aSopenharmony_ci        : "memory"
2393cabdff1aSopenharmony_ci    );
2394cabdff1aSopenharmony_ci#else
    /* Plain C version of the same loop (compiled out, see #if 1 above). */
2395cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2396cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2397cabdff1aSopenharmony_ci    int x, y;
2398cabdff1aSopenharmony_ci
2399cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2400cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2401cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, srcstride);
2402cabdff1aSopenharmony_ci        dst += dststride;
2403cabdff1aSopenharmony_ci        src += srcstride;
2404cabdff1aSopenharmony_ci    }
2405cabdff1aSopenharmony_ci#endif
2406cabdff1aSopenharmony_ci}
2407cabdff1aSopenharmony_ci
/*
 * 6-tap vertical filter for a 4-pixel-wide block, written as Loongson MMI
 * inline assembly.
 *
 * The filter row is fourtap_subpel_filters[my - 1]; taps 0..5 of that row are
 * passed to the assembly. The value produced for each pixel is described by
 * the reference formula kept in the comment inside the function. The
 * instructions that evaluate it are in PUT_VP8_EPEL4_V6_MMI, which is defined
 * outside this section.
 *
 * dst        destination pointer, advanced by dststride after every row
 * dststride  byte distance between destination rows
 * src        source pointer, advanced by srcstride after every row; the
 *            reference formula reads the rows from -2*srcstride up to
 *            +3*srcstride relative to it
 * srcstride  byte distance between source rows
 * h          number of rows; the loop tests h only after a row has been
 *            processed, so it has to be at least 1
 * mx         not referenced in this function
 * my         selects the filter row as my - 1; no range check is done here
 */
2408cabdff1aSopenharmony_civoid ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2409cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2410cabdff1aSopenharmony_ci{
2411cabdff1aSopenharmony_ci#if 1
    /* Assembly path. The #else branch further down is never compiled while
     * this condition reads "1". */
    /* NOTE(review): taps 0 and 5 are read below although the table is named
     * fourtap_*; presumably every row holds six entries -- confirm against
     * the table definition. */
2412cabdff1aSopenharmony_ci    const uint64_t *filter = fourtap_subpel_filters[my - 1];
    /* Only ftmp0..ftmp5 are bound as operands in this 4-wide variant. */
2413cabdff1aSopenharmony_ci    double ftmp[6];
2414cabdff1aSopenharmony_ci    uint32_t tmp[1];
2415cabdff1aSopenharmony_ci    mips_reg src1;
2416cabdff1aSopenharmony_ci    union av_intfloat64 filter0;
2417cabdff1aSopenharmony_ci    union av_intfloat64 filter1;
2418cabdff1aSopenharmony_ci    union av_intfloat64 filter2;
2419cabdff1aSopenharmony_ci    union av_intfloat64 filter3;
2420cabdff1aSopenharmony_ci    union av_intfloat64 filter4;
2421cabdff1aSopenharmony_ci    union av_intfloat64 filter5;
    /* NOTE(review): DECLARE_VAR_LOW32 / RESTRICT_ASM_LOW32 are defined outside
     * this section; presumably they declare and bind an extra scratch operand
     * for the 32-bit load/store helper macros -- confirm in mmiutils.h. */
2422cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
    /* The taps are stored through the integer member and later passed through
     * the .f member as "f" (floating-point register) asm inputs. */
2423cabdff1aSopenharmony_ci    filter0.i = filter[0];
2424cabdff1aSopenharmony_ci    filter1.i = filter[1];
2425cabdff1aSopenharmony_ci    filter2.i = filter[2];
2426cabdff1aSopenharmony_ci    filter3.i = filter[3];
2427cabdff1aSopenharmony_ci    filter4.i = filter[4];
2428cabdff1aSopenharmony_ci    filter5.i = filter[5];
2429cabdff1aSopenharmony_ci
    /* Reference formula for one output row (4 pixels): */
2430cabdff1aSopenharmony_ci    /*
2431cabdff1aSopenharmony_ci    dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2432cabdff1aSopenharmony_ci    dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2433cabdff1aSopenharmony_ci    dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2434cabdff1aSopenharmony_ci    dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2435cabdff1aSopenharmony_ci    */
2436cabdff1aSopenharmony_ci    __asm__ volatile (
        // ftmp0 = 0 (register XORed with itself)
2437cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
        // ftmp4 = 7; matches the ">> 7" of the reference formula, presumably
        // the shift count used inside the macro -- confirm in its definition
2438cabdff1aSopenharmony_ci        "li         %[tmp0],    0x07                                \n\t"
2439cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
2440cabdff1aSopenharmony_ci
        // One row per iteration; h is decremented and tested at the bottom.
2441cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2442cabdff1aSopenharmony_ci        PUT_VP8_EPEL4_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
2443cabdff1aSopenharmony_ci
2444cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2445cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[srcstride]        \n\t"
2446cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dststride]        \n\t"
2447cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers (early-clobber) plus h/dst/src, which
        // are read and modified by the loop.
2448cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2449cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2450cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2451cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
2452cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
2453cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
2454cabdff1aSopenharmony_ci          [h]"+&r"(h),
2455cabdff1aSopenharmony_ci          [dst]"+&r"(dst),                  [src]"+&r"(src)
        // Inputs: rounding constant, strides and the six taps.
2456cabdff1aSopenharmony_ci        : [ff_pw_64]"f"(ff_pw_64.f),
2457cabdff1aSopenharmony_ci          [srcstride]"r"((mips_reg)srcstride),
2458cabdff1aSopenharmony_ci          [dststride]"r"((mips_reg)dststride),
2459cabdff1aSopenharmony_ci          [filter0]"f"(filter0.f),          [filter1]"f"(filter1.f),
2460cabdff1aSopenharmony_ci          [filter2]"f"(filter2.f),          [filter3]"f"(filter3.f),
2461cabdff1aSopenharmony_ci          [filter4]"f"(filter4.f),          [filter5]"f"(filter5.f)
        // The asm may touch memory that is not listed as an operand.
2462cabdff1aSopenharmony_ci        : "memory"
2463cabdff1aSopenharmony_ci    );
2464cabdff1aSopenharmony_ci#else
    /* Plain C version of the same loop (compiled out, see #if 1 above). */
2465cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[my - 1];
2466cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2467cabdff1aSopenharmony_ci    int x, y;
2468cabdff1aSopenharmony_ci
2469cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2470cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2471cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(src, filter, srcstride);
2472cabdff1aSopenharmony_ci        dst += dststride;
2473cabdff1aSopenharmony_ci        src += srcstride;
2474cabdff1aSopenharmony_ci    }
2475cabdff1aSopenharmony_ci#endif
2476cabdff1aSopenharmony_ci}
2477cabdff1aSopenharmony_ci
2478cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2479cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2480cabdff1aSopenharmony_ci{
2481cabdff1aSopenharmony_ci#if 1
2482cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[560]);
2483cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2484cabdff1aSopenharmony_ci
2485cabdff1aSopenharmony_ci    src -= srcstride;
2486cabdff1aSopenharmony_ci    ff_put_vp8_epel16_h4_mmi(tmp, 16, src, srcstride, h + 3, mx, my);
2487cabdff1aSopenharmony_ci    tmp = tmp_array + 16;
2488cabdff1aSopenharmony_ci    ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my);
2489cabdff1aSopenharmony_ci#else
2490cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2491cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2492cabdff1aSopenharmony_ci    int x, y;
2493cabdff1aSopenharmony_ci    uint8_t tmp_array[560];
2494cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2495cabdff1aSopenharmony_ci
2496cabdff1aSopenharmony_ci    src -= srcstride;
2497cabdff1aSopenharmony_ci
2498cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2499cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2500cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2501cabdff1aSopenharmony_ci        tmp += 16;
2502cabdff1aSopenharmony_ci        src += srcstride;
2503cabdff1aSopenharmony_ci    }
2504cabdff1aSopenharmony_ci
2505cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2506cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2507cabdff1aSopenharmony_ci
2508cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2509cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2510cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 16);
2511cabdff1aSopenharmony_ci        dst += dststride;
2512cabdff1aSopenharmony_ci        tmp += 16;
2513cabdff1aSopenharmony_ci    }
2514cabdff1aSopenharmony_ci#endif
2515cabdff1aSopenharmony_ci}
2516cabdff1aSopenharmony_ci
2517cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2518cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2519cabdff1aSopenharmony_ci{
2520cabdff1aSopenharmony_ci#if 1
2521cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[152]);
2522cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2523cabdff1aSopenharmony_ci
2524cabdff1aSopenharmony_ci    src -= srcstride;
2525cabdff1aSopenharmony_ci    ff_put_vp8_epel8_h4_mmi(tmp, 8, src, srcstride, h + 3, mx, my);
2526cabdff1aSopenharmony_ci    tmp = tmp_array + 8;
2527cabdff1aSopenharmony_ci    ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my);
2528cabdff1aSopenharmony_ci#else
2529cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2530cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2531cabdff1aSopenharmony_ci    int x, y;
2532cabdff1aSopenharmony_ci    uint8_t tmp_array[152];
2533cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2534cabdff1aSopenharmony_ci
2535cabdff1aSopenharmony_ci    src -= srcstride;
2536cabdff1aSopenharmony_ci
2537cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2538cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2539cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2540cabdff1aSopenharmony_ci        tmp += 8;
2541cabdff1aSopenharmony_ci        src += srcstride;
2542cabdff1aSopenharmony_ci    }
2543cabdff1aSopenharmony_ci
2544cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2545cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2546cabdff1aSopenharmony_ci
2547cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2548cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2549cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 8);
2550cabdff1aSopenharmony_ci        dst += dststride;
2551cabdff1aSopenharmony_ci        tmp += 8;
2552cabdff1aSopenharmony_ci    }
2553cabdff1aSopenharmony_ci#endif
2554cabdff1aSopenharmony_ci}
2555cabdff1aSopenharmony_ci
2556cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2557cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2558cabdff1aSopenharmony_ci{
2559cabdff1aSopenharmony_ci#if 1
2560cabdff1aSopenharmony_ci    DECLARE_ALIGNED(4, uint8_t, tmp_array[44]);
2561cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2562cabdff1aSopenharmony_ci
2563cabdff1aSopenharmony_ci    src -= srcstride;
2564cabdff1aSopenharmony_ci    ff_put_vp8_epel4_h4_mmi(tmp, 4, src, srcstride, h + 3, mx, my);
2565cabdff1aSopenharmony_ci    tmp = tmp_array + 4;
2566cabdff1aSopenharmony_ci    ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my);
2567cabdff1aSopenharmony_ci#else
2568cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2569cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2570cabdff1aSopenharmony_ci    int x, y;
2571cabdff1aSopenharmony_ci    uint8_t tmp_array[44];
2572cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2573cabdff1aSopenharmony_ci
2574cabdff1aSopenharmony_ci    src -= srcstride;
2575cabdff1aSopenharmony_ci
2576cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2577cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2578cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2579cabdff1aSopenharmony_ci        tmp += 4;
2580cabdff1aSopenharmony_ci        src += srcstride;
2581cabdff1aSopenharmony_ci    }
2582cabdff1aSopenharmony_ci    tmp    = tmp_array + 4;
2583cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2584cabdff1aSopenharmony_ci
2585cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2586cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2587cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 4);
2588cabdff1aSopenharmony_ci        dst += dststride;
2589cabdff1aSopenharmony_ci        tmp += 4;
2590cabdff1aSopenharmony_ci    }
2591cabdff1aSopenharmony_ci#endif
2592cabdff1aSopenharmony_ci}
2593cabdff1aSopenharmony_ci
2594cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2595cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2596cabdff1aSopenharmony_ci{
2597cabdff1aSopenharmony_ci#if 1
2598cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[592]);
2599cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2600cabdff1aSopenharmony_ci
2601cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2602cabdff1aSopenharmony_ci    ff_put_vp8_epel16_h4_mmi(tmp, 16, src, srcstride, h + 5, mx, my);
2603cabdff1aSopenharmony_ci    tmp    = tmp_array + 32;
2604cabdff1aSopenharmony_ci    ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my);
2605cabdff1aSopenharmony_ci#else
2606cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2607cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2608cabdff1aSopenharmony_ci    int x, y;
2609cabdff1aSopenharmony_ci    uint8_t tmp_array[592];
2610cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2611cabdff1aSopenharmony_ci
2612cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2613cabdff1aSopenharmony_ci
2614cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2615cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2616cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2617cabdff1aSopenharmony_ci        tmp += 16;
2618cabdff1aSopenharmony_ci        src += srcstride;
2619cabdff1aSopenharmony_ci    }
2620cabdff1aSopenharmony_ci
2621cabdff1aSopenharmony_ci    tmp    = tmp_array + 32;
2622cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2623cabdff1aSopenharmony_ci
2624cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2625cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2626cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 16);
2627cabdff1aSopenharmony_ci        dst += dststride;
2628cabdff1aSopenharmony_ci        tmp += 16;
2629cabdff1aSopenharmony_ci    }
2630cabdff1aSopenharmony_ci#endif
2631cabdff1aSopenharmony_ci}
2632cabdff1aSopenharmony_ci
2633cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2634cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2635cabdff1aSopenharmony_ci{
2636cabdff1aSopenharmony_ci#if 1
2637cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[168]);
2638cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2639cabdff1aSopenharmony_ci
2640cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2641cabdff1aSopenharmony_ci    ff_put_vp8_epel8_h4_mmi(tmp, 8, src, srcstride, h + 5, mx, my);
2642cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2643cabdff1aSopenharmony_ci    ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my);
2644cabdff1aSopenharmony_ci#else
2645cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2646cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2647cabdff1aSopenharmony_ci    int x, y;
2648cabdff1aSopenharmony_ci    uint8_t tmp_array[168];
2649cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2650cabdff1aSopenharmony_ci
2651cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2652cabdff1aSopenharmony_ci
2653cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2654cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2655cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2656cabdff1aSopenharmony_ci        tmp += 8;
2657cabdff1aSopenharmony_ci        src += srcstride;
2658cabdff1aSopenharmony_ci    }
2659cabdff1aSopenharmony_ci
2660cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2661cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2662cabdff1aSopenharmony_ci
2663cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2664cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2665cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 8);
2666cabdff1aSopenharmony_ci        dst += dststride;
2667cabdff1aSopenharmony_ci        tmp += 8;
2668cabdff1aSopenharmony_ci    }
2669cabdff1aSopenharmony_ci#endif
2670cabdff1aSopenharmony_ci}
2671cabdff1aSopenharmony_ci
2672cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2673cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2674cabdff1aSopenharmony_ci{
2675cabdff1aSopenharmony_ci#if 1
2676cabdff1aSopenharmony_ci    DECLARE_ALIGNED(4, uint8_t, tmp_array[52]);
2677cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2678cabdff1aSopenharmony_ci
2679cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2680cabdff1aSopenharmony_ci    ff_put_vp8_epel4_h4_mmi(tmp, 4, src, srcstride, h + 5, mx, my);
2681cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2682cabdff1aSopenharmony_ci    ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my);
2683cabdff1aSopenharmony_ci#else
2684cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2685cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2686cabdff1aSopenharmony_ci    int x, y;
2687cabdff1aSopenharmony_ci    uint8_t tmp_array[52];
2688cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2689cabdff1aSopenharmony_ci
2690cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2691cabdff1aSopenharmony_ci
2692cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2693cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2694cabdff1aSopenharmony_ci            tmp[x] = FILTER_4TAP(src, filter, 1);
2695cabdff1aSopenharmony_ci        tmp += 4;
2696cabdff1aSopenharmony_ci        src += srcstride;
2697cabdff1aSopenharmony_ci    }
2698cabdff1aSopenharmony_ci
2699cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2700cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2701cabdff1aSopenharmony_ci
2702cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2703cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2704cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 4);
2705cabdff1aSopenharmony_ci        dst += dststride;
2706cabdff1aSopenharmony_ci        tmp += 4;
2707cabdff1aSopenharmony_ci    }
2708cabdff1aSopenharmony_ci#endif
2709cabdff1aSopenharmony_ci}
2710cabdff1aSopenharmony_ci
2711cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2712cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2713cabdff1aSopenharmony_ci{
2714cabdff1aSopenharmony_ci#if 1
2715cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[560]);
2716cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2717cabdff1aSopenharmony_ci
2718cabdff1aSopenharmony_ci    src -= srcstride;
2719cabdff1aSopenharmony_ci    ff_put_vp8_epel16_h6_mmi(tmp, 16, src, srcstride, h + 3, mx, my);
2720cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2721cabdff1aSopenharmony_ci    ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my);
2722cabdff1aSopenharmony_ci#else
2723cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2724cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2725cabdff1aSopenharmony_ci    int x, y;
2726cabdff1aSopenharmony_ci    uint8_t tmp_array[560];
2727cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2728cabdff1aSopenharmony_ci
2729cabdff1aSopenharmony_ci    src -= srcstride;
2730cabdff1aSopenharmony_ci
2731cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2732cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2733cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2734cabdff1aSopenharmony_ci        tmp += 16;
2735cabdff1aSopenharmony_ci        src += srcstride;
2736cabdff1aSopenharmony_ci    }
2737cabdff1aSopenharmony_ci
2738cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2739cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2740cabdff1aSopenharmony_ci
2741cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2742cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2743cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 16);
2744cabdff1aSopenharmony_ci        dst += dststride;
2745cabdff1aSopenharmony_ci        tmp += 16;
2746cabdff1aSopenharmony_ci    }
2747cabdff1aSopenharmony_ci#endif
2748cabdff1aSopenharmony_ci}
2749cabdff1aSopenharmony_ci
2750cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2751cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2752cabdff1aSopenharmony_ci{
2753cabdff1aSopenharmony_ci#if 1
2754cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[152]);
2755cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2756cabdff1aSopenharmony_ci
2757cabdff1aSopenharmony_ci    src -= srcstride;
2758cabdff1aSopenharmony_ci    ff_put_vp8_epel8_h6_mmi(tmp, 8, src, srcstride, h + 3, mx, my);
2759cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2760cabdff1aSopenharmony_ci    ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my);
2761cabdff1aSopenharmony_ci#else
2762cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2763cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2764cabdff1aSopenharmony_ci    int x, y;
2765cabdff1aSopenharmony_ci    uint8_t tmp_array[152];
2766cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2767cabdff1aSopenharmony_ci
2768cabdff1aSopenharmony_ci    src -= srcstride;
2769cabdff1aSopenharmony_ci
2770cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2771cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2772cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2773cabdff1aSopenharmony_ci        tmp += 8;
2774cabdff1aSopenharmony_ci        src += srcstride;
2775cabdff1aSopenharmony_ci    }
2776cabdff1aSopenharmony_ci
2777cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2778cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2779cabdff1aSopenharmony_ci
2780cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2781cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2782cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 8);
2783cabdff1aSopenharmony_ci        dst += dststride;
2784cabdff1aSopenharmony_ci        tmp += 8;
2785cabdff1aSopenharmony_ci    }
2786cabdff1aSopenharmony_ci#endif
2787cabdff1aSopenharmony_ci}
2788cabdff1aSopenharmony_ci
2789cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2790cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2791cabdff1aSopenharmony_ci{
2792cabdff1aSopenharmony_ci#if 1
2793cabdff1aSopenharmony_ci    DECLARE_ALIGNED(4, uint8_t, tmp_array[44]);
2794cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2795cabdff1aSopenharmony_ci
2796cabdff1aSopenharmony_ci    src -= srcstride;
2797cabdff1aSopenharmony_ci    ff_put_vp8_epel4_h6_mmi(tmp, 4, src, srcstride, h + 3, mx, my);
2798cabdff1aSopenharmony_ci    tmp    = tmp_array + 4;
2799cabdff1aSopenharmony_ci    ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my);
2800cabdff1aSopenharmony_ci#else
2801cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2802cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2803cabdff1aSopenharmony_ci    int x, y;
2804cabdff1aSopenharmony_ci    uint8_t tmp_array[44];
2805cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2806cabdff1aSopenharmony_ci
2807cabdff1aSopenharmony_ci    src -= srcstride;
2808cabdff1aSopenharmony_ci
2809cabdff1aSopenharmony_ci    for (y = 0; y < h + 3; y++) {
2810cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2811cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2812cabdff1aSopenharmony_ci        tmp += 4;
2813cabdff1aSopenharmony_ci        src += srcstride;
2814cabdff1aSopenharmony_ci    }
2815cabdff1aSopenharmony_ci
2816cabdff1aSopenharmony_ci    tmp    = tmp_array + 4;
2817cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2818cabdff1aSopenharmony_ci
2819cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2820cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2821cabdff1aSopenharmony_ci            dst[x] = FILTER_4TAP(tmp, filter, 4);
2822cabdff1aSopenharmony_ci        dst += dststride;
2823cabdff1aSopenharmony_ci        tmp += 4;
2824cabdff1aSopenharmony_ci    }
2825cabdff1aSopenharmony_ci#endif
2826cabdff1aSopenharmony_ci}
2827cabdff1aSopenharmony_ci
2828cabdff1aSopenharmony_civoid ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2829cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2830cabdff1aSopenharmony_ci{
2831cabdff1aSopenharmony_ci#if 1
2832cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[592]);
2833cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2834cabdff1aSopenharmony_ci
2835cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2836cabdff1aSopenharmony_ci    ff_put_vp8_epel16_h6_mmi(tmp, 16, src, srcstride, h + 5, mx, my);
2837cabdff1aSopenharmony_ci    tmp    = tmp_array + 32;
2838cabdff1aSopenharmony_ci    ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my);
2839cabdff1aSopenharmony_ci#else
2840cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2841cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2842cabdff1aSopenharmony_ci    int x, y;
2843cabdff1aSopenharmony_ci    uint8_t tmp_array[592];
2844cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2845cabdff1aSopenharmony_ci
2846cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2847cabdff1aSopenharmony_ci
2848cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2849cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2850cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2851cabdff1aSopenharmony_ci        tmp += 16;
2852cabdff1aSopenharmony_ci        src += srcstride;
2853cabdff1aSopenharmony_ci    }
2854cabdff1aSopenharmony_ci
2855cabdff1aSopenharmony_ci    tmp    = tmp_array + 32;
2856cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2857cabdff1aSopenharmony_ci
2858cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2859cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
2860cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 16);
2861cabdff1aSopenharmony_ci        dst += dststride;
2862cabdff1aSopenharmony_ci        tmp += 16;
2863cabdff1aSopenharmony_ci    }
2864cabdff1aSopenharmony_ci#endif
2865cabdff1aSopenharmony_ci}
2866cabdff1aSopenharmony_ci
2867cabdff1aSopenharmony_civoid ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2868cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2869cabdff1aSopenharmony_ci{
2870cabdff1aSopenharmony_ci#if 1
2871cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[168]);
2872cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2873cabdff1aSopenharmony_ci
2874cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2875cabdff1aSopenharmony_ci    ff_put_vp8_epel8_h6_mmi(tmp, 8, src, srcstride, h + 5, mx, my);
2876cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2877cabdff1aSopenharmony_ci    ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my);
2878cabdff1aSopenharmony_ci#else
2879cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2880cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2881cabdff1aSopenharmony_ci    int x, y;
2882cabdff1aSopenharmony_ci    uint8_t tmp_array[168];
2883cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2884cabdff1aSopenharmony_ci
2885cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2886cabdff1aSopenharmony_ci
2887cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2888cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2889cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2890cabdff1aSopenharmony_ci        tmp += 8;
2891cabdff1aSopenharmony_ci        src += srcstride;
2892cabdff1aSopenharmony_ci    }
2893cabdff1aSopenharmony_ci
2894cabdff1aSopenharmony_ci    tmp    = tmp_array + 16;
2895cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2896cabdff1aSopenharmony_ci
2897cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2898cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
2899cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 8);
2900cabdff1aSopenharmony_ci        dst += dststride;
2901cabdff1aSopenharmony_ci        tmp += 8;
2902cabdff1aSopenharmony_ci    }
2903cabdff1aSopenharmony_ci#endif
2904cabdff1aSopenharmony_ci}
2905cabdff1aSopenharmony_ci
2906cabdff1aSopenharmony_civoid ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2907cabdff1aSopenharmony_ci        ptrdiff_t srcstride, int h, int mx, int my)
2908cabdff1aSopenharmony_ci{
2909cabdff1aSopenharmony_ci#if 1
2910cabdff1aSopenharmony_ci    DECLARE_ALIGNED(4, uint8_t, tmp_array[52]);
2911cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2912cabdff1aSopenharmony_ci
2913cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2914cabdff1aSopenharmony_ci    ff_put_vp8_epel4_h6_mmi(tmp, 4, src, srcstride, h + 5, mx, my);
2915cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2916cabdff1aSopenharmony_ci    ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my);
2917cabdff1aSopenharmony_ci#else
2918cabdff1aSopenharmony_ci    const uint8_t *filter = subpel_filters[mx - 1];
2919cabdff1aSopenharmony_ci    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;
2920cabdff1aSopenharmony_ci    int x, y;
2921cabdff1aSopenharmony_ci    uint8_t tmp_array[52];
2922cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
2923cabdff1aSopenharmony_ci
2924cabdff1aSopenharmony_ci    src -= 2 * srcstride;
2925cabdff1aSopenharmony_ci
2926cabdff1aSopenharmony_ci    for (y = 0; y < h + 5; y++) {
2927cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2928cabdff1aSopenharmony_ci            tmp[x] = FILTER_6TAP(src, filter, 1);
2929cabdff1aSopenharmony_ci        tmp += 4;
2930cabdff1aSopenharmony_ci        src += srcstride;
2931cabdff1aSopenharmony_ci    }
2932cabdff1aSopenharmony_ci
2933cabdff1aSopenharmony_ci    tmp    = tmp_array + 8;
2934cabdff1aSopenharmony_ci    filter = subpel_filters[my - 1];
2935cabdff1aSopenharmony_ci
2936cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
2937cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
2938cabdff1aSopenharmony_ci            dst[x] = FILTER_6TAP(tmp, filter, 4);
2939cabdff1aSopenharmony_ci        dst += dststride;
2940cabdff1aSopenharmony_ci        tmp += 4;
2941cabdff1aSopenharmony_ci    }
2942cabdff1aSopenharmony_ci#endif
2943cabdff1aSopenharmony_ci}
2944cabdff1aSopenharmony_ci
/**
 * Put a 16-pixel-wide block using horizontal bilinear interpolation.
 *
 * For every output row, each pixel is
 *     dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3
 * with a = 8 - mx and b = mx, so 17 source bytes are read per row.
 * The active branch does this with Loongson MMI inline assembly, handling
 * each row as two 8-pixel halves; the disabled branch is the scalar
 * equivalent.
 *
 * @param dst     destination pixels
 * @param dstride distance in bytes between destination rows
 * @param src     source pixels
 * @param sstride distance in bytes between source rows
 * @param h       number of rows to produce; the assembly loop tests the
 *                counter after the body, so it runs at least once
 * @param mx      horizontal weight of the right-hand neighbour
 * @param my      not referenced by this function
 */
2945cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
2946cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
2947cabdff1aSopenharmony_ci{
2948cabdff1aSopenharmony_ci#if 1
2949cabdff1aSopenharmony_ci    union mmi_intfloat64 a, b;
2950cabdff1aSopenharmony_ci    double ftmp[7];
2951cabdff1aSopenharmony_ci    uint32_t tmp[1];
2952cabdff1aSopenharmony_ci    mips_reg dst0, src0;
2953cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
    /* Bilinear weights: a applies to src[x], b to src[x + 1]; a + b == 8. */
2954cabdff1aSopenharmony_ci    a.i = 8 - mx;
2955cabdff1aSopenharmony_ci    b.i = mx;
2956cabdff1aSopenharmony_ci
    /* Scalar formulas implemented by the assembly below, one row at a time. */
2957cabdff1aSopenharmony_ci    /*
2958cabdff1aSopenharmony_ci    dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
2959cabdff1aSopenharmony_ci    dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
2960cabdff1aSopenharmony_ci    dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
2961cabdff1aSopenharmony_ci    dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
2962cabdff1aSopenharmony_ci    dst[4] = (a * src[4] + b * src[5] + 4) >> 3;
2963cabdff1aSopenharmony_ci    dst[5] = (a * src[5] + b * src[6] + 4) >> 3;
2964cabdff1aSopenharmony_ci    dst[6] = (a * src[6] + b * src[7] + 4) >> 3;
2965cabdff1aSopenharmony_ci    dst[7] = (a * src[7] + b * src[8] + 4) >> 3;
2966cabdff1aSopenharmony_ci
2967cabdff1aSopenharmony_ci    dst[ 8] = (a * src[ 8] + b * src[ 9] + 4) >> 3;
2968cabdff1aSopenharmony_ci    dst[ 9] = (a * src[ 9] + b * src[10] + 4) >> 3;
2969cabdff1aSopenharmony_ci    dst[10] = (a * src[10] + b * src[11] + 4) >> 3;
2970cabdff1aSopenharmony_ci    dst[11] = (a * src[11] + b * src[12] + 4) >> 3;
2971cabdff1aSopenharmony_ci    dst[12] = (a * src[12] + b * src[13] + 4) >> 3;
2972cabdff1aSopenharmony_ci    dst[13] = (a * src[13] + b * src[14] + 4) >> 3;
2973cabdff1aSopenharmony_ci    dst[14] = (a * src[14] + b * src[15] + 4) >> 3;
2974cabdff1aSopenharmony_ci    dst[15] = (a * src[15] + b * src[16] + 4) >> 3;
2975cabdff1aSopenharmony_ci    */
2976cabdff1aSopenharmony_ci    __asm__ volatile (
        // Setup: ftmp0 is cleared and ftmp4 receives the constant 3.
        // NOTE(review): ftmp4 is presumably the shift count for the final
        // divide-by-8 inside PUT_VP8_BILINEAR8_H_MMI -- confirm in the macro.
2977cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
2978cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
2979cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
        // NOTE(review): pshufh with the all-zero selector in ftmp0 presumably
        // replicates the low 16-bit lane of a and b across the register --
        // confirm against the Loongson MMI manual.
2980cabdff1aSopenharmony_ci        "pshufh     %[a],       %[a],           %[ftmp0]            \n\t"
2981cabdff1aSopenharmony_ci        "pshufh     %[b],       %[b],           %[ftmp0]            \n\t"
2982cabdff1aSopenharmony_ci
        // Row loop.
2983cabdff1aSopenharmony_ci        "1:                                                         \n\t"
2984cabdff1aSopenharmony_ci        // 0 - 7
2985cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst])
        // src0 and dst0 address the second half of the row, 8 bytes further on.
2986cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src0],    %[src],         0x08                \n\t"
2987cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst0],    %[dst],         0x08                \n\t"
2988cabdff1aSopenharmony_ci        // 8 - 15
2989cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_H_MMI(%[src0], %[dst0])
2990cabdff1aSopenharmony_ci
        // Count the row, step both pointers by their strides, repeat until
        // h reaches zero.
2991cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
2992cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
2993cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
2994cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
        // Outputs: scratch registers are early-clobber write-only; h, dst,
        // src, a and b are read-write because the assembly modifies them.
2995cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
2996cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
2997cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
2998cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),
2999cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3000cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3001cabdff1aSopenharmony_ci          [dst0]"=&r"(dst0),            [src0]"=&r"(src0),
3002cabdff1aSopenharmony_ci          [h]"+&r"(h),
3003cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3004cabdff1aSopenharmony_ci          [a]"+&f"(a.f),                [b]"+&f"(b.f)
        // Inputs: the two strides widened to mips_reg, and the ff_pw_4
        // constant (NOTE(review): presumably the +4 rounding term -- confirm).
3005cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3006cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3007cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3008cabdff1aSopenharmony_ci        : "memory"
3009cabdff1aSopenharmony_ci    );
3010cabdff1aSopenharmony_ci#else
    /* Scalar reference implementation (compiled out). */
3011cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3012cabdff1aSopenharmony_ci    int x, y;
3013cabdff1aSopenharmony_ci
3014cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3015cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
3016cabdff1aSopenharmony_ci            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3017cabdff1aSopenharmony_ci        dst += dstride;
3018cabdff1aSopenharmony_ci        src += sstride;
3019cabdff1aSopenharmony_ci    }
3020cabdff1aSopenharmony_ci#endif
3021cabdff1aSopenharmony_ci}
3022cabdff1aSopenharmony_ci
3023cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3024cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3025cabdff1aSopenharmony_ci{
3026cabdff1aSopenharmony_ci#if 1
3027cabdff1aSopenharmony_ci    union mmi_intfloat64 c, d;
3028cabdff1aSopenharmony_ci    double ftmp[7];
3029cabdff1aSopenharmony_ci    uint32_t tmp[1];
3030cabdff1aSopenharmony_ci    mips_reg src0, src1, dst0;
3031cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
3032cabdff1aSopenharmony_ci    c.i = 8 - my;
3033cabdff1aSopenharmony_ci    d.i = my;
3034cabdff1aSopenharmony_ci
3035cabdff1aSopenharmony_ci    /*
3036cabdff1aSopenharmony_ci    dst[0] = (c * src[0] + d * src[    sstride] + 4) >> 3;
3037cabdff1aSopenharmony_ci    dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3038cabdff1aSopenharmony_ci    dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3039cabdff1aSopenharmony_ci    dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3040cabdff1aSopenharmony_ci    dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3;
3041cabdff1aSopenharmony_ci    dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3;
3042cabdff1aSopenharmony_ci    dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3;
3043cabdff1aSopenharmony_ci    dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3;
3044cabdff1aSopenharmony_ci    */
3045cabdff1aSopenharmony_ci    __asm__ volatile (
3046cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
3047cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
3048cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
3049cabdff1aSopenharmony_ci        "pshufh     %[c],       %[c],           %[ftmp0]            \n\t"
3050cabdff1aSopenharmony_ci        "pshufh     %[d],       %[d],           %[ftmp0]            \n\t"
3051cabdff1aSopenharmony_ci
3052cabdff1aSopenharmony_ci        "1:                                                         \n\t"
3053cabdff1aSopenharmony_ci        // 0 - 7
3054cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride])
3055cabdff1aSopenharmony_ci        PTR_ADDIU  "%[src0],    %[src],         0x08                \n\t"
3056cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst0],    %[dst],         0x08                \n\t"
3057cabdff1aSopenharmony_ci        // 8 - 15
3058cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_V_MMI(%[src0], %[src1], %[dst0], %[sstride])
3059cabdff1aSopenharmony_ci
3060cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
3061cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
3062cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
3063cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
3064cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
3065cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
3066cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
3067cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),
3068cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3069cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3070cabdff1aSopenharmony_ci          [src0]"=&r"(src0),            [dst0]"=&r"(dst0),
3071cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
3072cabdff1aSopenharmony_ci          [h]"+&r"(h),
3073cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3074cabdff1aSopenharmony_ci          [c]"+&f"(c.f),                [d]"+&f"(d.f)
3075cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3076cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3077cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3078cabdff1aSopenharmony_ci        : "memory"
3079cabdff1aSopenharmony_ci    );
3080cabdff1aSopenharmony_ci#else
3081cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3082cabdff1aSopenharmony_ci    int x, y;
3083cabdff1aSopenharmony_ci
3084cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3085cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
3086cabdff1aSopenharmony_ci            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3087cabdff1aSopenharmony_ci        dst += dstride;
3088cabdff1aSopenharmony_ci        src += sstride;
3089cabdff1aSopenharmony_ci    }
3090cabdff1aSopenharmony_ci#endif
3091cabdff1aSopenharmony_ci}
3092cabdff1aSopenharmony_ci
3093cabdff1aSopenharmony_civoid ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3094cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3095cabdff1aSopenharmony_ci{
3096cabdff1aSopenharmony_ci#if 1
3097cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[528]);
3098cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3099cabdff1aSopenharmony_ci
3100cabdff1aSopenharmony_ci    ff_put_vp8_bilinear16_h_mmi(tmp, 16, src, sstride, h + 1, mx, my);
3101cabdff1aSopenharmony_ci    ff_put_vp8_bilinear16_v_mmi(dst, dstride, tmp, 16, h, mx, my);
3102cabdff1aSopenharmony_ci#else
3103cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3104cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3105cabdff1aSopenharmony_ci    int x, y;
3106cabdff1aSopenharmony_ci    uint8_t tmp_array[528];
3107cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3108cabdff1aSopenharmony_ci
3109cabdff1aSopenharmony_ci    for (y = 0; y < h + 1; y++) {
3110cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
3111cabdff1aSopenharmony_ci            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3112cabdff1aSopenharmony_ci        tmp += 16;
3113cabdff1aSopenharmony_ci        src += sstride;
3114cabdff1aSopenharmony_ci    }
3115cabdff1aSopenharmony_ci
3116cabdff1aSopenharmony_ci    tmp = tmp_array;
3117cabdff1aSopenharmony_ci
3118cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3119cabdff1aSopenharmony_ci        for (x = 0; x < 16; x++)
3120cabdff1aSopenharmony_ci            dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3;
3121cabdff1aSopenharmony_ci        dst += dstride;
3122cabdff1aSopenharmony_ci        tmp += 16;
3123cabdff1aSopenharmony_ci    }
3124cabdff1aSopenharmony_ci#endif
3125cabdff1aSopenharmony_ci}
3126cabdff1aSopenharmony_ci
3127cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3128cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3129cabdff1aSopenharmony_ci{
3130cabdff1aSopenharmony_ci#if 1
3131cabdff1aSopenharmony_ci    union mmi_intfloat64 a, b;
3132cabdff1aSopenharmony_ci    double ftmp[7];
3133cabdff1aSopenharmony_ci    uint32_t tmp[1];
3134cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
3135cabdff1aSopenharmony_ci    a.i = 8 - mx;
3136cabdff1aSopenharmony_ci    b.i = mx;
3137cabdff1aSopenharmony_ci
3138cabdff1aSopenharmony_ci    /*
3139cabdff1aSopenharmony_ci    dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
3140cabdff1aSopenharmony_ci    dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
3141cabdff1aSopenharmony_ci    dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
3142cabdff1aSopenharmony_ci    dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
3143cabdff1aSopenharmony_ci    dst[4] = (a * src[4] + b * src[5] + 4) >> 3;
3144cabdff1aSopenharmony_ci    dst[5] = (a * src[5] + b * src[6] + 4) >> 3;
3145cabdff1aSopenharmony_ci    dst[6] = (a * src[6] + b * src[7] + 4) >> 3;
3146cabdff1aSopenharmony_ci    dst[7] = (a * src[7] + b * src[8] + 4) >> 3;
3147cabdff1aSopenharmony_ci    */
3148cabdff1aSopenharmony_ci    __asm__ volatile (
3149cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
3150cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
3151cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
3152cabdff1aSopenharmony_ci        "pshufh     %[a],       %[a],           %[ftmp0]            \n\t"
3153cabdff1aSopenharmony_ci        "pshufh     %[b],       %[b],           %[ftmp0]            \n\t"
3154cabdff1aSopenharmony_ci
3155cabdff1aSopenharmony_ci        "1:                                                         \n\t"
3156cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst])
3157cabdff1aSopenharmony_ci
3158cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
3159cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
3160cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
3161cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
3162cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
3163cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
3164cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
3165cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),
3166cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3167cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3168cabdff1aSopenharmony_ci          [h]"+&r"(h),
3169cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3170cabdff1aSopenharmony_ci          [a]"+&f"(a.f),                [b]"+&f"(b.f)
3171cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3172cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3173cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3174cabdff1aSopenharmony_ci        : "memory"
3175cabdff1aSopenharmony_ci    );
3176cabdff1aSopenharmony_ci#else
3177cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3178cabdff1aSopenharmony_ci    int x, y;
3179cabdff1aSopenharmony_ci
3180cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3181cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
3182cabdff1aSopenharmony_ci            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3183cabdff1aSopenharmony_ci        dst += dstride;
3184cabdff1aSopenharmony_ci        src += sstride;
3185cabdff1aSopenharmony_ci    }
3186cabdff1aSopenharmony_ci#endif
3187cabdff1aSopenharmony_ci}
3188cabdff1aSopenharmony_ci
3189cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3190cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3191cabdff1aSopenharmony_ci{
3192cabdff1aSopenharmony_ci#if 1
3193cabdff1aSopenharmony_ci    union mmi_intfloat64 c, d;
3194cabdff1aSopenharmony_ci    double ftmp[7];
3195cabdff1aSopenharmony_ci    uint32_t tmp[1];
3196cabdff1aSopenharmony_ci    mips_reg src1;
3197cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
3198cabdff1aSopenharmony_ci    c.i = 8 - my;
3199cabdff1aSopenharmony_ci    d.i = my;
3200cabdff1aSopenharmony_ci
3201cabdff1aSopenharmony_ci    /*
3202cabdff1aSopenharmony_ci    dst[0] = (c * src[0] + d * src[    sstride] + 4) >> 3;
3203cabdff1aSopenharmony_ci    dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3204cabdff1aSopenharmony_ci    dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3205cabdff1aSopenharmony_ci    dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3206cabdff1aSopenharmony_ci    dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3;
3207cabdff1aSopenharmony_ci    dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3;
3208cabdff1aSopenharmony_ci    dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3;
3209cabdff1aSopenharmony_ci    dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3;
3210cabdff1aSopenharmony_ci    */
3211cabdff1aSopenharmony_ci    __asm__ volatile (
3212cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
3213cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
3214cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
3215cabdff1aSopenharmony_ci        "pshufh     %[c],       %[c],           %[ftmp0]            \n\t"
3216cabdff1aSopenharmony_ci        "pshufh     %[d],       %[d],           %[ftmp0]            \n\t"
3217cabdff1aSopenharmony_ci
3218cabdff1aSopenharmony_ci        "1:                                                         \n\t"
3219cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride])
3220cabdff1aSopenharmony_ci
3221cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
3222cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
3223cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
3224cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
3225cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
3226cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
3227cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
3228cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]),
3229cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3230cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3231cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
3232cabdff1aSopenharmony_ci          [h]"+&r"(h),
3233cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3234cabdff1aSopenharmony_ci          [c]"+&f"(c.f),                [d]"+&f"(d.f)
3235cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3236cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3237cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3238cabdff1aSopenharmony_ci        : "memory"
3239cabdff1aSopenharmony_ci    );
3240cabdff1aSopenharmony_ci#else
3241cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3242cabdff1aSopenharmony_ci    int x, y;
3243cabdff1aSopenharmony_ci
3244cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3245cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
3246cabdff1aSopenharmony_ci            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3247cabdff1aSopenharmony_ci        dst += dstride;
3248cabdff1aSopenharmony_ci        src += sstride;
3249cabdff1aSopenharmony_ci    }
3250cabdff1aSopenharmony_ci#endif
3251cabdff1aSopenharmony_ci}
3252cabdff1aSopenharmony_ci
3253cabdff1aSopenharmony_civoid ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3254cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3255cabdff1aSopenharmony_ci{
3256cabdff1aSopenharmony_ci#if 1
3257cabdff1aSopenharmony_ci    DECLARE_ALIGNED(8, uint8_t, tmp_array[136]);
3258cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3259cabdff1aSopenharmony_ci
3260cabdff1aSopenharmony_ci    ff_put_vp8_bilinear8_h_mmi(tmp, 8, src, sstride, h + 1, mx, my);
3261cabdff1aSopenharmony_ci    ff_put_vp8_bilinear8_v_mmi(dst, dstride, tmp, 8, h, mx, my);
3262cabdff1aSopenharmony_ci#else
3263cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3264cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3265cabdff1aSopenharmony_ci    int x, y;
3266cabdff1aSopenharmony_ci    uint8_t tmp_array[136];
3267cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3268cabdff1aSopenharmony_ci
3269cabdff1aSopenharmony_ci    for (y = 0; y < h + 1; y++) {
3270cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
3271cabdff1aSopenharmony_ci            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3272cabdff1aSopenharmony_ci        tmp += 8;
3273cabdff1aSopenharmony_ci        src += sstride;
3274cabdff1aSopenharmony_ci    }
3275cabdff1aSopenharmony_ci
3276cabdff1aSopenharmony_ci    tmp = tmp_array;
3277cabdff1aSopenharmony_ci
3278cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3279cabdff1aSopenharmony_ci        for (x = 0; x < 8; x++)
3280cabdff1aSopenharmony_ci            dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3;
3281cabdff1aSopenharmony_ci        dst += dstride;
3282cabdff1aSopenharmony_ci        tmp += 8;
3283cabdff1aSopenharmony_ci    }
3284cabdff1aSopenharmony_ci#endif
3285cabdff1aSopenharmony_ci}
3286cabdff1aSopenharmony_ci
3287cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3288cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3289cabdff1aSopenharmony_ci{
3290cabdff1aSopenharmony_ci#if 1
3291cabdff1aSopenharmony_ci    union mmi_intfloat64 a, b;
3292cabdff1aSopenharmony_ci    double ftmp[5];
3293cabdff1aSopenharmony_ci    uint32_t tmp[1];
3294cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
3295cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
3296cabdff1aSopenharmony_ci    a.i = 8 - mx;
3297cabdff1aSopenharmony_ci    b.i = mx;
3298cabdff1aSopenharmony_ci
3299cabdff1aSopenharmony_ci    /*
3300cabdff1aSopenharmony_ci    dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
3301cabdff1aSopenharmony_ci    dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
3302cabdff1aSopenharmony_ci    dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
3303cabdff1aSopenharmony_ci    dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
3304cabdff1aSopenharmony_ci    */
3305cabdff1aSopenharmony_ci    __asm__ volatile (
3306cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
3307cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
3308cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
3309cabdff1aSopenharmony_ci        "pshufh     %[a],       %[a],           %[ftmp0]            \n\t"
3310cabdff1aSopenharmony_ci        "pshufh     %[b],       %[b],           %[ftmp0]            \n\t"
3311cabdff1aSopenharmony_ci
3312cabdff1aSopenharmony_ci        "1:                                                         \n\t"
3313cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR4_H_MMI(%[src], %[dst])
3314cabdff1aSopenharmony_ci
3315cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
3316cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
3317cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
3318cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
3319cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
3320cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
3321cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),
3322cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3323cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
3324cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3325cabdff1aSopenharmony_ci          [h]"+&r"(h),
3326cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3327cabdff1aSopenharmony_ci          [a]"+&f"(a.f),                [b]"+&f"(b.f)
3328cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3329cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3330cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3331cabdff1aSopenharmony_ci        : "memory"
3332cabdff1aSopenharmony_ci    );
3333cabdff1aSopenharmony_ci#else
3334cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3335cabdff1aSopenharmony_ci    int x, y;
3336cabdff1aSopenharmony_ci
3337cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3338cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
3339cabdff1aSopenharmony_ci            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3340cabdff1aSopenharmony_ci        dst += dstride;
3341cabdff1aSopenharmony_ci        src += sstride;
3342cabdff1aSopenharmony_ci    }
3343cabdff1aSopenharmony_ci#endif
3344cabdff1aSopenharmony_ci}
3345cabdff1aSopenharmony_ci
3346cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3347cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3348cabdff1aSopenharmony_ci{
3349cabdff1aSopenharmony_ci#if 1
3350cabdff1aSopenharmony_ci    union mmi_intfloat64 c, d;
3351cabdff1aSopenharmony_ci    double ftmp[7];
3352cabdff1aSopenharmony_ci    uint32_t tmp[1];
3353cabdff1aSopenharmony_ci    mips_reg src1;
3354cabdff1aSopenharmony_ci    DECLARE_VAR_LOW32;
3355cabdff1aSopenharmony_ci    DECLARE_VAR_ALL64;
3356cabdff1aSopenharmony_ci    c.i = 8 - my;
3357cabdff1aSopenharmony_ci    d.i = my;
3358cabdff1aSopenharmony_ci
3359cabdff1aSopenharmony_ci    /*
3360cabdff1aSopenharmony_ci    dst[0] = (c * src[0] + d * src[    sstride] + 4) >> 3;
3361cabdff1aSopenharmony_ci    dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3362cabdff1aSopenharmony_ci    dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3363cabdff1aSopenharmony_ci    dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3364cabdff1aSopenharmony_ci    */
3365cabdff1aSopenharmony_ci    __asm__ volatile (
3366cabdff1aSopenharmony_ci        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
3367cabdff1aSopenharmony_ci        "li         %[tmp0],    0x03                                \n\t"
3368cabdff1aSopenharmony_ci        "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
3369cabdff1aSopenharmony_ci        "pshufh     %[c],       %[c],           %[ftmp0]            \n\t"
3370cabdff1aSopenharmony_ci        "pshufh     %[d],       %[d],           %[ftmp0]            \n\t"
3371cabdff1aSopenharmony_ci
3372cabdff1aSopenharmony_ci        "1:                                                         \n\t"
3373cabdff1aSopenharmony_ci        PUT_VP8_BILINEAR4_V_MMI(%[src], %[src1], %[dst], %[sstride])
3374cabdff1aSopenharmony_ci
3375cabdff1aSopenharmony_ci        "addiu      %[h],       %[h],           -0x01               \n\t"
3376cabdff1aSopenharmony_ci        PTR_ADDU   "%[src],     %[src],         %[sstride]          \n\t"
3377cabdff1aSopenharmony_ci        PTR_ADDU   "%[dst],     %[dst],         %[dstride]          \n\t"
3378cabdff1aSopenharmony_ci        "bnez       %[h],       1b                                  \n\t"
3379cabdff1aSopenharmony_ci        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
3380cabdff1aSopenharmony_ci          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
3381cabdff1aSopenharmony_ci          [ftmp4]"=&f"(ftmp[4]),
3382cabdff1aSopenharmony_ci          [tmp0]"=&r"(tmp[0]),
3383cabdff1aSopenharmony_ci          RESTRICT_ASM_LOW32
3384cabdff1aSopenharmony_ci          RESTRICT_ASM_ALL64
3385cabdff1aSopenharmony_ci          [src1]"=&r"(src1),
3386cabdff1aSopenharmony_ci          [h]"+&r"(h),
3387cabdff1aSopenharmony_ci          [dst]"+&r"(dst),              [src]"+&r"(src),
3388cabdff1aSopenharmony_ci          [c]"+&f"(c.f),                [d]"+&f"(d.f)
3389cabdff1aSopenharmony_ci        : [sstride]"r"((mips_reg)sstride),
3390cabdff1aSopenharmony_ci          [dstride]"r"((mips_reg)dstride),
3391cabdff1aSopenharmony_ci          [ff_pw_4]"f"(ff_pw_4.f)
3392cabdff1aSopenharmony_ci        : "memory"
3393cabdff1aSopenharmony_ci    );
3394cabdff1aSopenharmony_ci#else
3395cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3396cabdff1aSopenharmony_ci    int x, y;
3397cabdff1aSopenharmony_ci
3398cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3399cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
3400cabdff1aSopenharmony_ci            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3401cabdff1aSopenharmony_ci        dst += dstride;
3402cabdff1aSopenharmony_ci        src += sstride;
3403cabdff1aSopenharmony_ci    }
3404cabdff1aSopenharmony_ci#endif
3405cabdff1aSopenharmony_ci}
3406cabdff1aSopenharmony_ci
3407cabdff1aSopenharmony_civoid ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3408cabdff1aSopenharmony_ci        ptrdiff_t sstride, int h, int mx, int my)
3409cabdff1aSopenharmony_ci{
3410cabdff1aSopenharmony_ci#if 1
3411cabdff1aSopenharmony_ci    DECLARE_ALIGNED(4, uint8_t, tmp_array[36]);
3412cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3413cabdff1aSopenharmony_ci
3414cabdff1aSopenharmony_ci    ff_put_vp8_bilinear4_h_mmi(tmp, 4, src, sstride, h + 1, mx, my);
3415cabdff1aSopenharmony_ci    ff_put_vp8_bilinear4_v_mmi(dst, dstride, tmp, 4, h, mx, my);
3416cabdff1aSopenharmony_ci#else
3417cabdff1aSopenharmony_ci    int a = 8 - mx, b = mx;
3418cabdff1aSopenharmony_ci    int c = 8 - my, d = my;
3419cabdff1aSopenharmony_ci    int x, y;
3420cabdff1aSopenharmony_ci    uint8_t tmp_array[36];
3421cabdff1aSopenharmony_ci    uint8_t *tmp = tmp_array;
3422cabdff1aSopenharmony_ci
3423cabdff1aSopenharmony_ci    for (y = 0; y < h + 1; y++) {
3424cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
3425cabdff1aSopenharmony_ci            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3426cabdff1aSopenharmony_ci        tmp += 4;
3427cabdff1aSopenharmony_ci        src += sstride;
3428cabdff1aSopenharmony_ci    }
3429cabdff1aSopenharmony_ci
3430cabdff1aSopenharmony_ci    tmp = tmp_array;
3431cabdff1aSopenharmony_ci
3432cabdff1aSopenharmony_ci    for (y = 0; y < h; y++) {
3433cabdff1aSopenharmony_ci        for (x = 0; x < 4; x++)
3434cabdff1aSopenharmony_ci            dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3;
3435cabdff1aSopenharmony_ci        dst += dstride;
3436cabdff1aSopenharmony_ci        tmp += 4;
3437cabdff1aSopenharmony_ci    }
3438cabdff1aSopenharmony_ci#endif
3439cabdff1aSopenharmony_ci}
3440