1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "vp3dsp_mips.h"
22cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
23cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h"
24cabdff1aSopenharmony_ci#include "libavutil/common.h"
25cabdff1aSopenharmony_ci#include "libavcodec/rnd_avg.h"
26cabdff1aSopenharmony_ci
/*
 * Emit the asm text that places the integer constant `value` in the FP
 * register `dst`: "li" into %[tmp1], "dmtc1" from %[tmp1] into dst, then
 * "pshufh dst, dst, %[ftmp10]".
 *
 * The asm statements visible in this file clear %[ftmp10] with pxor before
 * the first use of this macro, so the shuffle selector is zero; presumably
 * that replicates the low halfword of dst into all four halfword lanes
 * (NOTE(review): confirm against the Loongson MMI description of pshufh).
 *
 * Requirements on the enclosing asm statement: it must declare operands named
 * tmp1 (general-purpose register, overwritten here) and ftmp10.
 * Both arguments are stringified, so pass them as operand references or
 * literals, e.g. LOAD_CONST(%[ftmp8], 64277).
 */
27cabdff1aSopenharmony_ci#define LOAD_CONST(dst, value)                        \
28cabdff1aSopenharmony_ci    "li     %[tmp1],      "#value"              \n\t" \
29cabdff1aSopenharmony_ci    "dmtc1  %[tmp1],      "#dst"                \n\t" \
30cabdff1aSopenharmony_ci    "pshufh "#dst",       "#dst",     %[ftmp10] \n\t"
31cabdff1aSopenharmony_ci
/**
 * In-place IDCT butterfly pass over a block of int16_t coefficients,
 * written with Loongson MMI inline assembly.
 *
 * The asm loop runs twice. Each iteration loads eight 64-bit words from byte
 * offsets 0x00, 0x10, ..., 0x70 of input (four int16_t lanes per word; the
 * words are called in0..in7 in the comments below), runs the butterfly on the
 * four lanes in parallel, stores the eight results back to the same offsets
 * and advances input by 8 bytes. Bytes 0..127 (64 coefficients) are therefore
 * read and rewritten. No rounding bias and no final shift are applied here.
 *
 * Notation used in the comments inside the asm:
 *   H(c, v) = pmulhh(c, v), used for the constants 12785 and 25080.
 *   U(c, v) = the longer sequence used for 64277, 54491, 36410, 46341 and
 *             60547, which exceed 0x7fff and so do not fit a signed halfword:
 *               s = pcmpgth(0, v)   sign mask (as the mnemonic suggests:
 *                                   all-ones where v is negative)
 *               m = s | 1           -1 or +1 per lane
 *               r = pmulhuh(c, v * m) * m + s
 *             i.e. an unsigned high multiply on the magnitude, the sign put
 *             back, and the sign mask added as a correction for negative
 *             lanes. Where the term is subtracted, the "+ s" appears in the
 *             code as a subtraction of s.
 *
 * @param input coefficients to transform; overwritten with the result.
 *              NOTE(review): assumes at least 64 int16_t and an alignment
 *              suitable for ldc1/sdc1 -- confirm against the caller.
 */
32cabdff1aSopenharmony_cistatic void idct_row_mmi(int16_t *input)
33cabdff1aSopenharmony_ci{
    /* Backing objects for the asm output operands; used only as scratch. */
34cabdff1aSopenharmony_ci    double ftmp[23];
35cabdff1aSopenharmony_ci    uint64_t tmp[2];
36cabdff1aSopenharmony_ci    __asm__ volatile (
        /* ftmp10 = 0: zero operand for the sign compares and the selector
         * used by LOAD_CONST. */
37cabdff1aSopenharmony_ci        "pxor       %[ftmp10],      %[ftmp10],        %[ftmp10] \n\t"
        /* csth_1 = constant 1, OR-ed with sign masks to form m = -1 / +1. */
38cabdff1aSopenharmony_ci        LOAD_CONST(%[csth_1], 1)
        /* tmp0 = loop counter: two iterations of four lanes each. */
39cabdff1aSopenharmony_ci        "li         %[tmp0],        0x02                        \n\t"
40cabdff1aSopenharmony_ci        "1:                                                     \n\t"
41cabdff1aSopenharmony_ci        /* Load input */
42cabdff1aSopenharmony_ci        "ldc1       %[ftmp0],       0x00(%[input])              \n\t"
43cabdff1aSopenharmony_ci        "ldc1       %[ftmp1],       0x10(%[input])              \n\t"
44cabdff1aSopenharmony_ci        "ldc1       %[ftmp2],       0x20(%[input])              \n\t"
45cabdff1aSopenharmony_ci        "ldc1       %[ftmp3],       0x30(%[input])              \n\t"
46cabdff1aSopenharmony_ci        "ldc1       %[ftmp4],       0x40(%[input])              \n\t"
47cabdff1aSopenharmony_ci        "ldc1       %[ftmp5],       0x50(%[input])              \n\t"
48cabdff1aSopenharmony_ci        "ldc1       %[ftmp6],       0x60(%[input])              \n\t"
49cabdff1aSopenharmony_ci        "ldc1       %[ftmp7],       0x70(%[input])              \n\t"
50cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 64277)
51cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 12785)
        /* A = H(12785, in7) + U(64277, in1); C holds the sign mask of in1. */
52cabdff1aSopenharmony_ci        "pmulhh     %[A],           %[ftmp9],         %[ftmp7]  \n\t"
53cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],        %[ftmp1]  \n\t"
54cabdff1aSopenharmony_ci        "por        %[mask],        %[C],             %[csth_1] \n\t"
55cabdff1aSopenharmony_ci        "pmullh     %[B],           %[ftmp1],         %[mask]   \n\t"
56cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],         %[B]      \n\t"
57cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]   \n\t"
58cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[B]      \n\t"
59cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[C]      \n\t"
        /* B = H(12785, in1) - U(64277, in7); D holds the sign mask of in7.
         * ftmp7 is replaced by in7 * m here; it is not read as in7 again. */
60cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],        %[ftmp7]  \n\t"
61cabdff1aSopenharmony_ci        "por        %[mask],        %[D],             %[csth_1] \n\t"
62cabdff1aSopenharmony_ci        "pmullh     %[ftmp7],       %[ftmp7],         %[mask]   \n\t"
63cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],         %[ftmp7]  \n\t"
64cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]   \n\t"
65cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],         %[ftmp1]  \n\t"
66cabdff1aSopenharmony_ci        "psubh      %[B],           %[C],             %[B]      \n\t"
67cabdff1aSopenharmony_ci        "psubh      %[B],           %[B],             %[D]      \n\t"
68cabdff1aSopenharmony_ci
69cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 54491)
70cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 36410)
        /* C = U(36410, in5) + U(54491, in3).
         * Ad / Bd hold the sign masks of in5 / in3; ftmp1 (in1 is no longer
         * needed) serves as scratch for in5 * m. */
71cabdff1aSopenharmony_ci        "pcmpgth    %[Ad],          %[ftmp10],        %[ftmp5]  \n\t"
72cabdff1aSopenharmony_ci        "por        %[mask],        %[Ad],            %[csth_1] \n\t"
73cabdff1aSopenharmony_ci        "pmullh     %[ftmp1],       %[ftmp5],         %[mask]   \n\t"
74cabdff1aSopenharmony_ci        "pmulhuh    %[C],           %[ftmp9],         %[ftmp1]  \n\t"
75cabdff1aSopenharmony_ci        "pmullh     %[C],           %[C],             %[mask]   \n\t"
76cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],        %[ftmp3]  \n\t"
77cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],            %[csth_1] \n\t"
78cabdff1aSopenharmony_ci        "pmullh     %[D],           %[ftmp3],         %[mask]   \n\t"
79cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],         %[D]      \n\t"
80cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],             %[mask]   \n\t"
81cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[D]      \n\t"
82cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[Ad]     \n\t"
83cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[Bd]     \n\t"
        /* D = U(54491, in5) - U(36410, in3).
         * Bd is recomputed from the unchanged in3 (same value as above);
         * Ed holds the sign mask of in5. */
84cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],        %[ftmp3]  \n\t"
85cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],            %[csth_1] \n\t"
86cabdff1aSopenharmony_ci        "pmullh     %[ftmp1],       %[ftmp3],         %[mask]   \n\t"
87cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp9],         %[ftmp1]  \n\t"
88cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],             %[mask]   \n\t"
89cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp5]  \n\t"
90cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],            %[csth_1] \n\t"
91cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[ftmp5],         %[mask]   \n\t"
92cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]     \n\t"
93cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
94cabdff1aSopenharmony_ci        "psubh      %[D],           %[Ad],            %[D]      \n\t"
95cabdff1aSopenharmony_ci        "paddh      %[D],           %[D],             %[Ed]     \n\t"
96cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],             %[Bd]     \n\t"
97cabdff1aSopenharmony_ci
98cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 46341)
        /* Ad = U(46341, A - C) */
99cabdff1aSopenharmony_ci        "psubh      %[Ad],          %[A],             %[C]      \n\t"
100cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]     \n\t"
101cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],            %[csth_1] \n\t"
102cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
103cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]     \n\t"
104cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
105cabdff1aSopenharmony_ci        "paddh      %[Ad],          %[Ad],            %[Bd]     \n\t"
        /* Bd = U(46341, B - D) */
106cabdff1aSopenharmony_ci        "psubh      %[Bd],          %[B],             %[D]      \n\t"
107cabdff1aSopenharmony_ci        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]     \n\t"
108cabdff1aSopenharmony_ci        "por        %[mask],        %[Cd],            %[csth_1] \n\t"
109cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]   \n\t"
110cabdff1aSopenharmony_ci        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]     \n\t"
111cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]   \n\t"
112cabdff1aSopenharmony_ci        "paddh      %[Bd],          %[Bd],            %[Cd]     \n\t"
        /* Cd = A + C, Dd = B + D; A..D are free for reuse afterwards. */
113cabdff1aSopenharmony_ci        "paddh      %[Cd],          %[A],             %[C]      \n\t"
114cabdff1aSopenharmony_ci        "paddh      %[Dd],          %[B],             %[D]      \n\t"
        /* A = U(46341, in0 + in4) */
115cabdff1aSopenharmony_ci        "paddh      %[A],           %[ftmp0],         %[ftmp4]  \n\t"
116cabdff1aSopenharmony_ci        "pcmpgth    %[B],           %[ftmp10],        %[A]      \n\t"
117cabdff1aSopenharmony_ci        "por        %[mask],        %[B],             %[csth_1] \n\t"
118cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]   \n\t"
119cabdff1aSopenharmony_ci        "pmulhuh    %[A],           %[ftmp8],         %[A]      \n\t"
120cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]   \n\t"
121cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[B]      \n\t"
        /* B = U(46341, in0 - in4) */
122cabdff1aSopenharmony_ci        "psubh      %[B],           %[ftmp0],         %[ftmp4]  \n\t"
123cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],        %[B]      \n\t"
124cabdff1aSopenharmony_ci        "por        %[mask],        %[C],             %[csth_1] \n\t"
125cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]   \n\t"
126cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],         %[B]      \n\t"
127cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]   \n\t"
128cabdff1aSopenharmony_ci        "paddh      %[B],           %[B],             %[C]      \n\t"
129cabdff1aSopenharmony_ci
130cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 60547)
131cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 25080)
        /* C = H(25080, in6) + U(60547, in2); D holds the sign mask of in2. */
132cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]  \n\t"
133cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]  \n\t"
134cabdff1aSopenharmony_ci        "por        %[mask],        %[D],             %[csth_1] \n\t"
135cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[ftmp2],         %[mask]   \n\t"
136cabdff1aSopenharmony_ci        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]     \n\t"
137cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[Ed],            %[mask]   \n\t"
138cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[Ed]     \n\t"
139cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[D]      \n\t"
        /* D = H(25080, in2) - U(60547, in6); Ed holds the sign mask of in6.
         * ftmp6 is replaced by in6 * m here. */
140cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]  \n\t"
141cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],            %[csth_1] \n\t"
142cabdff1aSopenharmony_ci        "pmullh     %[ftmp6],       %[ftmp6],         %[mask]   \n\t"
143cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],         %[ftmp6]  \n\t"
144cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],             %[mask]   \n\t"
145cabdff1aSopenharmony_ci        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]  \n\t"
146cabdff1aSopenharmony_ci        "psubh      %[D],           %[Gd],            %[D]      \n\t"
147cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],             %[Ed]     \n\t"
        /* Second butterfly stage. The order matters: each line reads values
         * that a later line overwrites (A, C, B, D are reused in place). */
148cabdff1aSopenharmony_ci        "psubh      %[Ed],          %[A],             %[C]      \n\t"
149cabdff1aSopenharmony_ci        "paddh      %[Gd],          %[A],             %[C]      \n\t"
150cabdff1aSopenharmony_ci        "paddh      %[A],           %[B],             %[Ad]     \n\t"
151cabdff1aSopenharmony_ci        "psubh      %[C],           %[B],             %[Ad]     \n\t"
152cabdff1aSopenharmony_ci        "psubh      %[B],           %[Bd],            %[D]      \n\t"
153cabdff1aSopenharmony_ci        "paddh      %[D],           %[Bd],            %[D]      \n\t"
154cabdff1aSopenharmony_ci        /* Final sequence of operations over-write original inputs */
155cabdff1aSopenharmony_ci        "paddh      %[ftmp0],       %[Gd],            %[Cd]     \n\t"
156cabdff1aSopenharmony_ci        "paddh      %[ftmp1],       %[A],             %[D]      \n\t"
157cabdff1aSopenharmony_ci        "psubh      %[ftmp2],       %[A],             %[D]      \n\t"
158cabdff1aSopenharmony_ci        "paddh      %[ftmp3],       %[Ed],            %[Dd]     \n\t"
159cabdff1aSopenharmony_ci        "psubh      %[ftmp4],       %[Ed],            %[Dd]     \n\t"
160cabdff1aSopenharmony_ci        "paddh      %[ftmp5],       %[C],             %[B]      \n\t"
161cabdff1aSopenharmony_ci        "psubh      %[ftmp6],       %[C],             %[B]      \n\t"
162cabdff1aSopenharmony_ci        "psubh      %[ftmp7],       %[Gd],            %[Cd]     \n\t"
        /* Store the eight result words back to the offsets they came from. */
163cabdff1aSopenharmony_ci        "sdc1       %[ftmp0],       0x00(%[input])              \n\t"
164cabdff1aSopenharmony_ci        "sdc1       %[ftmp1],       0x10(%[input])              \n\t"
165cabdff1aSopenharmony_ci        "sdc1       %[ftmp2],       0x20(%[input])              \n\t"
166cabdff1aSopenharmony_ci        "sdc1       %[ftmp3],       0x30(%[input])              \n\t"
167cabdff1aSopenharmony_ci        "sdc1       %[ftmp4],       0x40(%[input])              \n\t"
168cabdff1aSopenharmony_ci        "sdc1       %[ftmp5],       0x50(%[input])              \n\t"
169cabdff1aSopenharmony_ci        "sdc1       %[ftmp6],       0x60(%[input])              \n\t"
170cabdff1aSopenharmony_ci        "sdc1       %[ftmp7],       0x70(%[input])              \n\t"
        /* Decrement the counter, step to the next four lanes (8 bytes) and
         * repeat until the counter reaches zero. */
171cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp0],        %[tmp0],          -0x01     \n\t"
172cabdff1aSopenharmony_ci        PTR_ADDIU  "%[input],       %[input],         0x08      \n\t"
173cabdff1aSopenharmony_ci        "bnez       %[tmp0],        1b                          \n\t"
        /* input is a read-write operand (the local pointer copy is advanced
         * by the loop); every other operand is an early-clobber output used
         * as a scratch register. There are no input-only operands. */
174cabdff1aSopenharmony_ci        : [input]"+&r"(input), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
175cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
176cabdff1aSopenharmony_ci          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
177cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
178cabdff1aSopenharmony_ci          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
179cabdff1aSopenharmony_ci          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
180cabdff1aSopenharmony_ci          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
181cabdff1aSopenharmony_ci          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
182cabdff1aSopenharmony_ci          [Gd]"=&f"(ftmp[21]), [csth_1]"=&f"(ftmp[22])
183cabdff1aSopenharmony_ci        :
        /* "memory": the asm reads and writes the buffer behind input. */
184cabdff1aSopenharmony_ci        : "memory"
185cabdff1aSopenharmony_ci    );
186cabdff1aSopenharmony_ci}
187cabdff1aSopenharmony_ci
188cabdff1aSopenharmony_cistatic void idct_column_true_mmi(uint8_t *dst, int stride, int16_t *input)
189cabdff1aSopenharmony_ci{
190cabdff1aSopenharmony_ci    uint8_t temp_value[8];
191cabdff1aSopenharmony_ci    double ftmp[23];
192cabdff1aSopenharmony_ci    uint64_t tmp[2];
193cabdff1aSopenharmony_ci    for (int i = 0; i < 8; ++i)
194cabdff1aSopenharmony_ci        temp_value[i] = av_clip_uint8(128 + ((46341 * input[i << 3] + (8 << 16)) >> 20));
195cabdff1aSopenharmony_ci    __asm__ volatile (
196cabdff1aSopenharmony_ci        "pxor       %[ftmp10],      %[ftmp10],          %[ftmp10] \n\t"
197cabdff1aSopenharmony_ci        "li         %[tmp0],        0x02                          \n\t"
198cabdff1aSopenharmony_ci        "1:                                                       \n\t"
199cabdff1aSopenharmony_ci        "ldc1       %[ftmp0],       0x00(%[input])                \n\t"
200cabdff1aSopenharmony_ci        "ldc1       %[ftmp4],       0x08(%[input])                \n\t"
201cabdff1aSopenharmony_ci        "ldc1       %[ftmp1],       0x10(%[input])                \n\t"
202cabdff1aSopenharmony_ci        "ldc1       %[ftmp5],       0x18(%[input])                \n\t"
203cabdff1aSopenharmony_ci        "ldc1       %[ftmp2],       0x20(%[input])                \n\t"
204cabdff1aSopenharmony_ci        "ldc1       %[ftmp6],       0x28(%[input])                \n\t"
205cabdff1aSopenharmony_ci        "ldc1       %[ftmp3],       0x30(%[input])                \n\t"
206cabdff1aSopenharmony_ci        "ldc1       %[ftmp7],       0x38(%[input])                \n\t"
207cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
208cabdff1aSopenharmony_ci                     %[A], %[B], %[C], %[D])
209cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
210cabdff1aSopenharmony_ci                     %[A], %[B], %[C], %[D])
211cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 64277)
212cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 12785)
213cabdff1aSopenharmony_ci        LOAD_CONST(%[Gd], 1)
214cabdff1aSopenharmony_ci        "pmulhh     %[A],           %[ftmp9],           %[ftmp7]  \n\t"
215cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],          %[ftmp1]  \n\t"
216cabdff1aSopenharmony_ci        "por        %[mask],        %[C],               %[Gd]     \n\t"
217cabdff1aSopenharmony_ci        "pmullh     %[B],           %[ftmp1],           %[mask]   \n\t"
218cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],           %[B]      \n\t"
219cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],               %[mask]   \n\t"
220cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],               %[B]      \n\t"
221cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],               %[C]      \n\t"
222cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],          %[ftmp7]  \n\t"
223cabdff1aSopenharmony_ci        "por        %[mask],        %[D],               %[Gd]     \n\t"
224cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[ftmp7],           %[mask]   \n\t"
225cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],           %[Ad]     \n\t"
226cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],               %[mask]   \n\t"
227cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],           %[ftmp1]  \n\t"
228cabdff1aSopenharmony_ci        "psubh      %[B],           %[C],               %[B]      \n\t"
229cabdff1aSopenharmony_ci        "psubh      %[B],           %[B],               %[D]      \n\t"
230cabdff1aSopenharmony_ci
231cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 54491)
232cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 36410)
233cabdff1aSopenharmony_ci        "pcmpgth    %[Ad],          %[ftmp10],          %[ftmp5]  \n\t"
234cabdff1aSopenharmony_ci        "por        %[mask],        %[Ad],              %[Gd]     \n\t"
235cabdff1aSopenharmony_ci        "pmullh     %[Cd],          %[ftmp5],           %[mask]   \n\t"
236cabdff1aSopenharmony_ci        "pmulhuh    %[C],           %[ftmp9],           %[Cd]     \n\t"
237cabdff1aSopenharmony_ci        "pmullh     %[C],           %[C],               %[mask]   \n\t"
238cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
239cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],              %[Gd]     \n\t"
240cabdff1aSopenharmony_ci        "pmullh     %[D],           %[ftmp3],           %[mask]   \n\t"
241cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],           %[D]      \n\t"
242cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],               %[mask]   \n\t"
243cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[D]      \n\t"
244cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[Ad]     \n\t"
245cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[Bd]     \n\t"
246cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
247cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],              %[Gd]     \n\t"
248cabdff1aSopenharmony_ci        "pmullh     %[Cd],          %[ftmp3],           %[mask]   \n\t"
249cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp9],           %[Cd]     \n\t"
250cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],               %[mask]   \n\t"
251cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],          %[ftmp5]  \n\t"
252cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],              %[Gd]     \n\t"
253cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[ftmp5],           %[mask]   \n\t"
254cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],           %[Ad]     \n\t"
255cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],              %[mask]   \n\t"
256cabdff1aSopenharmony_ci        "psubh      %[D],           %[Ad],              %[D]      \n\t"
257cabdff1aSopenharmony_ci        "paddh      %[D],           %[D],               %[Ed]     \n\t"
258cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],               %[Bd]     \n\t"
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 46341)
261cabdff1aSopenharmony_ci        "psubh      %[Ad],          %[A],             %[C]        \n\t"
262cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]       \n\t"
263cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],            %[Gd]       \n\t"
264cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
265cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]       \n\t"
266cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
267cabdff1aSopenharmony_ci        "paddh      %[Ad],          %[Ad],            %[Bd]       \n\t"
268cabdff1aSopenharmony_ci        "psubh      %[Bd],          %[B],             %[D]        \n\t"
269cabdff1aSopenharmony_ci        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]       \n\t"
270cabdff1aSopenharmony_ci        "por        %[mask],        %[Cd],            %[Gd]       \n\t"
271cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
272cabdff1aSopenharmony_ci        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]       \n\t"
273cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
274cabdff1aSopenharmony_ci        "paddh      %[Bd],          %[Bd],            %[Cd]       \n\t"
275cabdff1aSopenharmony_ci        "paddh      %[Cd],          %[A],             %[C]        \n\t"
276cabdff1aSopenharmony_ci        "paddh      %[Dd],          %[B],             %[D]        \n\t"
277cabdff1aSopenharmony_ci
278cabdff1aSopenharmony_ci        LOAD_CONST(%[Ed], 2056)
279cabdff1aSopenharmony_ci        "paddh      %[A],           %[ftmp0],         %[ftmp4]    \n\t"
280cabdff1aSopenharmony_ci        "pcmpgth    %[B],           %[ftmp10],        %[A]        \n\t"
281cabdff1aSopenharmony_ci        "por        %[mask],        %[B],             %[Gd]       \n\t"
282cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]     \n\t"
283cabdff1aSopenharmony_ci        "pmulhuh    %[A],           %[ftmp8],         %[A]        \n\t"
284cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]     \n\t"
285cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[B]        \n\t"
286cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[Ed]       \n\t"
287cabdff1aSopenharmony_ci        "psubh      %[B],           %[ftmp0],         %[ftmp4]    \n\t"
288cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],        %[B]        \n\t"
289cabdff1aSopenharmony_ci        "por        %[mask],        %[C],             %[Gd]       \n\t"
290cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]     \n\t"
291cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],         %[B]        \n\t"
292cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]     \n\t"
293cabdff1aSopenharmony_ci        "paddh      %[B],           %[B],             %[C]        \n\t"
294cabdff1aSopenharmony_ci        "paddh      %[B],           %[B],             %[Ed]       \n\t"
295cabdff1aSopenharmony_ci
296cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 60547)
297cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 25080)
298cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]    \n\t"
299cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]    \n\t"
300cabdff1aSopenharmony_ci        "por        %[mask],        %[D],             %[Gd]       \n\t"
301cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[ftmp2],         %[mask]     \n\t"
302cabdff1aSopenharmony_ci        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]       \n\t"
303cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[Ed],            %[mask]     \n\t"
304cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[Ed]       \n\t"
305cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[D]        \n\t"
306cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]    \n\t"
307cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],            %[Gd]       \n\t"
308cabdff1aSopenharmony_ci        "pmullh     %[D],           %[ftmp6],         %[mask]     \n\t"
309cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],         %[D]        \n\t"
310cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],             %[mask]     \n\t"
311cabdff1aSopenharmony_ci        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]    \n\t"
312cabdff1aSopenharmony_ci        "psubh      %[D],           %[Gd],            %[D]        \n\t"
313cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],             %[Ed]       \n\t"
314cabdff1aSopenharmony_ci        "psubh      %[Ed],          %[A],             %[C]        \n\t"
315cabdff1aSopenharmony_ci        "paddh      %[Gd],          %[A],             %[C]        \n\t"
316cabdff1aSopenharmony_ci        "paddh      %[A],           %[B],             %[Ad]       \n\t"
317cabdff1aSopenharmony_ci        "psubh      %[C],           %[B],             %[Ad]       \n\t"
318cabdff1aSopenharmony_ci        "psubh      %[B],           %[Bd],            %[D]        \n\t"
319cabdff1aSopenharmony_ci        "paddh      %[D],           %[Bd],            %[D]        \n\t"
320cabdff1aSopenharmony_ci        "por        %[mask],        %[ftmp1],         %[ftmp2]    \n\t"
321cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp3]    \n\t"
322cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp4]    \n\t"
323cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp5]    \n\t"
324cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp6]    \n\t"
325cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp7]    \n\t"
326cabdff1aSopenharmony_ci        "pcmpeqh    %[mask],        %[mask],          %[ftmp10]   \n\t"
327cabdff1aSopenharmony_ci        "packushb   %[mask],        %[mask],          %[ftmp10]   \n\t"
328cabdff1aSopenharmony_ci        "li         %[tmp1],        0x04                          \n\t"
329cabdff1aSopenharmony_ci        "dmtc1      %[tmp1],        %[ftmp8]                      \n\t"
330cabdff1aSopenharmony_ci        "paddh      %[ftmp0],       %[Gd],            %[Cd]       \n\t"
331cabdff1aSopenharmony_ci        "psrah      %[ftmp0],       %[ftmp0],         %[ftmp8]    \n\t"
332cabdff1aSopenharmony_ci        "paddh      %[ftmp1],       %[A],             %[D]        \n\t"
333cabdff1aSopenharmony_ci        "psrah      %[ftmp1],       %[ftmp1],         %[ftmp8]    \n\t"
334cabdff1aSopenharmony_ci        "psubh      %[ftmp2],       %[A],             %[D]        \n\t"
335cabdff1aSopenharmony_ci        "psrah      %[ftmp2],       %[ftmp2],         %[ftmp8]    \n\t"
336cabdff1aSopenharmony_ci        "paddh      %[ftmp3],       %[Ed],            %[Dd]       \n\t"
337cabdff1aSopenharmony_ci        "psrah      %[ftmp3],       %[ftmp3],         %[ftmp8]    \n\t"
338cabdff1aSopenharmony_ci        "psubh      %[ftmp4],       %[Ed],            %[Dd]       \n\t"
339cabdff1aSopenharmony_ci        "psrah      %[ftmp4],       %[ftmp4],         %[ftmp8]    \n\t"
340cabdff1aSopenharmony_ci        "paddh      %[ftmp5],       %[C],             %[B]        \n\t"
341cabdff1aSopenharmony_ci        "psrah      %[ftmp5],       %[ftmp5],         %[ftmp8]    \n\t"
342cabdff1aSopenharmony_ci        "psubh      %[ftmp6],       %[C],             %[B]        \n\t"
343cabdff1aSopenharmony_ci        "psrah      %[ftmp6],       %[ftmp6],         %[ftmp8]    \n\t"
344cabdff1aSopenharmony_ci        "psubh      %[ftmp7],       %[Gd],            %[Cd]       \n\t"
345cabdff1aSopenharmony_ci        "psrah      %[ftmp7],       %[ftmp7],         %[ftmp8]    \n\t"
346cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
347cabdff1aSopenharmony_ci        "packushb   %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
348cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
349cabdff1aSopenharmony_ci        "packushb   %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
350cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
351cabdff1aSopenharmony_ci        "packushb   %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
352cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
353cabdff1aSopenharmony_ci        "packushb   %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
354cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
355cabdff1aSopenharmony_ci        "packushb   %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
356cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
357cabdff1aSopenharmony_ci        "packushb   %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
358cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
359cabdff1aSopenharmony_ci        "packushb   %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
360cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
361cabdff1aSopenharmony_ci        "packushb   %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
362cabdff1aSopenharmony_ci
363cabdff1aSopenharmony_ci        "lwc1       %[Ed],          0x00(%[temp_value])           \n\t"
364cabdff1aSopenharmony_ci        "pand       %[Ed],          %[Ed],            %[mask]     \n\t"
365cabdff1aSopenharmony_ci        "paddb      %[ftmp0],       %[ftmp0],         %[Ed]       \n\t"
366cabdff1aSopenharmony_ci        "paddb      %[ftmp1],       %[ftmp1],         %[Ed]       \n\t"
367cabdff1aSopenharmony_ci        "paddb      %[ftmp2],       %[ftmp2],         %[Ed]       \n\t"
368cabdff1aSopenharmony_ci        "paddb      %[ftmp3],       %[ftmp3],         %[Ed]       \n\t"
369cabdff1aSopenharmony_ci        "paddb      %[ftmp4],       %[ftmp4],         %[Ed]       \n\t"
370cabdff1aSopenharmony_ci        "paddb      %[ftmp5],       %[ftmp5],         %[Ed]       \n\t"
371cabdff1aSopenharmony_ci        "paddb      %[ftmp6],       %[ftmp6],         %[Ed]       \n\t"
372cabdff1aSopenharmony_ci        "paddb      %[ftmp7],       %[ftmp7],         %[Ed]       \n\t"
373cabdff1aSopenharmony_ci        "swc1       %[ftmp0],       0x00(%[dst])                  \n\t"
374cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
375cabdff1aSopenharmony_ci        "swc1       %[ftmp1],       0x00(%[tmp1])                 \n\t"
376cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
377cabdff1aSopenharmony_ci        "swc1       %[ftmp2],       0x00(%[tmp1])                 \n\t"
378cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
379cabdff1aSopenharmony_ci        "swc1       %[ftmp3],       0x00(%[tmp1])                 \n\t"
380cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
381cabdff1aSopenharmony_ci        "swc1       %[ftmp4],       0x00(%[tmp1])                 \n\t"
382cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
383cabdff1aSopenharmony_ci        "swc1       %[ftmp5],       0x00(%[tmp1])                 \n\t"
384cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
385cabdff1aSopenharmony_ci        "swc1       %[ftmp6],       0x00(%[tmp1])                 \n\t"
386cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
387cabdff1aSopenharmony_ci        "swc1       %[ftmp7],       0x00(%[tmp1])                 \n\t"
388cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst],         %[dst],           0x04        \n\t"
389cabdff1aSopenharmony_ci        PTR_ADDIU  "%[input],       %[input],         0x40        \n\t"
390cabdff1aSopenharmony_ci        PTR_ADDIU  "%[temp_value],  %[temp_value],    0x04        \n\t"
391cabdff1aSopenharmony_ci        PTR_ADDIU  "%[tmp0],        %[tmp0],          -0x01       \n\t"
392cabdff1aSopenharmony_ci        "bnez       %[tmp0],        1b                            \n\t"
393cabdff1aSopenharmony_ci        : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
394cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
395cabdff1aSopenharmony_ci          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
396cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
397cabdff1aSopenharmony_ci          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
398cabdff1aSopenharmony_ci          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
399cabdff1aSopenharmony_ci          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
400cabdff1aSopenharmony_ci          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
401cabdff1aSopenharmony_ci          [Gd]"=&f"(ftmp[21]), [input]"+&r"(input)
402cabdff1aSopenharmony_ci        : [stride]"r"(stride), [temp_value]"r"(temp_value)
403cabdff1aSopenharmony_ci        : "memory"
404cabdff1aSopenharmony_ci    );
405cabdff1aSopenharmony_ci}
406cabdff1aSopenharmony_ci
407cabdff1aSopenharmony_cistatic void idct_column_false_mmi(uint8_t *dst, int stride, int16_t *input)
408cabdff1aSopenharmony_ci{
409cabdff1aSopenharmony_ci    int16_t temp_value[8];
410cabdff1aSopenharmony_ci    double ftmp[23];
411cabdff1aSopenharmony_ci    uint64_t tmp[2];
412cabdff1aSopenharmony_ci    for (int i = 0; i < 8; ++i)
413cabdff1aSopenharmony_ci        temp_value[i] = (46341 * input[i << 3] + (8 << 16)) >> 20;
414cabdff1aSopenharmony_ci    __asm__ volatile (
415cabdff1aSopenharmony_ci        "pxor       %[ftmp10],      %[ftmp10],          %[ftmp10] \n\t"
416cabdff1aSopenharmony_ci        "li         %[tmp0],        0x02                          \n\t"
417cabdff1aSopenharmony_ci        "1:                                                       \n\t"
418cabdff1aSopenharmony_ci        "ldc1       %[ftmp0],       0x00(%[input])                \n\t"
419cabdff1aSopenharmony_ci        "ldc1       %[ftmp4],       0x08(%[input])                \n\t"
420cabdff1aSopenharmony_ci        "ldc1       %[ftmp1],       0x10(%[input])                \n\t"
421cabdff1aSopenharmony_ci        "ldc1       %[ftmp5],       0x18(%[input])                \n\t"
422cabdff1aSopenharmony_ci        "ldc1       %[ftmp2],       0x20(%[input])                \n\t"
423cabdff1aSopenharmony_ci        "ldc1       %[ftmp6],       0x28(%[input])                \n\t"
424cabdff1aSopenharmony_ci        "ldc1       %[ftmp3],       0x30(%[input])                \n\t"
425cabdff1aSopenharmony_ci        "ldc1       %[ftmp7],       0x38(%[input])                \n\t"
426cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
427cabdff1aSopenharmony_ci                     %[A], %[B], %[C], %[D])
428cabdff1aSopenharmony_ci        TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
429cabdff1aSopenharmony_ci                     %[A], %[B], %[C], %[D])
430cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 64277)
431cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 12785)
432cabdff1aSopenharmony_ci        LOAD_CONST(%[Gd], 1)
433cabdff1aSopenharmony_ci        "pmulhh     %[A],           %[ftmp9],           %[ftmp7]  \n\t"
434cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],          %[ftmp1]  \n\t"
435cabdff1aSopenharmony_ci        "por        %[mask],        %[C],               %[Gd]     \n\t"
436cabdff1aSopenharmony_ci        "pmullh     %[B],           %[ftmp1],           %[mask]   \n\t"
437cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],           %[B]      \n\t"
438cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],               %[mask]   \n\t"
439cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],               %[B]      \n\t"
440cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],               %[C]      \n\t"
441cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],          %[ftmp7]  \n\t"
442cabdff1aSopenharmony_ci        "por        %[mask],        %[D],               %[Gd]     \n\t"
443cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[ftmp7],           %[mask]   \n\t"
444cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],           %[Ad]     \n\t"
445cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],               %[mask]   \n\t"
446cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],           %[ftmp1]  \n\t"
447cabdff1aSopenharmony_ci        "psubh      %[B],           %[C],               %[B]      \n\t"
448cabdff1aSopenharmony_ci        "psubh      %[B],           %[B],               %[D]      \n\t"
449cabdff1aSopenharmony_ci
450cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 54491)
451cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 36410)
452cabdff1aSopenharmony_ci        "pcmpgth    %[Ad],          %[ftmp10],          %[ftmp5]  \n\t"
453cabdff1aSopenharmony_ci        "por        %[mask],        %[Ad],              %[Gd]     \n\t"
454cabdff1aSopenharmony_ci        "pmullh     %[Cd],          %[ftmp5],           %[mask]   \n\t"
455cabdff1aSopenharmony_ci        "pmulhuh    %[C],           %[ftmp9],           %[Cd]     \n\t"
456cabdff1aSopenharmony_ci        "pmullh     %[C],           %[C],               %[mask]   \n\t"
457cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
458cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],              %[Gd]     \n\t"
459cabdff1aSopenharmony_ci        "pmullh     %[D],           %[ftmp3],           %[mask]   \n\t"
460cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],           %[D]      \n\t"
461cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],               %[mask]   \n\t"
462cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[D]      \n\t"
463cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[Ad]     \n\t"
464cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],               %[Bd]     \n\t"
465cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
466cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],              %[Gd]     \n\t"
467cabdff1aSopenharmony_ci        "pmullh     %[Cd],          %[ftmp3],           %[mask]   \n\t"
468cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp9],           %[Cd]     \n\t"
469cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],               %[mask]   \n\t"
470cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],          %[ftmp5]  \n\t"
471cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],              %[Gd]     \n\t"
472cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[ftmp5],           %[mask]   \n\t"
473cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],           %[Ad]     \n\t"
474cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],              %[mask]   \n\t"
475cabdff1aSopenharmony_ci        "psubh      %[D],           %[Ad],              %[D]      \n\t"
476cabdff1aSopenharmony_ci        "paddh      %[D],           %[D],               %[Ed]     \n\t"
477cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],               %[Bd]     \n\t"
478cabdff1aSopenharmony_ci
479cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 46341)
480cabdff1aSopenharmony_ci        "psubh      %[Ad],          %[A],             %[C]        \n\t"
481cabdff1aSopenharmony_ci        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]       \n\t"
482cabdff1aSopenharmony_ci        "por        %[mask],        %[Bd],            %[Gd]       \n\t"
483cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
484cabdff1aSopenharmony_ci        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]       \n\t"
485cabdff1aSopenharmony_ci        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
486cabdff1aSopenharmony_ci        "paddh      %[Ad],          %[Ad],            %[Bd]       \n\t"
487cabdff1aSopenharmony_ci        "psubh      %[Bd],          %[B],             %[D]        \n\t"
488cabdff1aSopenharmony_ci        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]       \n\t"
489cabdff1aSopenharmony_ci        "por        %[mask],        %[Cd],            %[Gd]       \n\t"
490cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
491cabdff1aSopenharmony_ci        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]       \n\t"
492cabdff1aSopenharmony_ci        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
493cabdff1aSopenharmony_ci        "paddh      %[Bd],          %[Bd],            %[Cd]       \n\t"
494cabdff1aSopenharmony_ci        "paddh      %[Cd],          %[A],             %[C]        \n\t"
495cabdff1aSopenharmony_ci        "paddh      %[Dd],          %[B],             %[D]        \n\t"
496cabdff1aSopenharmony_ci
497cabdff1aSopenharmony_ci        LOAD_CONST(%[Ed], 8)
498cabdff1aSopenharmony_ci        "paddh      %[A],           %[ftmp0],         %[ftmp4]    \n\t"
499cabdff1aSopenharmony_ci        "pcmpgth    %[B],           %[ftmp10],        %[A]        \n\t"
500cabdff1aSopenharmony_ci        "por        %[mask],        %[B],             %[Gd]       \n\t"
501cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]     \n\t"
502cabdff1aSopenharmony_ci        "pmulhuh    %[A],           %[ftmp8],         %[A]        \n\t"
503cabdff1aSopenharmony_ci        "pmullh     %[A],           %[A],             %[mask]     \n\t"
504cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[B]        \n\t"
505cabdff1aSopenharmony_ci        "paddh      %[A],           %[A],             %[Ed]       \n\t"
506cabdff1aSopenharmony_ci        "psubh      %[B],           %[ftmp0],         %[ftmp4]    \n\t"
507cabdff1aSopenharmony_ci        "pcmpgth    %[C],           %[ftmp10],        %[B]        \n\t"
508cabdff1aSopenharmony_ci        "por        %[mask],        %[C],             %[Gd]       \n\t"
509cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]     \n\t"
510cabdff1aSopenharmony_ci        "pmulhuh    %[B],           %[ftmp8],         %[B]        \n\t"
511cabdff1aSopenharmony_ci        "pmullh     %[B],           %[B],             %[mask]     \n\t"
512cabdff1aSopenharmony_ci        "paddh      %[B],           %[B],             %[C]        \n\t"
513cabdff1aSopenharmony_ci        "paddh      %[B],           %[B],             %[Ed]       \n\t"
514cabdff1aSopenharmony_ci
515cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp8], 60547)
516cabdff1aSopenharmony_ci        LOAD_CONST(%[ftmp9], 25080)
517cabdff1aSopenharmony_ci        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]    \n\t"
518cabdff1aSopenharmony_ci        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]    \n\t"
519cabdff1aSopenharmony_ci        "por        %[mask],        %[D],             %[Gd]       \n\t"
520cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[ftmp2],         %[mask]     \n\t"
521cabdff1aSopenharmony_ci        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]       \n\t"
522cabdff1aSopenharmony_ci        "pmullh     %[Ed],          %[Ed],            %[mask]     \n\t"
523cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[Ed]       \n\t"
524cabdff1aSopenharmony_ci        "paddh      %[C],           %[C],             %[D]        \n\t"
525cabdff1aSopenharmony_ci        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]    \n\t"
526cabdff1aSopenharmony_ci        "por        %[mask],        %[Ed],            %[Gd]       \n\t"
527cabdff1aSopenharmony_ci        "pmullh     %[D],           %[ftmp6],         %[mask]     \n\t"
528cabdff1aSopenharmony_ci        "pmulhuh    %[D],           %[ftmp8],         %[D]        \n\t"
529cabdff1aSopenharmony_ci        "pmullh     %[D],           %[D],             %[mask]     \n\t"
530cabdff1aSopenharmony_ci        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]    \n\t"
531cabdff1aSopenharmony_ci        "psubh      %[D],           %[Gd],            %[D]        \n\t"
532cabdff1aSopenharmony_ci        "psubh      %[D],           %[D],             %[Ed]       \n\t"
533cabdff1aSopenharmony_ci        "psubh      %[Ed],          %[A],             %[C]        \n\t"
534cabdff1aSopenharmony_ci        "paddh      %[Gd],          %[A],             %[C]        \n\t"
535cabdff1aSopenharmony_ci        "paddh      %[A],           %[B],             %[Ad]       \n\t"
536cabdff1aSopenharmony_ci        "psubh      %[C],           %[B],             %[Ad]       \n\t"
537cabdff1aSopenharmony_ci        "psubh      %[B],           %[Bd],            %[D]        \n\t"
538cabdff1aSopenharmony_ci        "paddh      %[D],           %[Bd],            %[D]        \n\t"
539cabdff1aSopenharmony_ci        "por        %[mask],        %[ftmp1],         %[ftmp2]    \n\t"
540cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp3]    \n\t"
541cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp4]    \n\t"
542cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp5]    \n\t"
543cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp6]    \n\t"
544cabdff1aSopenharmony_ci        "por        %[mask],        %[mask],          %[ftmp7]    \n\t"
545cabdff1aSopenharmony_ci        "pcmpeqh    %[mask],        %[mask],          %[ftmp10]   \n\t"
546cabdff1aSopenharmony_ci        "li         %[tmp1],        0x04                          \n\t"
547cabdff1aSopenharmony_ci        "dmtc1      %[tmp1],        %[ftmp8]                      \n\t"
548cabdff1aSopenharmony_ci        "paddh      %[ftmp0],       %[Gd],            %[Cd]       \n\t"
549cabdff1aSopenharmony_ci        "psrah      %[ftmp0],       %[ftmp0],         %[ftmp8]    \n\t"
550cabdff1aSopenharmony_ci        "paddh      %[ftmp1],       %[A],             %[D]        \n\t"
551cabdff1aSopenharmony_ci        "psrah      %[ftmp1],       %[ftmp1],         %[ftmp8]    \n\t"
552cabdff1aSopenharmony_ci        "psubh      %[ftmp2],       %[A],             %[D]        \n\t"
553cabdff1aSopenharmony_ci        "psrah      %[ftmp2],       %[ftmp2],         %[ftmp8]    \n\t"
554cabdff1aSopenharmony_ci        "paddh      %[ftmp3],       %[Ed],            %[Dd]       \n\t"
555cabdff1aSopenharmony_ci        "psrah      %[ftmp3],       %[ftmp3],         %[ftmp8]    \n\t"
556cabdff1aSopenharmony_ci        "psubh      %[ftmp4],       %[Ed],            %[Dd]       \n\t"
557cabdff1aSopenharmony_ci        "psrah      %[ftmp4],       %[ftmp4],         %[ftmp8]    \n\t"
558cabdff1aSopenharmony_ci        "paddh      %[ftmp5],       %[C],             %[B]        \n\t"
559cabdff1aSopenharmony_ci        "psrah      %[ftmp5],       %[ftmp5],         %[ftmp8]    \n\t"
560cabdff1aSopenharmony_ci        "psubh      %[ftmp6],       %[C],             %[B]        \n\t"
561cabdff1aSopenharmony_ci        "psrah      %[ftmp6],       %[ftmp6],         %[ftmp8]    \n\t"
562cabdff1aSopenharmony_ci        "psubh      %[ftmp7],       %[Gd],            %[Cd]       \n\t"
563cabdff1aSopenharmony_ci        "psrah      %[ftmp7],       %[ftmp7],         %[ftmp8]    \n\t"
564cabdff1aSopenharmony_ci
565cabdff1aSopenharmony_ci        /* Load from dst */
566cabdff1aSopenharmony_ci        "lwc1       %[A],           0x00(%[dst])                  \n\t"
567cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
568cabdff1aSopenharmony_ci        "lwc1       %[B],           0x00(%[tmp1])                 \n\t"
569cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
570cabdff1aSopenharmony_ci        "lwc1       %[C],           0x00(%[tmp1])                 \n\t"
571cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
572cabdff1aSopenharmony_ci        "lwc1       %[D],           0x00(%[tmp1])                 \n\t"
573cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
574cabdff1aSopenharmony_ci        "lwc1       %[Ad],          0x00(%[tmp1])                 \n\t"
575cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
576cabdff1aSopenharmony_ci        "lwc1       %[Bd],          0x00(%[tmp1])                 \n\t"
577cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
578cabdff1aSopenharmony_ci        "lwc1       %[Cd],          0x00(%[tmp1])                 \n\t"
579cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
580cabdff1aSopenharmony_ci        "lwc1       %[Dd],          0x00(%[tmp1])                 \n\t"
581cabdff1aSopenharmony_ci        "punpcklbh  %[A],           %[A],             %[ftmp10]   \n\t"
582cabdff1aSopenharmony_ci        "punpcklbh  %[B],           %[B],             %[ftmp10]   \n\t"
583cabdff1aSopenharmony_ci        "punpcklbh  %[C],           %[C],             %[ftmp10]   \n\t"
584cabdff1aSopenharmony_ci        "punpcklbh  %[D],           %[D],             %[ftmp10]   \n\t"
585cabdff1aSopenharmony_ci        "punpcklbh  %[Ad],          %[Ad],            %[ftmp10]   \n\t"
586cabdff1aSopenharmony_ci        "punpcklbh  %[Bd],          %[Bd],            %[ftmp10]   \n\t"
587cabdff1aSopenharmony_ci        "punpcklbh  %[Cd],          %[Cd],            %[ftmp10]   \n\t"
588cabdff1aSopenharmony_ci        "punpcklbh  %[Dd],          %[Dd],            %[ftmp10]   \n\t"
589cabdff1aSopenharmony_ci        "ldc1       %[Ed],          0x00(%[temp_value])           \n\t"
590cabdff1aSopenharmony_ci        "pand       %[Ed],          %[Ed],            %[mask]     \n\t"
591cabdff1aSopenharmony_ci        "pnor       %[mask],        %[mask],          %[mask]     \n\t"
592cabdff1aSopenharmony_ci        "pand       %[ftmp0],       %[ftmp0],         %[mask]     \n\t"
593cabdff1aSopenharmony_ci        "pand       %[ftmp1],       %[ftmp1],         %[mask]     \n\t"
594cabdff1aSopenharmony_ci        "pand       %[ftmp2],       %[ftmp2],         %[mask]     \n\t"
595cabdff1aSopenharmony_ci        "pand       %[ftmp3],       %[ftmp3],         %[mask]     \n\t"
596cabdff1aSopenharmony_ci        "pand       %[ftmp4],       %[ftmp4],         %[mask]     \n\t"
597cabdff1aSopenharmony_ci        "pand       %[ftmp5],       %[ftmp5],         %[mask]     \n\t"
598cabdff1aSopenharmony_ci        "pand       %[ftmp6],       %[ftmp6],         %[mask]     \n\t"
599cabdff1aSopenharmony_ci        "pand       %[ftmp7],       %[ftmp7],         %[mask]     \n\t"
600cabdff1aSopenharmony_ci        "paddh      %[ftmp0],       %[ftmp0],         %[A]        \n\t"
601cabdff1aSopenharmony_ci        "paddh      %[ftmp1],       %[ftmp1],         %[B]        \n\t"
602cabdff1aSopenharmony_ci        "paddh      %[ftmp2],       %[ftmp2],         %[C]        \n\t"
603cabdff1aSopenharmony_ci        "paddh      %[ftmp3],       %[ftmp3],         %[D]        \n\t"
604cabdff1aSopenharmony_ci        "paddh      %[ftmp4],       %[ftmp4],         %[Ad]       \n\t"
605cabdff1aSopenharmony_ci        "paddh      %[ftmp5],       %[ftmp5],         %[Bd]       \n\t"
606cabdff1aSopenharmony_ci        "paddh      %[ftmp6],       %[ftmp6],         %[Cd]       \n\t"
607cabdff1aSopenharmony_ci        "paddh      %[ftmp7],       %[ftmp7],         %[Dd]       \n\t"
608cabdff1aSopenharmony_ci        "paddh      %[ftmp0],       %[ftmp0],         %[Ed]       \n\t"
609cabdff1aSopenharmony_ci        "paddh      %[ftmp1],       %[ftmp1],         %[Ed]       \n\t"
610cabdff1aSopenharmony_ci        "paddh      %[ftmp2],       %[ftmp2],         %[Ed]       \n\t"
611cabdff1aSopenharmony_ci        "paddh      %[ftmp3],       %[ftmp3],         %[Ed]       \n\t"
612cabdff1aSopenharmony_ci        "paddh      %[ftmp4],       %[ftmp4],         %[Ed]       \n\t"
613cabdff1aSopenharmony_ci        "paddh      %[ftmp5],       %[ftmp5],         %[Ed]       \n\t"
614cabdff1aSopenharmony_ci        "paddh      %[ftmp6],       %[ftmp6],         %[Ed]       \n\t"
615cabdff1aSopenharmony_ci        "paddh      %[ftmp7],       %[ftmp7],         %[Ed]       \n\t"
616cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
617cabdff1aSopenharmony_ci        "packushb   %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
618cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
619cabdff1aSopenharmony_ci        "packushb   %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
620cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
621cabdff1aSopenharmony_ci        "packushb   %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
622cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
623cabdff1aSopenharmony_ci        "packushb   %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
624cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
625cabdff1aSopenharmony_ci        "packushb   %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
626cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
627cabdff1aSopenharmony_ci        "packushb   %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
628cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
629cabdff1aSopenharmony_ci        "packushb   %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
630cabdff1aSopenharmony_ci        "pmaxsh     %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
631cabdff1aSopenharmony_ci        "packushb   %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
632cabdff1aSopenharmony_ci        "swc1       %[ftmp0],       0x00(%[dst])                  \n\t"
633cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
634cabdff1aSopenharmony_ci        "swc1       %[ftmp1],       0x00(%[tmp1])                 \n\t"
635cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
636cabdff1aSopenharmony_ci        "swc1       %[ftmp2],       0x00(%[tmp1])                 \n\t"
637cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
638cabdff1aSopenharmony_ci        "swc1       %[ftmp3],       0x00(%[tmp1])                 \n\t"
639cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
640cabdff1aSopenharmony_ci        "swc1       %[ftmp4],       0x00(%[tmp1])                 \n\t"
641cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
642cabdff1aSopenharmony_ci        "swc1       %[ftmp5],       0x00(%[tmp1])                 \n\t"
643cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
644cabdff1aSopenharmony_ci        "swc1       %[ftmp6],       0x00(%[tmp1])                 \n\t"
645cabdff1aSopenharmony_ci        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
646cabdff1aSopenharmony_ci        "swc1       %[ftmp7],       0x00(%[tmp1])                 \n\t"
647cabdff1aSopenharmony_ci        PTR_ADDIU  "%[dst],         %[dst],           0x04        \n\t"
648cabdff1aSopenharmony_ci        PTR_ADDIU  "%[input],       %[input],         0x40        \n\t"
649cabdff1aSopenharmony_ci        PTR_ADDIU  "%[temp_value],  %[temp_value],    0x08        \n\t"
650cabdff1aSopenharmony_ci        PTR_ADDIU  "%[tmp0],        %[tmp0],          -0x01       \n\t"
651cabdff1aSopenharmony_ci        "bnez       %[tmp0],        1b                            \n\t"
652cabdff1aSopenharmony_ci        : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
653cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
654cabdff1aSopenharmony_ci          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
655cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
656cabdff1aSopenharmony_ci          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
657cabdff1aSopenharmony_ci          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
658cabdff1aSopenharmony_ci          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
659cabdff1aSopenharmony_ci          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
660cabdff1aSopenharmony_ci          [Gd]"=&f"(ftmp[21]), [input]"+&r"(input)
661cabdff1aSopenharmony_ci        : [stride]"r"(stride), [temp_value]"r"(temp_value)
662cabdff1aSopenharmony_ci        : "memory"
663cabdff1aSopenharmony_ci    );
664cabdff1aSopenharmony_ci}
/**
 * Run the full two-pass IDCT on one block.
 *
 * The row pass always runs first and works in place on input.  The column
 * pass is then chosen by type: 1 selects idct_column_true_mmi(), every other
 * value selects idct_column_false_mmi() (the visible callers pass 2 for it).
 *
 * @param dst    top-left pixel of the destination area
 * @param stride distance in bytes between destination rows
 * @param input  coefficient block, modified by the row pass
 * @param type   1 for the "true" column pass, anything else for the "false" one
 */
static void idct_mmi(uint8_t *dst, int stride, int16_t *input, int type)
{
    idct_row_mmi(input);

    if (type != 1) {
        idct_column_false_mmi(dst, stride, input);
        return;
    }

    idct_column_true_mmi(dst, stride, input);
}
673cabdff1aSopenharmony_ci
/**
 * Transform block with idct_mmi() in mode 1, then zero all 64 coefficients.
 *
 * NOTE(review): mode 1 routes to idct_column_true_mmi(), which presumably
 * overwrites dest without reading it -- confirm against that function.
 *
 * @param dest      top-left pixel of the destination area
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients; cleared on return
 */
void ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const size_t block_bytes = 64 * sizeof(*block);

    idct_mmi(dest, line_size, block, 1);
    memset(block, 0, block_bytes);
}
679cabdff1aSopenharmony_ci
/**
 * Transform block with idct_mmi() in mode 2, then zero all 64 coefficients.
 *
 * Mode 2 routes to idct_column_false_mmi(), which adds the transformed values
 * to the pixels already stored at dest.
 *
 * @param dest      top-left pixel of the destination area; read and written
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients; cleared on return
 */
void ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const size_t block_bytes = 64 * sizeof(*block);

    idct_mmi(dest, line_size, block, 2);
    memset(block, 0, block_bytes);
}
685cabdff1aSopenharmony_civoid ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
686cabdff1aSopenharmony_ci{
687cabdff1aSopenharmony_ci    int dc = (block[0] + 15) >> 5;
688cabdff1aSopenharmony_ci
689cabdff1aSopenharmony_ci    double ftmp[7];
690cabdff1aSopenharmony_ci    uint64_t tmp;
691cabdff1aSopenharmony_ci    __asm__ volatile (
692cabdff1aSopenharmony_ci        "pxor       %[ftmp0],     %[ftmp0],           %[ftmp0]      \n\t"
693cabdff1aSopenharmony_ci        "mtc1       %[dc],        %[ftmp5]                          \n\t"
694cabdff1aSopenharmony_ci        "pshufh     %[ftmp5],     %[ftmp5],           %[ftmp0]      \n\t"
695cabdff1aSopenharmony_ci        "li         %[tmp0],      0x08                              \n\t"
696cabdff1aSopenharmony_ci        "1:                                                         \n\t"
697cabdff1aSopenharmony_ci        "ldc1       %[ftmp1],     0x00(%[dest])                     \n\t"
698cabdff1aSopenharmony_ci        "punpcklbh  %[ftmp2],     %[ftmp1],           %[ftmp0]      \n\t"
699cabdff1aSopenharmony_ci        "punpckhbh  %[ftmp3],     %[ftmp1],           %[ftmp0]      \n\t"
700cabdff1aSopenharmony_ci        "paddh      %[ftmp4],     %[ftmp2],           %[ftmp5]      \n\t"
701cabdff1aSopenharmony_ci        "paddh      %[ftmp6],     %[ftmp3],           %[ftmp5]      \n\t"
702cabdff1aSopenharmony_ci        "packushb   %[ftmp4],     %[ftmp4],           %[ftmp0]      \n\t"
703cabdff1aSopenharmony_ci        "packushb   %[ftmp6],     %[ftmp6],           %[ftmp0]      \n\t"
704cabdff1aSopenharmony_ci        "swc1       %[ftmp4],     0x00(%[dest])                     \n\t"
705cabdff1aSopenharmony_ci        "swc1       %[ftmp6],     0x04(%[dest])                     \n\t"
706cabdff1aSopenharmony_ci        PTR_ADDU   "%[dest],      %[dest],            %[line_size]  \n\t"
707cabdff1aSopenharmony_ci        PTR_ADDIU  "%[tmp0],      %[tmp0],            -0x01         \n\t"
708cabdff1aSopenharmony_ci        "bnez       %[tmp0],      1b                                \n\t"
709cabdff1aSopenharmony_ci        : [dest]"+&r"(dest), [block]"+&r"(block), [tmp0]"=&r"(tmp),
710cabdff1aSopenharmony_ci          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
711cabdff1aSopenharmony_ci          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
712cabdff1aSopenharmony_ci          [ftmp6]"=&f"(ftmp[6])
713cabdff1aSopenharmony_ci        : [line_size]"r"(line_size), [dc]"r"(dc)
714cabdff1aSopenharmony_ci        : "memory"
715cabdff1aSopenharmony_ci    );
716cabdff1aSopenharmony_ci    block[0] = 0;
717cabdff1aSopenharmony_ci}
718cabdff1aSopenharmony_ci
/**
 * Combine two 8-byte-wide pixel rows into dst, one row at a time, for h rows.
 *
 * In the h == 8 path every output byte is
 *     (a & b) + (((a ^ b) & 0xfe) >> 1)
 * which equals the average of the two source bytes rounded down.
 * The generic path delegates the per-word computation to no_rnd_avg32();
 * NOTE(review): presumably that helper yields the same truncating average,
 * its definition is not visible here - confirm in libavcodec/rnd_avg.h.
 *
 * @param dst    destination; 8 bytes are written per row. The asm path uses a
 *               plain sdc1 store and the C path uses AV_WN32A.
 *               NOTE(review): both look like they expect an aligned dst -
 *               confirm against the callers.
 * @param src1   first source; 8 bytes are read per row.
 * @param src2   second source; 8 bytes are read per row.
 * @param stride byte offset between consecutive rows; the same value is
 *               applied to dst, src1 and src2.
 * @param h      number of rows. Exactly 8 selects the MMI asm loop, any other
 *               value selects the C loop.
 */
719cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
720cabdff1aSopenharmony_ci                                 const uint8_t *src2, ptrdiff_t stride, int h)
721cabdff1aSopenharmony_ci{
722cabdff1aSopenharmony_ci    if (h == 8) {
        /* Scratch storage bound to FP registers through the "f" constraints
         * below; asm operand ftmpN maps to ftmp[N - 1]. */
723cabdff1aSopenharmony_ci        double ftmp[6];
        /* tmp[0] is the row counter, tmp[1] a scratch GPR for constants. */
724cabdff1aSopenharmony_ci        uint64_t tmp[2];
        /* Macro from mmiutils.h. NOTE(review): presumably declares the extra
         * temporaries that MMI_ULDC1 may need - confirm in that header. */
725cabdff1aSopenharmony_ci        DECLARE_VAR_ALL64;
726cabdff1aSopenharmony_ci
727cabdff1aSopenharmony_ci        __asm__ volatile (
            /* tmp0 = 8: number of rows left to process. */
728cabdff1aSopenharmony_ci            "li          %[tmp0],        0x08                            \n\t"
            /* ftmp4 = 0xfefefefe replicated into both 32-bit halves
             * (punpcklwd with the same register as both sources): a mask
             * that clears bit 0 of every byte. */
729cabdff1aSopenharmony_ci            "li          %[tmp1],        0xfefefefe                      \n\t"
730cabdff1aSopenharmony_ci            "dmtc1       %[tmp1],        %[ftmp4]                        \n\t"
731cabdff1aSopenharmony_ci            "punpcklwd   %[ftmp4],       %[ftmp4],             %[ftmp4]  \n\t"
            /* ftmp5 = 1: shift count used by psrlw below. */
732cabdff1aSopenharmony_ci            "li          %[tmp1],        0x01                            \n\t"
733cabdff1aSopenharmony_ci            "dmtc1       %[tmp1],        %[ftmp5]                        \n\t"
            /* Per-row loop. */
734cabdff1aSopenharmony_ci            "1:                                                          \n\t"
            /* Load one 8-byte row from each source.
             * NOTE(review): MMI_ULDC1 is presumably an unaligned 64-bit load
             * helper from mmiutils.h - confirm. */
735cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp1], %[src1], 0x0)
736cabdff1aSopenharmony_ci            MMI_ULDC1(%[ftmp2], %[src2], 0x0)
            /* ftmp3 = ((a ^ b) & 0xfe..fe) >> 1. Bit 0 of each byte is
             * cleared first so the 32-bit-wide shift cannot move a bit from
             * one byte into its lower neighbour. */
737cabdff1aSopenharmony_ci            "pxor        %[ftmp3],       %[ftmp1],             %[ftmp2]  \n\t"
738cabdff1aSopenharmony_ci            "pand        %[ftmp3],       %[ftmp3],             %[ftmp4]  \n\t"
739cabdff1aSopenharmony_ci            "psrlw       %[ftmp3],       %[ftmp3],             %[ftmp5]  \n\t"
            /* ftmp6 = a & b; ftmp3 += ftmp6. Each byte sum stays <= 255, so
             * the 32-bit-wide add produces no carry between bytes. */
740cabdff1aSopenharmony_ci            "pand        %[ftmp6],       %[ftmp1],             %[ftmp2]  \n\t"
741cabdff1aSopenharmony_ci            "paddw       %[ftmp3],       %[ftmp3],             %[ftmp6]  \n\t"
            /* Store the 8 result bytes with a plain (non-helper) 64-bit
             * store. */
742cabdff1aSopenharmony_ci            "sdc1        %[ftmp3],       0x00(%[dst])                    \n\t"
            /* Advance all three pointers by one row, then loop while the
             * row counter is non-zero. */
743cabdff1aSopenharmony_ci            PTR_ADDU    "%[src1],        %[src1],              %[stride] \n\t"
744cabdff1aSopenharmony_ci            PTR_ADDU    "%[src2],        %[src2],              %[stride] \n\t"
745cabdff1aSopenharmony_ci            PTR_ADDU    "%[dst],         %[dst],               %[stride] \n\t"
746cabdff1aSopenharmony_ci            PTR_ADDIU   "%[tmp0],        %[tmp0],              -0x01     \n\t"
747cabdff1aSopenharmony_ci            "bnez        %[tmp0],        1b                              \n\t"
            /* Outputs: the pointers are read-write ("+&r") because the loop
             * advances them; everything else is early-clobber scratch.
             * NOTE(review): RESTRICT_ASM_ALL64 presumably lists the operands
             * matching DECLARE_VAR_ALL64 - confirm in mmiutils.h. */
748cabdff1aSopenharmony_ci            : RESTRICT_ASM_ALL64
749cabdff1aSopenharmony_ci              [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
750cabdff1aSopenharmony_ci              [ftmp1]"=&f"(ftmp[0]), [ftmp2]"=&f"(ftmp[1]), [ftmp3]"=&f"(ftmp[2]),
751cabdff1aSopenharmony_ci              [ftmp4]"=&f"(ftmp[3]), [ftmp5]"=&f"(ftmp[4]), [ftmp6]"=&f"(ftmp[5]),
752cabdff1aSopenharmony_ci              [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1])
753cabdff1aSopenharmony_ci            : [stride]"r"(stride)
            /* The asm stores through dst, so tell the compiler memory is
             * modified. */
754cabdff1aSopenharmony_ci            : "memory"
755cabdff1aSopenharmony_ci        );
756cabdff1aSopenharmony_ci    } else {
        /* Generic height: handle each row as two 32-bit words in C. */
757cabdff1aSopenharmony_ci        int i;
758cabdff1aSopenharmony_ci
759cabdff1aSopenharmony_ci        for (i = 0; i < h; i++) {
760cabdff1aSopenharmony_ci            uint32_t a, b;
761cabdff1aSopenharmony_ci
            /* Bytes 0..3 of row i. */
762cabdff1aSopenharmony_ci            a = AV_RN32(&src1[i * stride]);
763cabdff1aSopenharmony_ci            b = AV_RN32(&src2[i * stride]);
764cabdff1aSopenharmony_ci            AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
            /* Bytes 4..7 of row i. */
765cabdff1aSopenharmony_ci            a = AV_RN32(&src1[i * stride + 4]);
766cabdff1aSopenharmony_ci            b = AV_RN32(&src2[i * stride + 4]);
767cabdff1aSopenharmony_ci            AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
768cabdff1aSopenharmony_ci        }
769cabdff1aSopenharmony_ci    }
770cabdff1aSopenharmony_ci}
771