1/*
2 * Loongson SIMD optimized h264dsp
3 *
4 * Copyright (c) 2015 Loongson Technology Corporation Limited
5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7 *                    Heiher <r@hev.cc>
8 *
9 * This file is part of FFmpeg.
10 *
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#include "libavcodec/bit_depth_template.c"
27#include "h264dsp_mips.h"
28#include "libavutil/mips/mmiutils.h"
29#include "libavutil/mem_internal.h"
30
31void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
32{
33    double ftmp[9];
34    DECLARE_VAR_LOW32;
35
36    __asm__ volatile (
37        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
38        MMI_LDC1(%[ftmp1], %[src], 0x00)
39        MMI_LDC1(%[ftmp2], %[src], 0x08)
40        MMI_LDC1(%[ftmp3], %[src], 0x10)
41        MMI_LDC1(%[ftmp4], %[src], 0x18)
42        /* memset(src, 0, 32); */
43        MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00)
44        MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10)
45        MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
46        MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
47        MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
48        MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
49        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
50        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
51        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
52        "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
53        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
54        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
55        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
56        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
57        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
58        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
59        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
60        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
61        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
62        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
63        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
64        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
65        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
66          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
67          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
68          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
69          RESTRICT_ASM_LOW32
70          [ftmp8]"=&f"(ftmp[8])
71        : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
72          [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
73          [src]"r"(src)
74        : "memory"
75    );
76
77}
78
79void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
80{
81    double ftmp[12];
82    uint64_t tmp[1];
83    DECLARE_VAR_LOW32;
84    DECLARE_VAR_ADDRT;
85
86    __asm__ volatile (
87        MMI_LDC1(%[ftmp0], %[block], 0x00)
88        MMI_LDC1(%[ftmp1], %[block], 0x08)
89        MMI_LDC1(%[ftmp2], %[block], 0x10)
90        MMI_LDC1(%[ftmp3], %[block], 0x18)
91        /* memset(block, 0, 32) */
92        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
93        MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00)
94        MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10)
95        "dli        %[tmp0],    0x01                                    \n\t"
96        "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
97        "dli        %[tmp0],    0x06                                    \n\t"
98        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
99        "psrah      %[ftmp4],   %[ftmp1],       %[ftmp8]                \n\t"
100        "psrah      %[ftmp5],   %[ftmp3],       %[ftmp8]                \n\t"
101        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
102        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
103        "paddh      %[ftmp10],  %[ftmp2],       %[ftmp0]                \n\t"
104        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
105        "paddh      %[ftmp11],  %[ftmp5],       %[ftmp10]               \n\t"
106        "psubh      %[ftmp2],   %[ftmp10],      %[ftmp5]                \n\t"
107        "paddh      %[ftmp10],  %[ftmp4],       %[ftmp0]                \n\t"
108        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
109        "punpckhhw  %[ftmp1],   %[ftmp11],      %[ftmp10]               \n\t"
110        "punpcklhw  %[ftmp5],   %[ftmp11],      %[ftmp10]               \n\t"
111        "punpckhhw  %[ftmp4],   %[ftmp0],       %[ftmp2]                \n\t"
112        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
113        "punpckhwd  %[ftmp2],   %[ftmp5],       %[ftmp0]                \n\t"
114        "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
115        "punpcklwd  %[ftmp10],  %[ftmp1],       %[ftmp4]                \n\t"
116        "punpckhwd  %[ftmp0],   %[ftmp1],       %[ftmp4]                \n\t"
117        "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_32]             \n\t"
118        "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
119        "psrah      %[ftmp3],   %[ftmp0],       %[ftmp8]                \n\t"
120        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
121        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
122        "paddh      %[ftmp1],   %[ftmp10],      %[ftmp5]                \n\t"
123        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
124        "paddh      %[ftmp10],  %[ftmp3],       %[ftmp1]                \n\t"
125        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
126        "paddh      %[ftmp11],  %[ftmp4],       %[ftmp5]                \n\t"
127        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
128        MMI_ULWC1(%[ftmp2], %[dst], 0x00)
129        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
130        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
131        "psrah      %[ftmp3],   %[ftmp10],      %[ftmp9]                \n\t"
132        "psrah      %[ftmp4],   %[ftmp11],      %[ftmp9]                \n\t"
133        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
134        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
135        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
136        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
137        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
138        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
139        MMI_SWC1(%[ftmp2], %[dst], 0x00)
140        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
141        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
142        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
143        MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
145        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
147        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
148        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
149        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
150        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
151        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
152        MMI_SWC1(%[ftmp2], %[dst], 0x00)
153        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
154        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
156          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
157          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
158          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
159          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
160          [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
161          RESTRICT_ASM_LOW32
162          RESTRICT_ASM_ADDRT
163          [tmp0]"=&r"(tmp[0])
164        : [dst]"r"(dst),                    [block]"r"(block),
165          [stride]"r"((mips_reg)stride),    [ff_pw_32]"f"(ff_pw_32.f)
166        : "memory"
167    );
168
169}
170
171void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
172{
173    double ftmp[16];
174    uint64_t tmp[7];
175    mips_reg addr[1];
176    DECLARE_VAR_LOW32;
177    DECLARE_VAR_ADDRT;
178
179    __asm__ volatile (
180        "lhu        %[tmp0],    0x00(%[block])                          \n\t"
181        PTR_ADDI   "$sp,        $sp,            -0x20                   \n\t"
182        PTR_ADDIU  "%[tmp0],    %[tmp0],        0x20                    \n\t"
183        MMI_LDC1(%[ftmp1], %[block], 0x10)
184        "sh         %[tmp0],    0x00(%[block])                          \n\t"
185        MMI_LDC1(%[ftmp2], %[block], 0x20)
186        "dli        %[tmp0],    0x01                                    \n\t"
187        MMI_LDC1(%[ftmp3], %[block], 0x30)
188        "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
189        MMI_LDC1(%[ftmp5], %[block], 0x50)
190        MMI_LDC1(%[ftmp6], %[block], 0x60)
191        MMI_LDC1(%[ftmp7], %[block], 0x70)
192        "mov.d      %[ftmp0],   %[ftmp1]                                \n\t"
193        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
194        "psrah      %[ftmp4],   %[ftmp5],       %[ftmp8]                \n\t"
195        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
196        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
197        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
198        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
199        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
200        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
201        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
202        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
203        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
204        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
205        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
206        "psrah      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
207        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
208        "dli        %[tmp0],    0x02                                    \n\t"
209        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
210        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
211        "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
212        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
213        "psrah      %[ftmp3],   %[ftmp4],       %[ftmp9]                \n\t"
214        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
215        "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
216        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
217        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
218        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
219        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
220        "mov.d      %[ftmp5],   %[ftmp6]                                \n\t"
221        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
222        "psrah      %[ftmp4],   %[ftmp2],       %[ftmp8]                \n\t"
223        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
224        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
225        MMI_LDC1(%[ftmp2], %[block], 0x00)
226        MMI_LDC1(%[ftmp5], %[block], 0x40)
227        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
228        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
229        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
230        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
231        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
232        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
233        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
234        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
235        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
236        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
237        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
238        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
239        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
240        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
241        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
242        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
243        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
244        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
245        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
246        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
247        MMI_SDC1(%[ftmp6], %[block], 0x00)
248        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
249        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp0]                \n\t"
250        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
251        "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp1]                \n\t"
252        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
253        "punpckhwd  %[ftmp1],   %[ftmp7],       %[ftmp3]                \n\t"
254        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
255        "punpckhwd  %[ftmp3],   %[ftmp6],       %[ftmp0]                \n\t"
256        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
257        MMI_LDC1(%[ftmp0], %[block], 0x00)
258        MMI_SDC1(%[ftmp7], $sp, 0x00)
259        MMI_SDC1(%[ftmp1], $sp, 0x10)
260        "dmfc1      %[tmp1],    %[ftmp6]                                \n\t"
261        "dmfc1      %[tmp3],    %[ftmp3]                                \n\t"
262        "punpckhhw  %[ftmp3],   %[ftmp5],       %[ftmp2]                \n\t"
263        "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
264        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp0]                \n\t"
265        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
266        "punpckhwd  %[ftmp0],   %[ftmp5],       %[ftmp4]                \n\t"
267        "punpcklwd  %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
268        "punpckhwd  %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
269        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
270        MMI_SDC1(%[ftmp5], $sp, 0x08)
271        MMI_SDC1(%[ftmp0], $sp, 0x18)
272        "dmfc1      %[tmp2],    %[ftmp3]                                \n\t"
273        "dmfc1      %[tmp4],    %[ftmp4]                                \n\t"
274        MMI_LDC1(%[ftmp1], %[block], 0x18)
275        MMI_LDC1(%[ftmp6], %[block], 0x28)
276        MMI_LDC1(%[ftmp2], %[block], 0x38)
277        MMI_LDC1(%[ftmp0], %[block], 0x58)
278        MMI_LDC1(%[ftmp3], %[block], 0x68)
279        MMI_LDC1(%[ftmp4], %[block], 0x78)
280        "mov.d      %[ftmp7],   %[ftmp1]                                \n\t"
281        "psrah      %[ftmp5],   %[ftmp0],       %[ftmp8]                \n\t"
282        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
283        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
284        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
285        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
286        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
287        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
288        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
289        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
290        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
291        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
292        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
293        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
294        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
295        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
296        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
297        "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
298        "psrah      %[ftmp2],   %[ftmp5],       %[ftmp9]                \n\t"
299        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
300        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
301        "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
302        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
303        "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
304        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
305        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
306        "mov.d      %[ftmp0],   %[ftmp3]                                \n\t"
307        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
308        "psrah      %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
309        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
310        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
311        MMI_LDC1(%[ftmp6], %[block], 0x08)
312        MMI_LDC1(%[ftmp0], %[block], 0x48)
313        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
314        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
315        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
316        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
317        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
318        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
319        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
320        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
321        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
322        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
323        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
324        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
325        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
326        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
327        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
328        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
329        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
330        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
331        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
332        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
333        MMI_SDC1(%[ftmp3], %[block], 0x08)
334        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
335        "punpckhhw  %[ftmp3],   %[ftmp4],       %[ftmp7]                \n\t"
336        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
337        "punpckhhw  %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
338        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
339        "punpckhwd  %[ftmp1],   %[ftmp4],       %[ftmp2]                \n\t"
340        "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
341        "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp7]                \n\t"
342        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
343        MMI_LDC1(%[ftmp7], %[block], 0x08)
344        "dmfc1      %[tmp5],    %[ftmp4]                                \n\t"
345        "mov.d      %[ftmp10],  %[ftmp1]                                \n\t"
346        "mov.d      %[ftmp12],  %[ftmp3]                                \n\t"
347        "mov.d      %[ftmp14],  %[ftmp2]                                \n\t"
348        "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp6]                \n\t"
349        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
350        "punpckhhw  %[ftmp6],   %[ftmp5],       %[ftmp7]                \n\t"
351        "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
352        "punpckhwd  %[ftmp7],   %[ftmp0],       %[ftmp5]                \n\t"
353        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
354        "punpckhwd  %[ftmp5],   %[ftmp2],       %[ftmp6]                \n\t"
355        "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
356        "dmfc1      %[tmp6],    %[ftmp0]                                \n\t"
357        "mov.d      %[ftmp11],  %[ftmp7]                                \n\t"
358        "mov.d      %[ftmp13],  %[ftmp2]                                \n\t"
359        "mov.d      %[ftmp15],  %[ftmp5]                                \n\t"
360        PTR_ADDIU  "%[addr0],   %[dst],         0x04                    \n\t"
361        "mov.d      %[ftmp7],   %[ftmp10]                               \n\t"
362        "dmtc1      %[tmp3],    %[ftmp6]                                \n\t"
363        MMI_LDC1(%[ftmp1], $sp, 0x10)
364        "dmtc1      %[tmp1],    %[ftmp3]                                \n\t"
365        "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
366        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
367        "psrah      %[ftmp0],   %[ftmp7],       %[ftmp8]                \n\t"
368        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
369        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
370        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
371        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp14]               \n\t"
372        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
373        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
374        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
375        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
376        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
377        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp14]               \n\t"
378        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp14]               \n\t"
379        "psrah      %[ftmp5],   %[ftmp14],      %[ftmp8]                \n\t"
380        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
381        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
382        "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
383        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
384        "psrah      %[ftmp6],   %[ftmp0],       %[ftmp9]                \n\t"
385        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
386        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
387        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
388        "psrah      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
389        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
390        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
391        "mov.d      %[ftmp7],   %[ftmp12]                               \n\t"
392        "psrah      %[ftmp2],   %[ftmp12],      %[ftmp8]                \n\t"
393        "psrah      %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
394        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
395        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
396        MMI_LDC1(%[ftmp3], $sp, 0x00)
397        "dmtc1      %[tmp5],    %[ftmp7]                                \n\t"
398        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
399        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
400        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
401        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
402        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
403        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
404        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
405        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
406        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
407        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
408        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
409        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
410        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
411        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
412        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
413        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
414        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
415        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
416        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
417        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
418        MMI_SDC1(%[ftmp3], $sp, 0x00)
419        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
420        MMI_SDC1(%[ftmp0], $sp, 0x10)
421        "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
422        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
423        MMI_SDC1(%[ftmp2], %[block], 0x00)
424        MMI_SDC1(%[ftmp2], %[block], 0x08)
425        MMI_SDC1(%[ftmp2], %[block], 0x10)
426        MMI_SDC1(%[ftmp2], %[block], 0x18)
427        MMI_SDC1(%[ftmp2], %[block], 0x20)
428        MMI_SDC1(%[ftmp2], %[block], 0x28)
429        MMI_SDC1(%[ftmp2], %[block], 0x30)
430        MMI_SDC1(%[ftmp2], %[block], 0x38)
431        MMI_SDC1(%[ftmp2], %[block], 0x40)
432        MMI_SDC1(%[ftmp2], %[block], 0x48)
433        MMI_SDC1(%[ftmp2], %[block], 0x50)
434        MMI_SDC1(%[ftmp2], %[block], 0x58)
435        MMI_SDC1(%[ftmp2], %[block], 0x60)
436        MMI_SDC1(%[ftmp2], %[block], 0x68)
437        MMI_SDC1(%[ftmp2], %[block], 0x70)
438        MMI_SDC1(%[ftmp2], %[block], 0x78)
439        "dli        %[tmp3],    0x06                                    \n\t"
440        "mtc1       %[tmp3],    %[ftmp10]                               \n\t"
441        MMI_ULWC1(%[ftmp3], %[dst], 0x00)
442        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
443        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
444        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
445        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
446        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
447        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
448        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
449        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
450        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
451        MMI_SWC1(%[ftmp3], %[dst], 0x00)
452        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
453        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
454        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
455        MMI_ULWC1(%[ftmp3], %[dst], 0x00)
456        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
457        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
458        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
459        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
460        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
461        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
462        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
463        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
464        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
465        MMI_SWC1(%[ftmp3], %[dst], 0x00)
466        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
467        MMI_LDC1(%[ftmp5], $sp, 0x00)
468        MMI_LDC1(%[ftmp4], $sp, 0x10)
469        "dmtc1      %[tmp1],    %[ftmp6]                                \n\t"
470        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
471        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
472        MMI_ULWC1(%[ftmp3], %[dst], 0x00)
473        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
474        "psrah      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
475        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
476        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
477        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
478        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
479        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
480        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
481        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
482        MMI_SWC1(%[ftmp3], %[dst], 0x00)
483        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
484        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
485        PTR_ADDU   "%[dst],     %[dst],         %[stride]               \n\t"
486        MMI_ULWC1(%[ftmp3], %[dst], 0x00)
487        MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
488        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
489        "psrah      %[ftmp6],   %[ftmp6],       %[ftmp10]               \n\t"
490        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
491        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
492        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
493        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
494        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
495        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
496        MMI_SWC1(%[ftmp3], %[dst], 0x00)
497        MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
498        "dmtc1      %[tmp4],    %[ftmp1]                                \n\t"
499        "dmtc1      %[tmp2],    %[ftmp6]                                \n\t"
500        MMI_LDC1(%[ftmp4], $sp, 0x18)
501        "mov.d      %[ftmp5],   %[ftmp4]                                \n\t"
502        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
503        "psrah      %[ftmp7],   %[ftmp11],      %[ftmp8]                \n\t"
504        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
505        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
506        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
507        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp11]               \n\t"
508        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
509        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
510        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
511        "psubh      %[ftmp3],   %[ftmp11],      %[ftmp1]                \n\t"
512        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
513        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp15]               \n\t"
514        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp15]               \n\t"
515        "psrah      %[ftmp2],   %[ftmp15],      %[ftmp8]                \n\t"
516        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
517        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
518        "mov.d      %[ftmp2],   %[ftmp4]                                \n\t"
519        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
520        "psrah      %[ftmp1],   %[ftmp7],       %[ftmp9]                \n\t"
521        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
522        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
523        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
524        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
525        "psubh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
526        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
527        "mov.d      %[ftmp3],   %[ftmp13]                               \n\t"
528        "psrah      %[ftmp0],   %[ftmp13],      %[ftmp8]                \n\t"
529        "psrah      %[ftmp7],   %[ftmp6],       %[ftmp8]                \n\t"
530        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
531        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
532        MMI_LDC1(%[ftmp6], $sp, 0x08)
533        "dmtc1      %[tmp6],    %[ftmp3]                                \n\t"
534        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
535        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
536        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
537        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
538        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
539        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
540        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
541        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
542        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
543        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
544        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
545        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
546        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
547        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
548        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
549        "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
550        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
551        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
552        "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
553        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
554        MMI_SDC1(%[ftmp6], $sp, 0x08)
555        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
556        MMI_SDC1(%[ftmp7], $sp, 0x18)
557        "dmfc1      %[tmp2],    %[ftmp0]                                \n\t"
558        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
559        MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
560        MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
561        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
562        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
563        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
564        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
565        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
566        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
567        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
568        "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
569        MMI_SWC1(%[ftmp6], %[addr0], 0x00)
570        MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
571        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
572        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
573        MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
574        MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
575        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
576        "psrah      %[ftmp4],   %[ftmp4],       %[ftmp10]               \n\t"
577        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
578        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
579        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
580        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
581        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
582        "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
583        MMI_SWC1(%[ftmp6], %[addr0], 0x00)
584        MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
585        MMI_LDC1(%[ftmp2], $sp, 0x08)
586        MMI_LDC1(%[ftmp5], $sp, 0x18)
587        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
588        "dmtc1      %[tmp2],    %[ftmp1]                                \n\t"
589        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
590        MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
591        MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
592        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp10]               \n\t"
593        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
594        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
595        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
596        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
597        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
598        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
599        "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
600        MMI_SWC1(%[ftmp6], %[addr0], 0x00)
601        MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
602        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
603        PTR_ADDU   "%[addr0],   %[addr0],       %[stride]               \n\t"
604        MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
605        MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
606        "psrah      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
607        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp10]               \n\t"
608        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
609        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
610        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
611        "paddh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
612        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
613        "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
614        MMI_SWC1(%[ftmp6], %[addr0], 0x00)
615        MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
616        PTR_ADDIU  "$sp,        $sp,            0x20                    \n\t"
617        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
618          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
619          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
620          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
621          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
622          [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
623          [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
624          [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
625          [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
626          [tmp2]"=&r"(tmp[2]),              [tmp3]"=&r"(tmp[3]),
627          [tmp4]"=&r"(tmp[4]),              [tmp5]"=&r"(tmp[5]),
628          [tmp6]"=&r"(tmp[6]),
629          RESTRICT_ASM_LOW32
630          RESTRICT_ASM_ADDRT
631          [addr0]"=&r"(addr[0])
632        : [dst]"r"(dst),                    [block]"r"(block),
633          [stride]"r"((mips_reg)stride)
634        : "memory"
635    );
636
637}
638
639void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
640{
641    int dc = (block[0] + 32) >> 6;
642    double ftmp[6];
643    DECLARE_VAR_LOW32;
644
645    block[0] = 0;
646
647    __asm__ volatile (
648        "mtc1       %[dc],      %[ftmp5]                                \n\t"
649        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
650        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
651        MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
652        MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
653        MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
654        MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
655        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
656        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
657        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
658        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
659        "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
660        "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
661        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
662        "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
663        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
664        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
665        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
666        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
667        MMI_SWC1(%[ftmp1], %[dst0], 0x00)
668        MMI_SWC1(%[ftmp2], %[dst1], 0x00)
669        MMI_SWC1(%[ftmp3], %[dst2], 0x00)
670        MMI_SWC1(%[ftmp4], %[dst3], 0x00)
671        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
672          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
673          [ftmp4]"=&f"(ftmp[4]),
674          RESTRICT_ASM_LOW32
675          [ftmp5]"=&f"(ftmp[5])
676        : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
677          [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
678          [dc]"r"(dc)
679        : "memory"
680    );
681}
682
683void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
684{
685    int dc = (block[0] + 32) >> 6;
686    double ftmp[10];
687    DECLARE_VAR_ALL64;
688
689    block[0] = 0;
690
691    __asm__ volatile (
692        "mtc1       %[dc],      %[ftmp5]                                \n\t"
693        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
694        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
695        MMI_LDC1(%[ftmp1], %[dst0], 0x00)
696        MMI_LDC1(%[ftmp2], %[dst1], 0x00)
697        MMI_LDC1(%[ftmp3], %[dst2], 0x00)
698        MMI_LDC1(%[ftmp4], %[dst3], 0x00)
699        "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
700        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
701        "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
702        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
703        "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
704        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
705        "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
706        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
707        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
708        "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
709        "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
710        "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
711        "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
712        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
713        "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
714        "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
715        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
716        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
717        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
718        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
719        MMI_SDC1(%[ftmp1], %[dst0], 0x00)
720        MMI_SDC1(%[ftmp2], %[dst1], 0x00)
721        MMI_SDC1(%[ftmp3], %[dst2], 0x00)
722        MMI_SDC1(%[ftmp4], %[dst3], 0x00)
723
724        MMI_LDC1(%[ftmp1], %[dst4], 0x00)
725        MMI_LDC1(%[ftmp2], %[dst5], 0x00)
726        MMI_LDC1(%[ftmp3], %[dst6], 0x00)
727        MMI_LDC1(%[ftmp4], %[dst7], 0x00)
728        "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
729        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
730        "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
731        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
732        "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
733        "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
734        "punpckhbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
735        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
736        "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
737        "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
738        "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
739        "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
740        "paddsh     %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
741        "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
742        "paddsh     %[ftmp9],   %[ftmp9],       %[ftmp5]                \n\t"
743        "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
744        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
745        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
746        "packushb   %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
747        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
748        MMI_SDC1(%[ftmp1], %[dst4], 0x00)
749        MMI_SDC1(%[ftmp2], %[dst5], 0x00)
750        MMI_SDC1(%[ftmp3], %[dst6], 0x00)
751        MMI_SDC1(%[ftmp4], %[dst7], 0x00)
752        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
753          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
754          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
755          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
756          [ftmp8]"=&f"(ftmp[8]),
757          RESTRICT_ASM_ALL64
758          [ftmp9]"=&f"(ftmp[9])
759        : [dst0]"r"(dst),                   [dst1]"r"(dst+stride),
760          [dst2]"r"(dst+2*stride),          [dst3]"r"(dst+3*stride),
761          [dst4]"r"(dst+4*stride),          [dst5]"r"(dst+5*stride),
762          [dst6]"r"(dst+6*stride),          [dst7]"r"(dst+7*stride),
763          [dc]"r"(dc)
764        : "memory"
765    );
766}
767
768void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
769                              int16_t *block, int stride,
770                              const uint8_t nnzc[5 * 8])
771{
772    int i;
773    for(i=0; i<16; i++){
774        int nnz = nnzc[ scan8[i] ];
775        if(nnz){
776            if(nnz==1 && ((int16_t*)block)[i*16])
777                ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
778                        stride);
779            else
780                ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
781                        stride);
782        }
783    }
784}
785
786void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
787        int16_t *block, int stride, const uint8_t nnzc[5 * 8])
788{
789    int i;
790    for(i=0; i<16; i++){
791        if(nnzc[ scan8[i] ])
792            ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
793        else if(((int16_t*)block)[i*16])
794            ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
795                    stride);
796    }
797}
798
799void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
800        int16_t *block, int stride, const uint8_t nnzc[5 * 8])
801{
802    int i;
803    for(i=0; i<16; i+=4){
804        int nnz = nnzc[ scan8[i] ];
805        if(nnz){
806            if(nnz==1 && ((int16_t*)block)[i*16])
807                ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
808                        block + i*16, stride);
809            else
810                ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
811                        stride);
812        }
813    }
814}
815
816void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
817        int16_t *block, int stride, const uint8_t nnzc[15*8])
818{
819    int i, j;
820    for(j=1; j<3; j++){
821        for(i=j*16; i<j*16+4; i++){
822            if(nnzc[ scan8[i] ])
823                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
824                        block + i*16, stride);
825            else if(((int16_t*)block)[i*16])
826                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
827                        block + i*16, stride);
828        }
829    }
830}
831
832void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
833        int16_t *block, int stride, const uint8_t nnzc[15*8])
834{
835    int i, j;
836
837    for(j=1; j<3; j++){
838        for(i=j*16; i<j*16+4; i++){
839            if(nnzc[ scan8[i] ])
840                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
841                        block + i*16, stride);
842            else if(((int16_t*)block)[i*16])
843                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
844                        block + i*16, stride);
845        }
846    }
847
848    for(j=1; j<3; j++){
849        for(i=j*16+4; i<j*16+8; i++){
850            if(nnzc[ scan8[i+4] ])
851                ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
852                        block + i*16, stride);
853            else if(((int16_t*)block)[i*16])
854                ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
855                        block + i*16, stride);
856        }
857    }
858}
859
860void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
861        int qmul)
862{
863    double ftmp[10];
864    uint64_t tmp[2];
865    DECLARE_VAR_ALL64;
866
867    __asm__ volatile (
868        ".set       noreorder                                           \n\t"
869        "dli        %[tmp0],    0x08                                    \n\t"
870        MMI_LDC1(%[ftmp3], %[input], 0x18)
871        "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
872        MMI_LDC1(%[ftmp2], %[input], 0x10)
873        "dli        %[tmp0],    0x20                                    \n\t"
874        MMI_LDC1(%[ftmp1], %[input], 0x08)
875        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
876        MMI_LDC1(%[ftmp0], %[input], 0x00)
877        "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
878        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
879        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
880        "mov.d      %[ftmp4],   %[ftmp1]                                \n\t"
881        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
882        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
883        "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
884        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
885        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
886        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
887        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
888        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
889        "mov.d      %[ftmp4],   %[ftmp3]                                \n\t"
890        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
891        "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
892        "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
893        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
894        "punpckhwd  %[ftmp2],   %[ftmp3],       %[ftmp0]                \n\t"
895        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
896        "mov.d      %[ftmp0],   %[ftmp4]                                \n\t"
897        "punpcklwd  %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
898        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
899        "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
900        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
901        "psubh      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
902        "mov.d      %[ftmp1],   %[ftmp2]                                \n\t"
903        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
904        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
905        "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
906        "paddh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
907        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
908        "mov.d      %[ftmp1],   %[ftmp4]                                \n\t"
909        "daddi      %[tmp0],    %[qmul],        -0x7fff                 \n\t"
910        "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
911        "bgtz       %[tmp0],    1f                                      \n\t"
912        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
913        "ori        %[tmp0],    $0,             0x80                    \n\t"
914        "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
915        "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
916        "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
917        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
918        "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
919        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
920        "mtc1       %[qmul],    %[ftmp7]                                \n\t"
921        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
922        "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
923        "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
924        "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
925        "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
926        "psraw      %[ftmp0],   %[ftmp0],       %[ftmp8]                \n\t"
927        "psraw      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
928        "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
929        "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
930        "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
931        "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
932        "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
933        "ssrld      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
934        "mfc1       %[input],   %[ftmp0]                                \n\t"
935        "sh         %[tmp1],    0x00(%[output])                         \n\t"
936        "sh         %[input],   0x80(%[output])                         \n\t"
937        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
938        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
939        "sh         %[tmp1],    0x20(%[output])                         \n\t"
940        "sh         %[input],   0xa0(%[output])                         \n\t"
941        "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
942        "ssrld      %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
943        "mfc1       %[input],   %[ftmp2]                                \n\t"
944        "sh         %[tmp1],    0x40(%[output])                         \n\t"
945        "sh         %[input],   0xc0(%[output])                         \n\t"
946        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
947        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
948        "sh         %[tmp1],    0x60(%[output])                         \n\t"
949        "sh         %[input],   0xe0(%[output])                         \n\t"
950        "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
951        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
952        "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
953        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
954        "mtc1       %[qmul],    %[ftmp7]                                \n\t"
955        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
956        "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
957        "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
958        "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
959        "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
960        "psraw      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
961        "psraw      %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
962        "psraw      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
963        "psraw      %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
964        "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
965        "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
966        "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
967        "ssrld      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
968        "mfc1       %[input],   %[ftmp3]                                \n\t"
969        "sh         %[tmp1],    0x100(%[output])                        \n\t"
970        "sh         %[input],   0x180(%[output])                        \n\t"
971        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
972        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
973        "sh         %[tmp1],    0x120(%[output])                        \n\t"
974        "sh         %[input],   0x1a0(%[output])                        \n\t"
975        "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
976        "ssrld      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
977        "mfc1       %[input],   %[ftmp4]                                \n\t"
978        "sh         %[tmp1],    0x140(%[output])                        \n\t"
979        "sh         %[input],   0x1c0(%[output])                        \n\t"
980        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
981        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
982        "sh         %[tmp1],    0x160(%[output])                        \n\t"
983        "j          2f                                                  \n\t"
984        "sh         %[input],   0x1e0(%[output])                        \n\t"
985        "1:                                                             \n\t"
986        "ori        %[tmp0],    $0,             0x1f                    \n\t"
987#if HAVE_LOONGSON3
988        "clz        %[tmp1],    %[qmul]                                 \n\t"
989#elif HAVE_LOONGSON2
990#endif
991        "ori        %[input],   $0,             0x07                    \n\t"
992        "dsubu      %[tmp1],    %[tmp0],        %[tmp1]                 \n\t"
993        "ori        %[tmp0],    $0,             0x80                    \n\t"
994        "dsll       %[tmp0],    %[tmp0],        0x10                    \n\t"
995        "daddu      %[qmul],    %[qmul],        %[tmp0]                 \n\t"
996        "dsubu      %[tmp0],    %[tmp1],        %[input]                \n\t"
997        "movn       %[tmp1],    %[input],       %[tmp0]                 \n\t"
998        PTR_ADDIU  "%[input],   %[input],       0x01                    \n\t"
999        "andi       %[tmp0],    %[tmp1],        0xff                    \n\t"
1000        "srlv       %[qmul],    %[qmul],        %[tmp0]                 \n\t"
1001        PTR_SUBU   "%[input],   %[input],       %[tmp1]                 \n\t"
1002        "mtc1       %[input],   %[ftmp6]                                \n\t"
1003        "punpckhhw  %[ftmp1],   %[ftmp0],       %[ff_pw_1]              \n\t"
1004        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ff_pw_1]              \n\t"
1005        "punpckhhw  %[ftmp5],   %[ftmp2],       %[ff_pw_1]              \n\t"
1006        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ff_pw_1]              \n\t"
1007        "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1008        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1009        "pmaddhw    %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
1010        "pmaddhw    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1011        "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1012        "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1013        "psraw      %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
1014        "psraw      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1015        "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1016        "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1017        "packsswh   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
1018        "packsswh   %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1019        "dmfc1      %[tmp1],    %[ftmp0]                                \n\t"
1020        "ssrld      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
1021        "sh         %[tmp1],    0x00(%[output])                         \n\t"
1022        "mfc1       %[input],   %[ftmp0]                                \n\t"
1023        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1024        "sh         %[input],   0x80(%[output])                         \n\t"
1025        "sh         %[tmp1],    0x20(%[output])                         \n\t"
1026        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1027        "dmfc1      %[tmp1],    %[ftmp2]                                \n\t"
1028        "sh         %[input],   0xa0(%[output])                         \n\t"
1029        "ssrld      %[ftmp2],   %[ftmp2],       %[ftmp9]                \n\t"
1030        "sh         %[tmp1],    0x40(%[output])                         \n\t"
1031        "mfc1       %[input],   %[ftmp2]                                \n\t"
1032        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1033        "sh         %[input],   0xc0(%[output])                         \n\t"
1034        "sh         %[tmp1],    0x60(%[output])                         \n\t"
1035        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1036        "sh         %[input],   0xe0(%[output])                         \n\t"
1037        "punpckhhw  %[ftmp1],   %[ftmp3],       %[ff_pw_1]              \n\t"
1038        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ff_pw_1]              \n\t"
1039        "punpckhhw  %[ftmp5],   %[ftmp4],       %[ff_pw_1]              \n\t"
1040        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ff_pw_1]              \n\t"
1041        "mtc1       %[qmul],    %[ftmp7]                                \n\t"
1042        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1043        "pmaddhw    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1044        "pmaddhw    %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1045        "pmaddhw    %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1046        "pmaddhw    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1047        "psraw      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
1048        "psraw      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1049        "psraw      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
1050        "psraw      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1051        "packsswh   %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1052        "packsswh   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1053        "dmfc1      %[tmp1],    %[ftmp3]                                \n\t"
1054        "ssrld      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
1055        "mfc1       %[input],   %[ftmp3]                                \n\t"
1056        "sh         %[tmp1],    0x100(%[output])                        \n\t"
1057        "sh         %[input],   0x180(%[output])                        \n\t"
1058        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1059        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1060        "sh         %[tmp1],    0x120(%[output])                        \n\t"
1061        "sh         %[input],   0x1a0(%[output])                        \n\t"
1062        "dmfc1      %[tmp1],    %[ftmp4]                                \n\t"
1063        "ssrld      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
1064        "mfc1       %[input],   %[ftmp4]                                \n\t"
1065        "sh         %[tmp1],    0x140(%[output])                        \n\t"
1066        "sh         %[input],   0x1c0(%[output])                        \n\t"
1067        "dsrl       %[tmp1],    %[tmp1],        0x10                    \n\t"
1068        PTR_SRL    "%[input],   %[input],       0x10                    \n\t"
1069        "sh         %[tmp1],    0x160(%[output])                        \n\t"
1070        "sh         %[input],   0x1e0(%[output])                        \n\t"
1071        "2:                                                             \n\t"
1072        ".set       reorder                                             \n\t"
1073        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1074          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1075          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1076          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1077          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1078          [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
1079          RESTRICT_ASM_ALL64
1080          [output]"+&r"(output),            [input]"+&r"(input),
1081          [qmul]"+&r"(qmul)
1082        : [ff_pw_1]"f"(ff_pw_1.f)
1083        : "memory"
1084    );
1085}
1086
/**
 * Inverse-transform and dequantise, in place, the eight chroma DC
 * coefficients stored at block[0], block[16], ..., block[112].
 *
 * The coefficients are treated as four rows that are 32 entries apart, each
 * row holding a pair that is 16 entries apart.  A sum/difference butterfly is
 * applied to every pair, then a 4-point butterfly combines the four rows, and
 * each result x is written back as (x * qmul + 128) >> 8.
 *
 * All intermediate arithmetic is done modulo 2^32 in unsigned integers so
 * that large coefficient/qmul combinations wrap instead of invoking
 * signed-overflow undefined behaviour.  Results are unchanged for inputs that
 * do not overflow.
 *
 * @param block coefficient buffer; only the eight positions listed above are
 *              read and written
 * @param qmul  dequantisation multiplier
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    enum { PAIR_STEP = 16, ROW_STEP = 32 };
    unsigned temp[8];
    int i;

    /* First pass: sum and difference of the two coefficients in each row.
     * Everything is read before anything is written back. */
    for (i = 0; i < 4; i++) {
        const unsigned first  = block[ROW_STEP * i];
        const unsigned second = block[ROW_STEP * i + PAIR_STEP];

        temp[2 * i]     = first + second;
        temp[2 * i + 1] = first - second;
    }

    /* Second pass: i == 0 combines the four sums (outputs 0/32/64/96),
     * i == 1 combines the four differences (outputs 16/48/80/112). */
    for (i = 0; i < 2; i++) {
        int16_t *out = block + PAIR_STEP * i;
        const unsigned z0 = temp[i]     + temp[4 + i];
        const unsigned z1 = temp[i]     - temp[4 + i];
        const unsigned z2 = temp[2 + i] - temp[6 + i];
        const unsigned z3 = temp[2 + i] + temp[6 + i];

        /* Convert back to int before shifting so the shift stays arithmetic. */
        out[ROW_STEP * 0] = (int)((z0 + z3) * qmul + 128) >> 8;
        out[ROW_STEP * 1] = (int)((z1 + z2) * qmul + 128) >> 8;
        out[ROW_STEP * 2] = (int)((z1 - z2) * qmul + 128) >> 8;
        out[ROW_STEP * 3] = (int)((z0 - z3) * qmul + 128) >> 8;
    }
}
1119
/**
 * Inverse-transform and dequantise, in place, the four chroma DC coefficients
 * stored at block[0], block[16], block[32] and block[48].
 *
 * A sum/difference butterfly is applied to the pair (0, 16) and to the pair
 * (32, 48); the two pairs are then combined with a second butterfly and each
 * result x is written back as (x * qmul) >> 7.
 *
 * Intermediate arithmetic is done modulo 2^32 in unsigned integers so that
 * large coefficient/qmul combinations wrap instead of invoking
 * signed-overflow undefined behaviour.  Results are unchanged for inputs that
 * do not overflow.
 *
 * @param block coefficient buffer; only the four positions listed above are
 *              read and written
 * @param qmul  dequantisation multiplier
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /* Widen to unsigned up front so every sum and product below wraps. */
    const unsigned dc0 = block[0];
    const unsigned dc1 = block[16];
    const unsigned dc2 = block[32];
    const unsigned dc3 = block[48];

    const unsigned sum01  = dc0 + dc1;
    const unsigned diff01 = dc0 - dc1;
    const unsigned sum23  = dc2 + dc3;
    const unsigned diff23 = dc2 - dc3;

    /* Convert back to int before shifting so the shift stays arithmetic. */
    block[0]  = (int)((sum01  + sum23)  * qmul) >> 7;
    block[16] = (int)((diff01 + diff23) * qmul) >> 7;
    block[32] = (int)((sum01  - sum23)  * qmul) >> 7;
    block[48] = (int)((diff01 - diff23) * qmul) >> 7;
}
1133
1134void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1135        int log2_denom, int weight, int offset)
1136{
1137    int y;
1138    double ftmp[8];
1139    DECLARE_VAR_ALL64;
1140
1141    offset <<= log2_denom;
1142
1143    if (log2_denom)
1144        offset += 1 << (log2_denom - 1);
1145
1146    for (y=0; y<height; y++, block+=stride) {
1147        __asm__ volatile (
1148            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1149            MMI_LDC1(%[ftmp1], %[block0], 0x00)
1150            MMI_LDC1(%[ftmp2], %[block1], 0x00)
1151            "mtc1       %[weight],  %[ftmp3]                            \n\t"
1152            "mtc1       %[offset],  %[ftmp4]                            \n\t"
1153            "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1154            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1155            "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1156            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
1157            "punpckhbh  %[ftmp7],   %[ftmp2],       %[ftmp0]            \n\t"
1158            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1159            "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1160            "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1161            "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1162            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1163            "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t"
1164            "paddsh     %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1165            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp4]            \n\t"
1166            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1167            "paddsh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1168            "psrah      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1169            "psrah      %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1170            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1171            "psrah      %[ftmp2],   %[ftmp2],       %[ftmp5]            \n\t"
1172            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1173            "packushb   %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1174            MMI_SDC1(%[ftmp1], %[block0], 0x00)
1175            MMI_SDC1(%[ftmp2], %[block1], 0x00)
1176            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1177              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1178              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1179              [ftmp6]"=&f"(ftmp[6]),
1180              RESTRICT_ASM_ALL64
1181              [ftmp7]"=&f"(ftmp[7])
1182            : [block0]"r"(block),           [block1]"r"(block+8),
1183              [weight]"r"(weight),          [offset]"r"(offset),
1184              [log2_denom]"r"(log2_denom)
1185            : "memory"
1186        );
1187    }
1188}
1189
1190void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1191        ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1192        int offset)
1193{
1194    int y;
1195    double ftmp[9];
1196    DECLARE_VAR_ALL64;
1197
1198    offset = ((offset + 1) | 1) << log2_denom;
1199
1200    for (y=0; y<height; y++, dst+=stride, src+=stride) {
1201        __asm__ volatile (
1202            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1203            MMI_LDC1(%[ftmp1], %[src0], 0x00)
1204            MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1205            "mtc1       %[weights], %[ftmp3]                            \n\t"
1206            "mtc1       %[weightd], %[ftmp4]                            \n\t"
1207            "mtc1       %[offset],  %[ftmp5]                            \n\t"
1208            "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1209            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1210            "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1211            "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1212            "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1213            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1214            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1215            "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1216            "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1217            "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1218            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1219            "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1220            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1221            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1222            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1223            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1224            "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1225            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1226            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1227            MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1228            MMI_LDC1(%[ftmp1], %[src1], 0x00)
1229            MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1230            "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1231            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1232            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1233            "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1234            "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1235            "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1236            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1237            "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1238            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1239            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1240            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1241            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1242            "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1243            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1244            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1245            MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1246            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1247              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1248              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1249              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1250              RESTRICT_ASM_ALL64
1251              [ftmp8]"=&f"(ftmp[8])
1252            : [dst0]"r"(dst),               [dst1]"r"(dst+8),
1253              [src0]"r"(src),               [src1]"r"(src+8),
1254              [weights]"r"(weights),        [weightd]"r"(weightd),
1255              [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1256            : "memory"
1257        );
1258    }
1259}
1260
1261void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1262        int log2_denom, int weight, int offset)
1263{
1264    int y;
1265    double ftmp[6];
1266    DECLARE_VAR_ALL64;
1267
1268    offset <<= log2_denom;
1269
1270    if (log2_denom)
1271        offset += 1 << (log2_denom - 1);
1272
1273    for (y=0; y<height; y++, block+=stride) {
1274        __asm__ volatile (
1275            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1276            MMI_LDC1(%[ftmp1], %[block], 0x00)
1277            "mtc1       %[weight],  %[ftmp2]                            \n\t"
1278            "mtc1       %[offset],  %[ftmp3]                            \n\t"
1279            "mtc1       %[log2_denom],              %[ftmp5]            \n\t"
1280            "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1281            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1282            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
1283            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1284            "pmullh     %[ftmp4],   %[ftmp4],       %[ftmp2]            \n\t"
1285            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1286            "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1287            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1288            "psrah      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t"
1289            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1290            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1291            MMI_SDC1(%[ftmp1], %[block], 0x00)
1292            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1293              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1294              [ftmp4]"=&f"(ftmp[4]),
1295              RESTRICT_ASM_ALL64
1296              [ftmp5]"=&f"(ftmp[5])
1297            : [block]"r"(block),            [weight]"r"(weight),
1298              [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1299            : "memory"
1300        );
1301    }
1302}
1303
1304void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1305        ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1306        int offset)
1307{
1308    int y;
1309    double ftmp[9];
1310    DECLARE_VAR_ALL64;
1311
1312    offset = ((offset + 1) | 1) << log2_denom;
1313
1314    for (y=0; y<height; y++, dst+=stride, src+=stride) {
1315        __asm__ volatile (
1316            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1317            MMI_LDC1(%[ftmp1], %[src], 0x00)
1318            MMI_LDC1(%[ftmp2], %[dst], 0x00)
1319            "mtc1       %[weights], %[ftmp3]                            \n\t"
1320            "mtc1       %[weightd], %[ftmp4]                            \n\t"
1321            "mtc1       %[offset],  %[ftmp5]                            \n\t"
1322            "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1323            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1324            "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1325            "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1326            "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t"
1327            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]            \n\t"
1328            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1329            "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1330            "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t"
1331            "pmullh     %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
1332            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1333            "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1334            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp5]            \n\t"
1335            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1336            "paddsh     %[ftmp7],   %[ftmp7],       %[ftmp8]            \n\t"
1337            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1338            "psrah      %[ftmp7],   %[ftmp7],       %[ftmp6]            \n\t"
1339            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1340            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1341            MMI_SDC1(%[ftmp1], %[dst], 0x00)
1342            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1343              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1344              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1345              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1346              RESTRICT_ASM_ALL64
1347              [ftmp8]"=&f"(ftmp[8])
1348            : [dst]"r"(dst),                [src]"r"(src),
1349              [weights]"r"(weights),        [weightd]"r"(weightd),
1350              [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1351            : "memory"
1352        );
1353    }
1354}
1355
1356void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1357        int log2_denom, int weight, int offset)
1358{
1359    int y;
1360    double ftmp[5];
1361    DECLARE_VAR_LOW32;
1362
1363    offset <<= log2_denom;
1364
1365    if (log2_denom)
1366        offset += 1 << (log2_denom - 1);
1367
1368    for (y=0; y<height; y++, block+=stride) {
1369        __asm__ volatile (
1370            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1371            MMI_ULWC1(%[ftmp1], %[block], 0x00)
1372            "mtc1       %[weight],  %[ftmp2]                            \n\t"
1373            "mtc1       %[offset],  %[ftmp3]                            \n\t"
1374            "mtc1       %[log2_denom],              %[ftmp4]            \n\t"
1375            "pshufh     %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1376            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1377            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1378            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1379            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1380            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
1381            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1382            MMI_SWC1(%[ftmp1], %[block], 0x00)
1383            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1384              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1385              RESTRICT_ASM_LOW32
1386              [ftmp4]"=&f"(ftmp[4])
1387            : [block]"r"(block),            [weight]"r"(weight),
1388              [offset]"r"(offset),          [log2_denom]"r"(log2_denom)
1389            : "memory"
1390        );
1391    }
1392}
1393
1394void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1395        ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1396        int offset)
1397{
1398    int y;
1399    double ftmp[7];
1400    DECLARE_VAR_LOW32;
1401
1402    offset = ((offset + 1) | 1) << log2_denom;
1403
1404    for (y=0; y<height; y++, dst+=stride, src+=stride) {
1405        __asm__ volatile (
1406            "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1407            MMI_ULWC1(%[ftmp1], %[src], 0x00)
1408            MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1409            "mtc1       %[weight],  %[ftmp3]                            \n\t"
1410            "mtc1       %[weightd], %[ftmp4]                            \n\t"
1411            "mtc1       %[offset],  %[ftmp5]                            \n\t"
1412            "mtc1       %[log2_denom],              %[ftmp6]            \n\t"
1413            "pshufh     %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
1414            "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
1415            "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
1416            "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1417            "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
1418            "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1419            "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1420            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
1421            "paddsh     %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
1422            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1423            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1424            MMI_SWC1(%[ftmp1], %[dst], 0x00)
1425            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1426              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1427              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1428              RESTRICT_ASM_LOW32
1429              [ftmp6]"=&f"(ftmp[6])
1430            : [dst]"r"(dst),                [src]"r"(src),
1431              [weight]"r"(weights),         [weightd]"r"(weightd),
1432              [offset]"r"(offset),          [log2_denom]"r"(log2_denom+1)
1433            : "memory"
1434        );
1435    }
1436}
1437
1438void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1439        int8_t *tc0)
1440{
1441    double ftmp[12];
1442    mips_reg addr[2];
1443    DECLARE_VAR_LOW32;
1444    DECLARE_VAR_ALL64;
1445    DECLARE_VAR_ADDRT;
1446
1447    __asm__ volatile (
1448        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1449        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1450        PTR_ADDU   "%[addr1],   %[stride],      %[addr0]                \n\t"
1451        "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1452        PTR_SUBU   "%[addr1],   $0,             %[addr1]                \n\t"
1453        "addi       %[beta],    %[beta],        -0x01                   \n\t"
1454        PTR_ADDU   "%[addr1],   %[addr1],       %[pix]                  \n\t"
1455        MMI_LDC1(%[ftmp3], %[pix], 0x00)
1456        MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1457        MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1458        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1459        "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1460        "mtc1       %[beta],    %[ftmp6]                                \n\t"
1461        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1462        "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1463        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1464        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1465        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1466        "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1467        "por        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1468        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1469        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1470        "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1471        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1472        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1473        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1474        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1475        "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1476        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1477        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1478        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1479        "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1480        "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
1481        MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1482        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1483        "punpcklbh  %[ftmp9],   %[ftmp5],       %[ftmp5]                \n\t"
1484        "pcmpgtb    %[ftmp5],   %[ftmp9],       %[ftmp4]                \n\t"
1485        MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1486        "pand       %[ftmp10],  %[ftmp5],       %[ftmp8]                \n\t"
1487        "psubusb    %[ftmp8],   %[ftmp4],       %[ftmp2]                \n\t"
1488        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1489        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1490        "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1491        "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1492        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1493        "pand       %[ftmp5],   %[ftmp10],      %[ftmp9]                \n\t"
1494        "psubb      %[ftmp8],   %[ftmp5],       %[ftmp7]                \n\t"
1495        "pand       %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1496        "pavgb      %[ftmp5],   %[ftmp2],       %[ftmp3]                \n\t"
1497        MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1498        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1499        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp11]               \n\t"
1500        "pand       %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1501        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1502        "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp7]                \n\t"
1503        "paddusb    %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1504        "pmaxub     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1505        "pminub     %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
1506        MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
1507        MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1508        "psubusb    %[ftmp4],   %[ftmp5],       %[ftmp3]                \n\t"
1509        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp5]                \n\t"
1510        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1511        "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1512        "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
1513        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1514        "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1515        "pand       %[ftmp6],   %[ftmp9],       %[ftmp7]                \n\t"
1516        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1517        "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1518        MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1519        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1520        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1521        "pand       %[ftmp7],   %[ftmp7],       %[ff_pb_1]              \n\t"
1522        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1523        "psubusb    %[ftmp7],   %[ftmp4],       %[ftmp6]                \n\t"
1524        "paddusb    %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1525        "pmaxub     %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1526        "pminub     %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1527        MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1528        "pxor       %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1529        "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1530        "pand       %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1531        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1532        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1533        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1534        "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1535        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1536        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1537        "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1538        "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1539        "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1540        "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1541        "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
1542        "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1543        "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1544        "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1545        "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1546        MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1547        MMI_SDC1(%[ftmp3], %[pix], 0x00)
1548        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1549          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1550          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1551          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1552          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1553          [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1554          RESTRICT_ASM_LOW32
1555          RESTRICT_ASM_ALL64
1556          RESTRICT_ASM_ADDRT
1557          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1])
1558        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1559          [alpha]"r"((mips_reg)alpha),      [beta]"r"((mips_reg)beta),
1560          [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1.f),
1561          [ff_pb_3]"f"(ff_pb_3.f),          [ff_pb_A1]"f"(ff_pb_A1.f)
1562        : "memory"
1563    );
1564}
1565
1566static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1567        int beta)
1568{
1569    DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1570    double ftmp[16];
1571    uint64_t tmp[1];
1572    mips_reg addr[3];
1573    DECLARE_VAR_ALL64;
1574    DECLARE_VAR_ADDRT;
1575
1576    __asm__ volatile (
1577        "ori        %[tmp0],    $0,             0x01                    \n\t"
1578        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1579        "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
1580        PTR_SLL    "%[addr0],   %[stride],      0x02                    \n\t"
1581        PTR_ADDU   "%[addr2],   %[stride],      %[stride]               \n\t"
1582        PTR_ADDIU  "%[alpha],   %[alpha],       -0x01                   \n\t"
1583        "sslld      %[ftmp11],  %[ftmp9],       %[ftmp9]                \n\t"
1584        "bltz       %[alpha],   1f                                      \n\t"
1585        PTR_ADDU   "%[addr1],   %[addr2],       %[stride]               \n\t"
1586        PTR_ADDIU  "%[beta],    %[beta],        -0x01                   \n\t"
1587        "bltz       %[beta],    1f                                      \n\t"
1588        PTR_SUBU   "%[addr0],   $0,             %[addr0]                \n\t"
1589        PTR_ADDU   "%[addr0],   %[addr0],       %[pix]                  \n\t"
1590        MMI_LDC1(%[ftmp3], %[pix], 0x00)
1591        MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1592        MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1593        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1594        "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1595        "mtc1       %[beta],    %[ftmp6]                                \n\t"
1596        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1597        "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1598        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1599        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1600        "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1601        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1602        "por        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1603        MMI_SDC1(%[ftmp5], %[stack], 0x10)
1604        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1605        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1606        "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1607        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1608        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1609        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1610        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1611        "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1612        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1613        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1614        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1615        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1616        MMI_LDC1(%[ftmp5], %[stack], 0x10)
1617        "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1618        "ldc1       %[ftmp10],  %[ff_pb_1]                              \n\t"
1619        MMI_SDC1(%[ftmp8], %[stack], 0x20)
1620        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1621        "psubusb    %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1622        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp10]               \n\t"
1623        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp3]                \n\t"
1624        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1625        "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1626        MMI_LDC1(%[ftmp15], %[stack], 0x20)
1627        "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1628        "pand       %[ftmp7],   %[ftmp7],       %[ftmp15]               \n\t"
1629        MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1630        "psubusb    %[ftmp8],   %[ftmp15],      %[ftmp2]                \n\t"
1631        "psubusb    %[ftmp5],   %[ftmp2],       %[ftmp15]               \n\t"
1632        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1633        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1634        "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1635        "pand       %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1636        MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1637        MMI_SDC1(%[ftmp5], %[stack], 0x30)
1638        "psubusb    %[ftmp8],   %[ftmp14],      %[ftmp3]                \n\t"
1639        "psubusb    %[ftmp5],   %[ftmp3],       %[ftmp14]               \n\t"
1640        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1641        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1642        "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
1643        "pand       %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1644        MMI_SDC1(%[ftmp5], %[stack], 0x40)
1645        "pavgb      %[ftmp5],   %[ftmp15],      %[ftmp1]                \n\t"
1646        "pavgb      %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1647        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1648        MMI_SDC1(%[ftmp6], %[stack], 0x10)
1649        "paddb      %[ftmp7],   %[ftmp15],      %[ftmp1]                \n\t"
1650        "paddb      %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1651        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1652        "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1653        MMI_SDC1(%[ftmp7], %[stack], 0x00)
1654        "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1655        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1656        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1657        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1658        "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1659        "pavgb      %[ftmp6],   %[ftmp15],      %[ftmp4]                \n\t"
1660        "psubb      %[ftmp7],   %[ftmp15],      %[ftmp4]                \n\t"
1661        "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1662        "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1663        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1664        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1665        MMI_LDC1(%[ftmp13], %[stack], 0x10)
1666        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
1667        "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1668        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1669        "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1670        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1671        "pand       %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1672        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
1673        "pxor       %[ftmp8],   %[ftmp2],       %[ftmp4]                \n\t"
1674        "pavgb      %[ftmp7],   %[ftmp2],       %[ftmp4]                \n\t"
1675        "pand       %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1676        "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1677        MMI_LDC1(%[ftmp13], %[stack], 0x30)
1678        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
1679        MMI_LDC1(%[ftmp12], %[stack], 0x20)
1680        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1681        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
1682        "pand       %[ftmp6],   %[ftmp6],       %[ftmp13]               \n\t"
1683        "pand       %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1684        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1685        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
1686        MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
1687        MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1688        "paddb      %[ftmp7],   %[ftmp15],      %[ftmp6]                \n\t"
1689        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1690        MMI_LDC1(%[ftmp12], %[stack], 0x00)
1691        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1692        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1693        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1694        "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1695        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1696        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1697        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1698        MMI_LDC1(%[ftmp12], %[stack], 0x30)
1699        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1700        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1701        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1702        "pand       %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1703        "pand       %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1704        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
1705        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp15]               \n\t"
1706        MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1707        MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
1708        "pavgb      %[ftmp5],   %[ftmp14],      %[ftmp4]                \n\t"
1709        "pavgb      %[ftmp6],   %[ftmp3],       %[ftmp2]                \n\t"
1710        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1711        MMI_SDC1(%[ftmp6], %[stack], 0x10)
1712        "paddb      %[ftmp7],   %[ftmp14],      %[ftmp4]                \n\t"
1713        "paddb      %[ftmp8],   %[ftmp3],       %[ftmp2]                \n\t"
1714        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1715        "mov.d      %[ftmp8],   %[ftmp7]                                \n\t"
1716        MMI_SDC1(%[ftmp7], %[stack], 0x00)
1717        "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp9]                \n\t"
1718        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1719        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
1720        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1721        "psubb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1722        "pavgb      %[ftmp6],   %[ftmp14],      %[ftmp1]                \n\t"
1723        "paddb      %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
1724        "psubb      %[ftmp7],   %[ftmp14],      %[ftmp1]                \n\t"
1725        "psubb      %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1726        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1727        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1728        MMI_LDC1(%[ftmp12], %[stack], 0x10)
1729        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1730        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1731        "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp11]               \n\t"
1732        "pavgb      %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t"
1733        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp6]                \n\t"
1734        "pand       %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1735        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
1736        "pxor       %[ftmp8],   %[ftmp3],       %[ftmp1]                \n\t"
1737        "pavgb      %[ftmp7],   %[ftmp3],       %[ftmp1]                \n\t"
1738        "pand       %[ftmp8],   %[ftmp8],       %[ftmp10]               \n\t"
1739        MMI_LDC1(%[ftmp12], %[stack], 0x40)
1740        "psubb      %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1741        MMI_LDC1(%[ftmp13], %[stack], 0x20)
1742        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
1743        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1744        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
1745        "pand       %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1746        "pand       %[ftmp7],   %[ftmp7],       %[ftmp13]               \n\t"
1747        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1748        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
1749        MMI_SDC1(%[ftmp6], %[pix], 0x00)
1750        MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1751        "paddb      %[ftmp7],   %[ftmp14],      %[ftmp6]                \n\t"
1752        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1753        MMI_LDC1(%[ftmp12], %[stack], 0x00)
1754        "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1755        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1756        "paddb      %[ftmp7],   %[ftmp7],       %[ftmp12]               \n\t"
1757        "psrlh      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
1758        "pavgb      %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
1759        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
1760        "pand       %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
1761        MMI_LDC1(%[ftmp12], %[stack], 0x40)
1762        "psubb      %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
1763        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1764        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1765        "pand       %[ftmp5],   %[ftmp5],       %[ftmp12]               \n\t"
1766        "pand       %[ftmp6],   %[ftmp6],       %[ftmp12]               \n\t"
1767        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp4]                \n\t"
1768        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp14]               \n\t"
1769        MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1770        MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
1771        "1:                                                             \n\t"
1772        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1773          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1774          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1775          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1776          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1777          [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
1778          [ftmp12]"=&f"(ftmp[12]),          [ftmp13]"=&f"(ftmp[13]),
1779          [ftmp14]"=&f"(ftmp[14]),          [ftmp15]"=&f"(ftmp[15]),
1780          [tmp0]"=&r"(tmp[0]),
1781          RESTRICT_ASM_ALL64
1782          RESTRICT_ASM_ADDRT
1783          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
1784          [addr2]"=&r"(addr[2]),
1785          [alpha]"+&r"(alpha),              [beta]"+&r"(beta)
1786        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1787          [stack]"r"(stack),                [ff_pb_1]"m"(ff_pb_1)
1788        : "memory"
1789    );
1790}
1791
1792void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1793        int beta, int8_t *tc0)
1794{
1795    double ftmp[9];
1796    mips_reg addr[1];
1797    DECLARE_VAR_LOW32;
1798    DECLARE_VAR_ALL64;
1799    DECLARE_VAR_ADDRT;
1800
1801    __asm__ volatile (
1802        "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1803        "addi       %[beta],    %[beta],        -0x01                   \n\t"
1804        "or         %[addr0],   $0,             %[pix]                  \n\t"
1805        PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1806        PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1807        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1808        MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1809        MMI_LDC1(%[ftmp3], %[pix], 0x00)
1810        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1811
1812        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1813        "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1814        "mtc1       %[beta],    %[ftmp6]                                \n\t"
1815        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1816        "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1817        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1818        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1819        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1820        "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1821        "por        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1822        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1823        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1824        "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1825        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1826        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1827        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1828        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1829        "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1830        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1831        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1832        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1833        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1834        "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1835        MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1836        "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1837        "pand       %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1838        "pcmpeqb    %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1839        "pxor       %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
1840        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1841        "pand       %[ftmp6],   %[ftmp6],       %[ff_pb_1]              \n\t"
1842        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
1843        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
1844        "pavgb      %[ftmp4],   %[ftmp4],       %[ff_pb_3]              \n\t"
1845        "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
1846        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1847        "paddusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1848        "psubusb    %[ftmp7],   %[ff_pb_A1],    %[ftmp4]                \n\t"
1849        "psubusb    %[ftmp4],   %[ftmp4],       %[ff_pb_A1]             \n\t"
1850        "pminub     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1851        "pminub     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
1852        "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1853        "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1854        "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1855        "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1856
1857        MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1858        MMI_SDC1(%[ftmp3], %[pix], 0x00)
1859        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1860          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1861          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1862          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1863          [ftmp8]"=&f"(ftmp[8]),
1864          RESTRICT_ASM_LOW32
1865          RESTRICT_ASM_ALL64
1866          RESTRICT_ASM_ADDRT
1867          [addr0]"=&r"(addr[0])
1868        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1869          [alpha]"r"(alpha),                [beta]"r"(beta),
1870          [tc0]"r"(tc0),                    [ff_pb_1]"f"(ff_pb_1.f),
1871          [ff_pb_3]"f"(ff_pb_3.f),          [ff_pb_A1]"f"(ff_pb_A1.f)
1872        : "memory"
1873    );
1874}
1875
1876void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1877        int beta)
1878{
1879    double ftmp[9];
1880    mips_reg addr[1];
1881    DECLARE_VAR_ALL64;
1882    DECLARE_VAR_ADDRT;
1883
1884    __asm__ volatile (
1885        "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1886        "addi       %[beta],    %[beta],        -0x01                   \n\t"
1887        "or         %[addr0],   $0,             %[pix]                  \n\t"
1888        PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1889        PTR_SUBU   "%[addr0],   %[addr0],       %[stride]               \n\t"
1890        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1891        MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1892        MMI_LDC1(%[ftmp3], %[pix], 0x00)
1893        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1894
1895        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1896        "mtc1       %[alpha],   %[ftmp5]                                \n\t"
1897        "mtc1       %[beta],    %[ftmp6]                                \n\t"
1898        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1899        "pshufh     %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1900        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
1901        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
1902        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp2]                \n\t"
1903        "psubusb    %[ftmp8],   %[ftmp2],       %[ftmp3]                \n\t"
1904        "por        %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1905        "psubusb    %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1906        "psubusb    %[ftmp7],   %[ftmp2],       %[ftmp1]                \n\t"
1907        "psubusb    %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
1908        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1909        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1910        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1911        "psubusb    %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1912        "psubusb    %[ftmp5],   %[ftmp4],       %[ftmp3]                \n\t"
1913        "por        %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1914        "psubusb    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1915        "por        %[ftmp8],   %[ftmp8],       %[ftmp5]                \n\t"
1916        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
1917        "pcmpeqb    %[ftmp8],   %[ftmp8],       %[ftmp7]                \n\t"
1918        "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
1919        "mov.d      %[ftmp7],   %[ftmp3]                                \n\t"
1920        "pxor       %[ftmp5],   %[ftmp2],       %[ftmp4]                \n\t"
1921        "pand       %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1922        "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1923        "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1924        "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
1925        "pxor       %[ftmp5],   %[ftmp3],       %[ftmp1]                \n\t"
1926        "pand       %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
1927        "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
1928        "psubusb    %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
1929        "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1930        "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1931        "psubb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1932        "pand       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
1933        "pand       %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
1934        "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1935        "paddb      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
1936
1937        MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1938        MMI_SDC1(%[ftmp3], %[pix], 0x00)
1939        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1940          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1941          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1942          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1943          [ftmp8]"=&f"(ftmp[8]),
1944          RESTRICT_ASM_ALL64
1945          RESTRICT_ASM_ADDRT
1946          [addr0]"=&r"(addr[0])
1947        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
1948          [alpha]"r"(alpha),                [beta]"r"(beta),
1949          [ff_pb_1]"f"(ff_pb_1.f)
1950        : "memory"
1951    );
1952}
1953
1954void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1955        int8_t *tc0)
1956{
1957    double ftmp[11];
1958    mips_reg addr[6];
1959    DECLARE_VAR_LOW32;
1960
1961    __asm__ volatile (
1962        "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
1963        "addi       %[beta],    %[beta],        -0x01                   \n\t"
1964        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
1965        PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
1966        PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
1967        PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
1968        "or         %[addr5],   $0,             %[pix]                  \n\t"
1969        PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
1970        MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1971        PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
1972        MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1973        PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
1974        MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1975        MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1976        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
1977        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1978        PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
1979        "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
1980        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
1981        MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1982        PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
1983        MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1984        PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
1985        MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1986        PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
1987        MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1988        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
1989        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
1990        "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
1991        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
1992        "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
1993        "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
1994        "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
1995        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
1996        "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1997        "mov.d      %[ftmp9],   %[ftmp0]                                \n\t"
1998        "mov.d      %[ftmp10],  %[ftmp3]                                \n\t"
1999
2000        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
2001        "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2002        "mtc1       %[beta],    %[ftmp5]                                \n\t"
2003        "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2004        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2005        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2006        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2007        "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2008        "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2009        "por        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2010        "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2011        "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2012        "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2013        "por        %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2014        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2015        "por        %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2016        "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2017        "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2018        "por        %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2019        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2020        "por        %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2021        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2022        "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2023        MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2024        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2025        "pand       %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2026        "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2027        "pxor       %[ftmp5],   %[ftmp1],       %[ftmp2]                \n\t"
2028        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2029        "pand       %[ftmp5],   %[ftmp5],       %[ff_pb_1]              \n\t"
2030        "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
2031        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp1]                \n\t"
2032        "pavgb      %[ftmp3],   %[ftmp3],       %[ff_pb_3]              \n\t"
2033        "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
2034        "pavgb      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2035        "paddusb    %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
2036        "psubusb    %[ftmp6],   %[ff_pb_A1],    %[ftmp3]                \n\t"
2037        "psubusb    %[ftmp3],   %[ftmp3],       %[ff_pb_A1]             \n\t"
2038        "pminub     %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
2039        "pminub     %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
2040        "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
2041        "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2042        "paddusb    %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2043        "paddusb    %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2044
2045        "punpckhwd  %[ftmp4],   %[ftmp9],       %[ftmp9]                \n\t"
2046        "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2047        "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2048        "punpcklbh  %[ftmp0],   %[ftmp9],       %[ftmp1]                \n\t"
2049        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp10]               \n\t"
2050        "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2051        "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2052        MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2053        PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2054        "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2055        MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2056        PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2057        MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2058        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2059        "punpckhwd  %[ftmp3],   %[ftmp10],      %[ftmp10]               \n\t"
2060        MMI_USWC1(%[ftmp0], %[pix], 0x00)
2061        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2062        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2063        PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2064        "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2065        "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2066        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2067        "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2068        PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2069        PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2070        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2071        MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2072        PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2073        "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2074        MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2075        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2076          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2077          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2078          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2079          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2080          [ftmp10]"=&f"(ftmp[10]),
2081          RESTRICT_ASM_LOW32
2082          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2083          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2084          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2085          [pix]"+&r"(pix)
2086        : [alpha]"r"(alpha),                [beta]"r"(beta),
2087          [stride]"r"((mips_reg)stride),    [tc0]"r"(tc0),
2088          [ff_pb_1]"f"(ff_pb_1.f),          [ff_pb_3]"f"(ff_pb_3.f),
2089          [ff_pb_A1]"f"(ff_pb_A1.f)
2090        : "memory"
2091    );
2092}
2093
2094void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2095        int beta)
2096{
2097    double ftmp[11];
2098    mips_reg addr[6];
2099    DECLARE_VAR_LOW32;
2100
2101    __asm__ volatile (
2102        "addi       %[alpha],   %[alpha],       -0x01                   \n\t"
2103        "addi       %[beta],    %[beta],        -0x01                   \n\t"
2104        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2105        PTR_ADDI   "%[pix],     %[pix],         -0x02                   \n\t"
2106        PTR_ADDU   "%[addr1],   %[addr0],       %[stride]               \n\t"
2107        PTR_ADDU   "%[addr2],   %[addr0],       %[addr0]                \n\t"
2108        "or         %[addr5],   $0,             %[pix]                  \n\t"
2109        PTR_ADDU   "%[pix],     %[pix],         %[addr1]                \n\t"
2110        MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2111        PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2112        MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2113        PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2114        MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2115        MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2116        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2117        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2118        PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2119        "punpckhhw  %[ftmp2],   %[ftmp0],       %[ftmp1]                \n\t"
2120        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2121        MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2122        PTR_ADDU   "%[addr4],   %[pix],         %[addr0]                \n\t"
2123        MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2124        PTR_ADDU   "%[addr3],   %[pix],         %[addr1]                \n\t"
2125        MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2126        PTR_ADDU   "%[addr4],   %[pix],         %[addr2]                \n\t"
2127        MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2128        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2129        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
2130        "mov.d      %[ftmp6],   %[ftmp4]                                \n\t"
2131        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2132        "punpckhhw  %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
2133        "punpckhwd  %[ftmp1],   %[ftmp0],       %[ftmp4]                \n\t"
2134        "punpckhwd  %[ftmp3],   %[ftmp2],       %[ftmp6]                \n\t"
2135        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2136        "punpcklwd  %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2137
2138        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
2139        "mtc1       %[alpha],   %[ftmp4]                                \n\t"
2140        "mtc1       %[beta],    %[ftmp5]                                \n\t"
2141        "pshufh     %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t"
2142        "pshufh     %[ftmp5],   %[ftmp5],       %[ftmp8]                \n\t"
2143        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2144        "packushb   %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2145        "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp1]                \n\t"
2146        "psubusb    %[ftmp7],   %[ftmp1],       %[ftmp2]                \n\t"
2147        "por        %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2148        "psubusb    %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2149        "psubusb    %[ftmp6],   %[ftmp1],       %[ftmp0]                \n\t"
2150        "psubusb    %[ftmp4],   %[ftmp0],       %[ftmp1]                \n\t"
2151        "por        %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2152        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2153        "por        %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2154        "psubusb    %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
2155        "psubusb    %[ftmp4],   %[ftmp3],       %[ftmp2]                \n\t"
2156        "por        %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2157        "psubusb    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2158        "por        %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
2159        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
2160        "pcmpeqb    %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
2161        "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
2162        "mov.d      %[ftmp6],   %[ftmp2]                                \n\t"
2163        "pxor       %[ftmp4],   %[ftmp1],       %[ftmp3]                \n\t"
2164        "pand       %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2165        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
2166        "psubusb    %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
2167        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
2168        "pxor       %[ftmp4],   %[ftmp2],       %[ftmp0]                \n\t"
2169        "pand       %[ftmp4],   %[ftmp4],       %[ff_pb_1]              \n\t"
2170        "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
2171        "psubusb    %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
2172        "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2173        "psubb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2174        "psubb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2175        "pand       %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
2176        "pand       %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
2177        "paddb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
2178        "paddb      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
2179
2180        "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2181        "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2182        "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2183        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2184        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2185        "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2186        "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2187        MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2188        PTR_ADDU   "%[addr3],   %[addr5],       %[stride]               \n\t"
2189        "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2190        PTR_ADDU   "%[addr4],   %[addr5],       %[addr0]                \n\t"
2191        MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2192        MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2193        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2194        "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2195        MMI_USWC1(%[ftmp0], %[pix], 0x00)
2196        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2197        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2198        PTR_ADDU   "%[addr3],   %[pix],         %[stride]               \n\t"
2199        "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2200        "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2201        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2202        "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2203        PTR_ADDU   "%[addr3],   %[pix],         %[addr0]                \n\t"
2204        PTR_ADDU   "%[addr4],   %[pix],         %[addr1]                \n\t"
2205        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2206        PTR_ADDU   "%[addr3],   %[pix],         %[addr2]                \n\t"
2207        MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2208        "punpckhwd  %[ftmp9],   %[ftmp4],       %[ftmp4]                \n\t"
2209        MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2210        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2211          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2212          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2213          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2214          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
2215          [ftmp10]"=&f"(ftmp[10]),
2216          RESTRICT_ASM_LOW32
2217          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2218          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2219          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2220          [pix]"+&r"(pix)
2221        : [alpha]"r"(alpha),                [beta]"r"(beta),
2222          [stride]"r"((mips_reg)stride),    [ff_pb_1]"f"(ff_pb_1.f)
2223        : "memory"
2224    );
2225}
2226
/**
 * Run ff_deblock_v8_luma_8_mmi() on the two halves of an edge: once at pix
 * with tc0[0..1] and once at pix + 8 with tc0[2..3].
 *
 * A half is skipped when both of its tc0 entries are negative; if at least
 * one entry is non-negative the helper is called for that half.
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
        int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* Equivalent to (tc[0] & tc[1]) < 0: the sign bit only survives
         * the AND when both values are negative. */
        if (tc[0] < 0 && tc[1] < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2235
/**
 * Intra variant: applies deblock_v8_luma_intra_8_mmi() unconditionally at
 * pix and then at pix + 8, with the same stride, alpha and beta.
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
        int beta)
{
    int offset;

    for (offset = 0; offset < 16; offset += 8)
        deblock_v8_luma_intra_8_mmi(pix + offset, stride, alpha, beta);
}
2242
2243void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2244        int8_t *tc0)
2245{
2246    DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2247    double ftmp[9];
2248    mips_reg addr[8];
2249    DECLARE_VAR_LOW32;
2250    DECLARE_VAR_ALL64;
2251
2252    __asm__ volatile (
2253        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2254        PTR_ADDI   "%[addr1],   %[pix],         -0x4                    \n\t"
2255        PTR_ADDU   "%[addr2],   %[stride],      %[addr0]                \n\t"
2256        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2257        PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2258        PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2259        MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2260        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2261        MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2262        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2263        PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2264        MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2265        PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2266        MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2267        PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2268        MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2269        PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2270        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2271        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2272        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2273        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2274        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2275        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2276        PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2277        MMI_SDC1(%[ftmp1], %[stack], 0x10)
2278        MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2279        PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2280        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2281        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2282        "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2283        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2284        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2285        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2286        MMI_LDC1(%[ftmp8], %[stack], 0x10)
2287        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2288        MMI_SDC1(%[ftmp0], %[stack], 0x00)
2289        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2290        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2291        "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2292        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2293        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2294        "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2295        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2296        "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2297        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2298        MMI_SDC1(%[ftmp1], %[stack], 0x10)
2299        MMI_SDC1(%[ftmp3], %[stack], 0x20)
2300        MMI_SDC1(%[ftmp7], %[stack], 0x30)
2301        MMI_SDC1(%[ftmp5], %[stack], 0x40)
2302        MMI_SDC1(%[ftmp6], %[stack], 0x50)
2303        PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2304        PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2305        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2306        PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2307        MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2308        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2309        MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2310        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2311        PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2312        MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2313        PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2314        MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2315        PTR_ADDU   "%[addr3],   %[addr4],       %[addr2]                \n\t"
2316        MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2317        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2318        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2319        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2320        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2321        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2322        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2323        PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2324        MMI_SDC1(%[ftmp1], %[stack], 0x18)
2325        MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2326        "punpckhhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2327        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2328        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2329        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2330        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2331        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2332        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2333        MMI_LDC1(%[ftmp8], %[stack], 0x18)
2334        MMI_SDC1(%[ftmp0], %[stack], 0x08)
2335        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp8]                \n\t"
2336        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
2337        "punpckhhw  %[ftmp0],   %[ftmp3],       %[ftmp5]                \n\t"
2338        "punpcklhw  %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
2339        "punpckhwd  %[ftmp5],   %[ftmp7],       %[ftmp3]                \n\t"
2340        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
2341        "punpckhwd  %[ftmp3],   %[ftmp1],       %[ftmp2]                \n\t"
2342        "punpcklwd  %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
2343        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
2344        MMI_SDC1(%[ftmp1], %[stack], 0x18)
2345        MMI_SDC1(%[ftmp3], %[stack], 0x28)
2346        MMI_SDC1(%[ftmp7], %[stack], 0x38)
2347        MMI_SDC1(%[ftmp5], %[stack], 0x48)
2348        MMI_SDC1(%[ftmp6], %[stack], 0x58)
2349        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2350          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2351          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2352          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2353          [ftmp8]"=&f"(ftmp[8]),
2354          RESTRICT_ASM_ALL64
2355          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2356          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2357          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2358          [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2359        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2360          [stack]"r"(stack)
2361        : "memory"
2362    );
2363
2364    ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2365
2366    __asm__ volatile (
2367        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2368        PTR_ADDI   "%[addr1],   %[pix],         -0x02                   \n\t"
2369        PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2370        PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2371        PTR_ADDU   "%[addr7],   %[addr6],       %[addr6]                \n\t"
2372        PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
2373        MMI_LDC1(%[ftmp0], %[stack], 0x10)
2374        MMI_LDC1(%[ftmp1], %[stack], 0x20)
2375        MMI_LDC1(%[ftmp2], %[stack], 0x30)
2376        MMI_LDC1(%[ftmp3], %[stack], 0x40)
2377        "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2378        "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2379        "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2380        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2381        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2382        "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2383        "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2384        MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2385        PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2386        "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2387        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2388        MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2389        MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2390        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2391        "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2392        MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2393        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2394        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2395        "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2396        PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2397        "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2398        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2399        PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2400        "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2401        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2402        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2403        MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2404        PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2405        "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2406        PTR_ADDU   "%[addr1],   %[addr1],       %[addr7]                \n\t"
2407        MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2408        PTR_ADDU   "%[addr4],   %[addr4],       %[addr7]                \n\t"
2409        MMI_LDC1(%[ftmp0], %[stack], 0x18)
2410        MMI_LDC1(%[ftmp1], %[stack], 0x28)
2411        MMI_LDC1(%[ftmp2], %[stack], 0x38)
2412        MMI_LDC1(%[ftmp3], %[stack], 0x48)
2413        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2414        "punpckhwd  %[ftmp4],   %[ftmp0],       %[ftmp0]                \n\t"
2415        PTR_ADDU   "%[addr6],   %[addr0],       %[addr0]                \n\t"
2416        "punpckhwd  %[ftmp5],   %[ftmp1],       %[ftmp1]                \n\t"
2417        "punpckhwd  %[ftmp6],   %[ftmp2],       %[ftmp2]                \n\t"
2418        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2419        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2420        PTR_ADDU   "%[addr3],   %[addr1],       %[stride]               \n\t"
2421        "punpcklhw  %[ftmp1],   %[ftmp0],       %[ftmp2]                \n\t"
2422        "punpckhhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2423        MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2424        "punpckhwd  %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
2425        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2426        MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2427        MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2428        "punpckhwd  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
2429        "punpckhwd  %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
2430        MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2431        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2432        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
2433        PTR_ADDU   "%[addr3],   %[addr4],       %[stride]               \n\t"
2434        "punpcklhw  %[ftmp5],   %[ftmp4],       %[ftmp6]                \n\t"
2435        "punpckhhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2436        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2437        PTR_ADDU   "%[addr3],   %[addr4],       %[addr0]                \n\t"
2438        "punpckhwd  %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
2439        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2440        MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2441        MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2442        PTR_ADDU   "%[addr3],   %[addr4],       %[addr6]                \n\t"
2443        "punpckhwd  %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
2444        MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2445        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2446          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2447          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2448          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2449          [ftmp8]"=&f"(ftmp[8]),
2450          RESTRICT_ASM_LOW32
2451          RESTRICT_ASM_ALL64
2452          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2453          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2454          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2455          [addr6]"=&r"(addr[6]),            [addr7]"=&r"(addr[7])
2456        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2457          [stack]"r"(stack)
2458        : "memory"
2459    );
2460}
2461
/*
 * Intra luma deblocking entry point that reuses the "v" filter through a
 * transpose.
 *
 * Steps performed by this function:
 *   1. Read 16 picture rows of 8 bytes each, starting at pix - 4, and
 *      transpose them (as two 8x8 byte blocks) into the stack buffer ptmp.
 *      After this step the line at byte offset j * 0x10 of ptmp holds picture
 *      column pix[j - 4]: rows 0..7 in its first 8 bytes, rows 8..15 in the
 *      following 8 bytes.
 *   2. Call ff_deblock_v_luma_intra_8_mmi() on &ptmp[8] (byte offset 0x40,
 *      i.e. the line of picture column pix[0]) with a stride of 0x10.
 *   3. Transpose the eight ptmp lines back and write 8 bytes per row to the
 *      same 16 picture rows.
 *
 * pix    - picture pointer; rows pix - 4 + r * stride, r = 0..15, are read
 *          and rewritten (8 bytes each).
 * stride - distance in bytes between consecutive picture rows.
 * alpha, beta - forwarded unchanged to ff_deblock_v_luma_intra_8_mmi().
 *
 * The PTR_* and MMI_* helpers come from headers outside this function;
 * the comments below assume MMI_ULDC1/MMI_USDC1 are unaligned 64-bit
 * load/store, MMI_LDC1/MMI_SDC1 are aligned 64-bit load/store and
 * PTR_L/PTR_S are pointer-sized load/store - TODO confirm in mmiutils.h.
 *
 * NOTE(review): ptmp and pdat are declared const, yet they are the targets
 * of the MMI_SDC1/PTR_S operations below, and ptmp is handed to the callee
 * through a cast to a non-const uint8_t *. Confirm whether the const
 * qualifier should be dropped.
 */
2462void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2463        int beta)
2464{
            /* ptmp: transposed pixel buffer (0x11 * 8 bytes, offsets 0x00..0x7f used). */
2465    DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
            /* pdat: carries four address/offset values from the first asm statement
             * to the second one across the function call. */
2466    DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2467    double ftmp[9];
2468    mips_reg addr[7];
2469    DECLARE_VAR_ALL64;
2470
2471    __asm__ volatile (
            /* addr0 = 2 * stride, addr1 = pix - 4, addr2 = 3 * stride,
             * addr3 = 4 * stride, addr4 = addr1 + 3 * stride. */
2472        PTR_ADDU   "%[addr0],   %[stride],      %[stride]               \n\t"
2473        PTR_ADDI   "%[addr1],   %[pix],         -0x04                   \n\t"
2474        PTR_ADDU   "%[addr2],   %[addr0],       %[stride]               \n\t"
2475        PTR_ADDU   "%[addr3],   %[addr0],       %[addr0]                \n\t"
2476        PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
            /* Load rows 0..6 into ftmp0..ftmp6 (row 7 goes to ftmp8 further down);
             * addr5/addr6 are scratch row pointers. */
2477        PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2478        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2479        PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2480        MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2481        MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2482        PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2483        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2484        PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2485        MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2486        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2487        MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2488        MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2489        PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
            /* 8x8 byte transpose: interleave bytes of row pairs, then halfwords,
             * then words. ptmp + 0x00 and ptmp + 0x20 are used as temporary spill
             * slots before they receive their final contents. */
2490        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2491        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2492        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2493        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2494        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2495        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2496        MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2497        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2498        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2499        MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2500        "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2501        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2502        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2503        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2504        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2505        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2506        MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2507        MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2508        "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2509        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2510        "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2511        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2512        "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2513        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
            /* Store transposed lines 0, 1, 4 and 5 (rows 0..7 half). */
2514        MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2515        MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2516        MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2517        MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2518        MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2519        "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2520        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2521        "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2522        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
            /* addr5 = 8 * stride, used below to step to rows 8..15. */
2523        PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
            /* Store transposed lines 2, 3, 6 and 7 (rows 0..7 half). */
2524        MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2525        MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2526        MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2527        MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
            /* Second 8x8 block: same load/transpose sequence for rows 8..15,
             * written to the +0x08 half of each ptmp line. */
2528        PTR_ADDU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2529        PTR_ADDU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
2530        PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2531        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2532        PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2533        MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2534        MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2535        PTR_ADDU   "%[addr5],   %[addr4],       %[stride]               \n\t"
2536        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2537        PTR_ADDU   "%[addr6],   %[addr4],       %[addr0]                \n\t"
2538        MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2539        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2540        MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2541        MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2542        PTR_ADDU   "%[addr5],   %[addr4],       %[addr3]                \n\t"
2543        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2544        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2545        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2546        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2547        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2548        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2549        MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2550        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2551        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
            /* ptmp + 0x08 and ptmp + 0x28 serve as the spill slots in this pass. */
2552        MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2553        "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2554        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2555        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2556        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2557        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2558        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2559        MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2560        MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2561        "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2562        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2563        "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2564        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2565        "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2566        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2567        MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2568        MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2569        MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2570        MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2571        MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2572        "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2573        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2574        "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2575        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2576        MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2577        MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2578        MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2579        MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
            /* Save state for the write-back asm statement:
             * pdat + 0x00 = addr1 (pix - 4 + 8 * stride), pdat + 0x08 = 3 * stride,
             * pdat + 0x10 = 2 * stride, pdat + 0x18 = 4 * stride. */
2580        PTR_S      "%[addr1],   0x00(%[pdat])                           \n\t"
2581        PTR_S      "%[addr2],   0x08(%[pdat])                           \n\t"
2582        PTR_S      "%[addr0],   0x10(%[pdat])                           \n\t"
2583        PTR_S      "%[addr3],   0x18(%[pdat])                           \n\t"
2584        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2585          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2586          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2587          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2588          [ftmp8]"=&f"(ftmp[8]),
2589          RESTRICT_ASM_ALL64
2590          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2591          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2592          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2593          [addr6]"=&r"(addr[6])
2594        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2595          [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2596        : "memory"
2597    );
2598
        /* Filter the transposed data: &ptmp[8] is byte offset 0x40 (the line of
         * picture column pix[0]) and consecutive lines are 0x10 bytes apart. */
2599    ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2600
        /* Write-back: transpose the ptmp lines back into the picture.
         * NOTE(review): [pix] is listed as an input below but is not referenced
         * by any instruction of this statement. */
2601    __asm__ volatile (
            /* Restore addr1 (pix - 4 + 8 * stride), addr2 = 3 * stride,
             * addr0 = 2 * stride, addr3 = 4 * stride; addr4 = addr1 + 3 * stride. */
2602        PTR_L      "%[addr1],   0x00(%[pdat])                           \n\t"
2603        PTR_L      "%[addr2],   0x08(%[pdat])                           \n\t"
2604        PTR_L      "%[addr0],   0x10(%[pdat])                           \n\t"
2605        PTR_L      "%[addr3],   0x18(%[pdat])                           \n\t"
2606        PTR_ADDU   "%[addr4],   %[addr1],       %[addr2]                \n\t"
            /* Load the +0x08 halves of the eight ptmp lines (picture rows 8..15). */
2607        MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2608        MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2609        MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2610        MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2611        MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2612        MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2613        MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2614        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2615        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2616        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2617        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2618        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2619        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2620        MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2621        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2622        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
            /* The destination rows at addr1 and addr1 + 2 * stride are used as
             * spill slots here; both are overwritten with final data further down. */
2623        MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2624        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2625        "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2626        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2627        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2628        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2629        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2630        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2631        MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2632        MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2633        "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2634        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2635        "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2636        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2637        "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2638        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
            /* Store rows +0, +1, +4 and +5 relative to addr1. */
2639        PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2640        MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2641        PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2642        MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2643        PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2644        MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2645        PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2646        MMI_USDC1(%[ftmp4], %[addr5], 0x00)
            /* Reload the intermediate spilled at addr1 + 2 * stride. */
2647        MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2648        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2649        "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2650        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2651        "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2652        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
            /* Store rows +2, +3, +6 and +7 relative to addr1. */
2653        MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2654        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2655        MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2656        PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2657        MMI_USDC1(%[ftmp6], %[addr5], 0x00)
            /* addr5 = 8 * stride; step addr1/addr4 back to picture rows 0..7. */
2658        PTR_ADDU   "%[addr5],   %[addr3],       %[addr3]                \n\t"
2659        MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2660        PTR_SUBU   "%[addr1],   %[addr1],       %[addr5]                \n\t"
2661        PTR_SUBU   "%[addr4],   %[addr4],       %[addr5]                \n\t"
            /* Same sequence for the +0x00 halves of the ptmp lines (rows 0..7). */
2662        MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2663        MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2664        MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2665        MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2666        MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2667        MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2668        MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2669        "punpckhbh  %[ftmp7],   %[ftmp0],       %[ftmp1]                \n\t"
2670        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
2671        "punpckhbh  %[ftmp1],   %[ftmp2],       %[ftmp3]                \n\t"
2672        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
2673        "punpckhbh  %[ftmp3],   %[ftmp4],       %[ftmp5]                \n\t"
2674        "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
2675        MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2676        "punpckhbh  %[ftmp5],   %[ftmp6],       %[ftmp8]                \n\t"
2677        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
2678        MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2679        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2680        "punpckhhw  %[ftmp3],   %[ftmp0],       %[ftmp2]                \n\t"
2681        "punpcklhw  %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
2682        "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp6]                \n\t"
2683        "punpcklhw  %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
2684        "punpckhhw  %[ftmp6],   %[ftmp7],       %[ftmp1]                \n\t"
2685        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
2686        MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2687        MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2688        "punpckhhw  %[ftmp1],   %[ftmp2],       %[ftmp5]                \n\t"
2689        "punpcklhw  %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
2690        "punpckhwd  %[ftmp5],   %[ftmp0],       %[ftmp4]                \n\t"
2691        "punpcklwd  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
2692        "punpckhwd  %[ftmp4],   %[ftmp7],       %[ftmp2]                \n\t"
2693        "punpcklwd  %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
2694        PTR_ADDU   "%[addr5],   %[addr1],       %[stride]               \n\t"
2695        MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2696        PTR_ADDU   "%[addr6],   %[addr4],       %[stride]               \n\t"
2697        MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2698        PTR_ADDU   "%[addr5],   %[addr4],       %[addr0]                \n\t"
2699        MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2700        PTR_ADDU   "%[addr6],   %[addr1],       %[addr0]                \n\t"
2701        MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2702        MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2703        PTR_ADDU   "%[addr5],   %[addr1],       %[addr0]                \n\t"
2704        "punpckhwd  %[ftmp0],   %[ftmp3],       %[ftmp8]                \n\t"
2705        "punpcklwd  %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
2706        "punpckhwd  %[ftmp5],   %[ftmp6],       %[ftmp1]                \n\t"
2707        "punpcklwd  %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
2708        MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2709        PTR_ADDU   "%[addr5],   %[addr4],       %[addr2]                \n\t"
2710        MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2711        PTR_ADDU   "%[addr6],   %[addr4],       %[addr3]                \n\t"
2712        MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2713        MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2714        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
2715          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
2716          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
2717          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
2718          [ftmp8]"=&f"(ftmp[8]),
2719          RESTRICT_ASM_ALL64
2720          [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
2721          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
2722          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
2723          [addr6]"=&r"(addr[6])
2724        : [pix]"r"(pix),                    [stride]"r"((mips_reg)stride),
2725          [ptmp]"r"(ptmp),                  [pdat]"r"(pdat)
2726        : "memory"
2727    );
2728}
2729