1/*
2 * Loongson SIMD optimized mpegvideo
3 *
4 * Copyright (c) 2015 Loongson Technology Corporation Limited
5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include "mpegvideo_mips.h"
26#include "libavutil/mips/mmiutils.h"
27
28void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
29        int n, int qscale)
30{
31    int64_t level, nCoeffs;
32    double ftmp[6];
33    mips_reg addr[1];
34    union mmi_intfloat64 qmul_u, qadd_u;
35    DECLARE_VAR_ALL64;
36
37    qmul_u.i = qscale << 1;
38    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
39
40    if (!s->h263_aic) {
41        if (n<4)
42            level = block[0] * s->y_dc_scale;
43        else
44            level = block[0] * s->c_dc_scale;
45        qadd_u.i = (qscale-1) | 1;
46    } else {
47        qadd_u.i = 0;
48        level = block[0];
49    }
50
51    if(s->ac_pred)
52        nCoeffs = 63;
53    else
54        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
55
56    __asm__ volatile (
57        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
58        "packsswh   %[qmul],    %[qmul],        %[qmul]                 \n\t"
59        "packsswh   %[qmul],    %[qmul],        %[qmul]                 \n\t"
60        "packsswh   %[qadd],    %[qadd],        %[qadd]                 \n\t"
61        "packsswh   %[qadd],    %[qadd],        %[qadd]                 \n\t"
62        "psubh      %[ftmp0],   %[ftmp0],       %[qadd]                 \n\t"
63        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
64        ".p2align   4                                                   \n\t"
65
66        "1:                                                             \n\t"
67        PTR_ADDU   "%[addr0],   %[block],       %[nCoeffs]              \n\t"
68        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
69        MMI_LDC1(%[ftmp2], %[addr0], 0x08)
70        "mov.d      %[ftmp3],   %[ftmp1]                                \n\t"
71        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
72        "pmullh     %[ftmp1],   %[ftmp1],       %[qmul]                 \n\t"
73        "pmullh     %[ftmp2],   %[ftmp2],       %[qmul]                 \n\t"
74        "pcmpgth    %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
75        "pcmpgth    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
76        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
77        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
78        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
79        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
80        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
81        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
82        "pcmpeqh    %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
83        "pcmpeqh    %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
84        "pandn      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
85        "pandn      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
86        PTR_ADDIU  "%[nCoeffs], %[nCoeffs],     0x10                    \n\t"
87        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
88        MMI_SDC1(%[ftmp2], %[addr0], 0x08)
89        "blez       %[nCoeffs], 1b                                      \n\t"
90        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
91          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
92          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
93          RESTRICT_ASM_ALL64
94          [addr0]"=&r"(addr[0])
95        : [block]"r"((mips_reg)(block+nCoeffs)),
96          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
97          [qmul]"f"(qmul_u.f),              [qadd]"f"(qadd_u.f)
98        : "memory"
99    );
100
101    block[0] = level;
102}
103
104void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
105        int n, int qscale)
106{
107    int64_t nCoeffs;
108    double ftmp[6];
109    mips_reg addr[1];
110    union mmi_intfloat64 qmul_u, qadd_u;
111    DECLARE_VAR_ALL64;
112
113    qmul_u.i = qscale << 1;
114    qadd_u.i = (qscale - 1) | 1;
115    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
116    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
117
118    __asm__ volatile (
119        "packsswh   %[qmul],    %[qmul],        %[qmul]                 \n\t"
120        "packsswh   %[qmul],    %[qmul],        %[qmul]                 \n\t"
121        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
122        "packsswh   %[qadd],    %[qadd],        %[qadd]                 \n\t"
123        "packsswh   %[qadd],    %[qadd],        %[qadd]                 \n\t"
124        "psubh      %[ftmp0],   %[ftmp0],       %[qadd]                 \n\t"
125        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
126        ".p2align   4                                                   \n\t"
127        "1:                                                             \n\t"
128        PTR_ADDU   "%[addr0],   %[block],       %[nCoeffs]              \n\t"
129        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
130        MMI_LDC1(%[ftmp2], %[addr0], 0x08)
131        "mov.d      %[ftmp3],   %[ftmp1]                                \n\t"
132        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
133        "pmullh     %[ftmp1],   %[ftmp1],       %[qmul]                 \n\t"
134        "pmullh     %[ftmp2],   %[ftmp2],       %[qmul]                 \n\t"
135        "pcmpgth    %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
136        "pcmpgth    %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
137        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
138        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
139        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
140        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
141        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
142        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
143        "pcmpeqh    %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
144        "pcmpeqh    %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
145        "pandn      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
146        "pandn      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
147        PTR_ADDIU  "%[nCoeffs], %[nCoeffs],     0x10                    \n\t"
148        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
149        MMI_SDC1(%[ftmp2], %[addr0], 0x08)
150        "blez       %[nCoeffs], 1b                                      \n\t"
151        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
152          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
153          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
154          RESTRICT_ASM_ALL64
155          [addr0]"=&r"(addr[0])
156        : [block]"r"((mips_reg)(block+nCoeffs)),
157          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
158          [qmul]"f"(qmul_u.f),              [qadd]"f"(qadd_u.f)
159        : "memory"
160    );
161}
162
163void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
164        int n, int qscale)
165{
166    int64_t nCoeffs;
167    const uint16_t *quant_matrix;
168    int block0;
169    double ftmp[10];
170    uint64_t tmp[1];
171    mips_reg addr[1];
172    DECLARE_VAR_ALL64;
173    DECLARE_VAR_ADDRT;
174
175    av_assert2(s->block_last_index[n]>=0);
176    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
177
178    if (n<4)
179        block0 = block[0] * s->y_dc_scale;
180    else
181        block0 = block[0] * s->c_dc_scale;
182
183    /* XXX: only mpeg1 */
184    quant_matrix = s->intra_matrix;
185
186    __asm__ volatile (
187        "dli        %[tmp0],    0x0f                                    \n\t"
188        "pcmpeqh    %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
189        "dmtc1      %[tmp0],    %[ftmp4]                                \n\t"
190        "dmtc1      %[qscale],  %[ftmp1]                                \n\t"
191        "psrlh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
192        "packsswh   %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
193        "packsswh   %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
194        "or         %[addr0],   %[nCoeffs],     $0                      \n\t"
195        ".p2align   4                                                   \n\t"
196
197        "1:                                                             \n\t"
198        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
199        MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
200        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
201        "mov.d      %[ftmp5],   %[ftmp3]                                \n\t"
202        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
203        MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
204        "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
205        "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
206        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
207        "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]                \n\t"
208        "pcmpgth    %[ftmp8],   %[ftmp8],       %[ftmp2]                \n\t"
209        "pcmpgth    %[ftmp9],   %[ftmp9],       %[ftmp3]                \n\t"
210        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
211        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
212        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
213        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
214        "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
215        "pmullh     %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
216        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
217        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
218        "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
219        "dli        %[tmp0],    0x03                                    \n\t"
220        "pcmpeqh    %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
221        "dmtc1      %[tmp0],    %[ftmp4]                                \n\t"
222        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
223        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
224        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
225        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
226        "por        %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
227        "por        %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
228        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
229        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
230        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
231        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
232        "pandn      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
233        "pandn      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
234        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
235        MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
236        PTR_ADDIU  "%[addr0],   %[addr0],       0x10                    \n\t"
237        "bltz       %[addr0],   1b                                      \n\t"
238        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
239          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
240          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
241          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
242          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
243          [tmp0]"=&r"(tmp[0]),
244          RESTRICT_ASM_ALL64
245          RESTRICT_ASM_ADDRT
246          [addr0]"=&r"(addr[0])
247        : [block]"r"((mips_reg)(block+nCoeffs)),
248          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
249          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
250          [qscale]"r"(qscale)
251        : "memory"
252    );
253
254    block[0] = block0;
255}
256
257void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
258        int n, int qscale)
259{
260    int64_t nCoeffs;
261    const uint16_t *quant_matrix;
262    double ftmp[10];
263    uint64_t tmp[1];
264    mips_reg addr[1];
265    DECLARE_VAR_ALL64;
266    DECLARE_VAR_ADDRT;
267
268    av_assert2(s->block_last_index[n] >= 0);
269    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
270    quant_matrix = s->inter_matrix;
271
272    __asm__ volatile (
273        "dli        %[tmp0],    0x0f                                    \n\t"
274        "pcmpeqh    %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
275        "dmtc1      %[tmp0],    %[ftmp4]                                \n\t"
276        "dmtc1      %[qscale],  %[ftmp1]                                \n\t"
277        "psrlh      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
278        "packsswh   %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
279        "packsswh   %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
280        "or         %[addr0],   %[nCoeffs],     $0                      \n\t"
281        ".p2align   4                                                   \n\t"
282
283        "1:                                                             \n\t"
284        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
285        MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
286        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
287        "mov.d      %[ftmp5],   %[ftmp3]                                \n\t"
288        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
289        MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
290        "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
291        "pmullh     %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
292        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
293        "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]                \n\t"
294        "pcmpgth    %[ftmp8],   %[ftmp8],       %[ftmp2]                \n\t"
295        "pcmpgth    %[ftmp9],   %[ftmp9],       %[ftmp3]                \n\t"
296        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
297        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
298        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
299        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
300        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
301        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp3]                \n\t"
302        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
303        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
304        "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
305        "pmullh     %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
306        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
307        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
308        "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
309        "dli        %[tmp0],    0x04                                    \n\t"
310        "pcmpeqh    %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
311        "dmtc1      %[tmp0],    %[ftmp4]                                \n\t"
312        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
313        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
314        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
315        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
316        "por        %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
317        "por        %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
318        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
319        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
320        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
321        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp9]                \n\t"
322        "pandn      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
323        "pandn      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
324        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
325        MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
326        PTR_ADDIU  "%[addr0],   %[addr0],       0x10                    \n\t"
327        "bltz       %[addr0],   1b                                      \n\t"
328        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
329          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
330          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
331          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
332          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
333          [tmp0]"=&r"(tmp[0]),
334          RESTRICT_ASM_ALL64
335          RESTRICT_ASM_ADDRT
336          [addr0]"=&r"(addr[0])
337        : [block]"r"((mips_reg)(block+nCoeffs)),
338          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
339          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
340          [qscale]"r"(qscale)
341        : "memory"
342    );
343}
344
345void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
346        int n, int qscale)
347{
348    uint64_t nCoeffs;
349    const uint16_t *quant_matrix;
350    int block0;
351    double ftmp[10];
352    uint64_t tmp[1];
353    mips_reg addr[1];
354    DECLARE_VAR_ALL64;
355    DECLARE_VAR_ADDRT;
356
357    assert(s->block_last_index[n]>=0);
358
359    if (s->alternate_scan)
360        nCoeffs = 63;
361    else
362        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
363
364    if (n < 4)
365        block0 = block[0] * s->y_dc_scale;
366    else
367        block0 = block[0] * s->c_dc_scale;
368
369    quant_matrix = s->intra_matrix;
370
371    __asm__ volatile (
372        "dli        %[tmp0],    0x0f                                    \n\t"
373        "pcmpeqh    %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
374        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
375        "mtc1       %[qscale],  %[ftmp9]                                \n\t"
376        "psrlh      %[ftmp0],   %[ftmp0],       %[ftmp3]                \n\t"
377        "packsswh   %[ftmp9],   %[ftmp9],       %[ftmp9]                \n\t"
378        "packsswh   %[ftmp9],   %[ftmp9],       %[ftmp9]                \n\t"
379        "or         %[addr0],   %[nCoeffs],     $0                      \n\t"
380        ".p2align   4                                                   \n\t"
381
382        "1:                                                             \n\t"
383        MMI_LDXC1(%[ftmp1], %[addr0], %[block], 0x00)
384        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x08)
385        "mov.d      %[ftmp3],   %[ftmp1]                                \n\t"
386        "mov.d      %[ftmp4],   %[ftmp2]                                \n\t"
387        MMI_LDXC1(%[ftmp5], %[addr0], %[quant], 0x00)
388        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x08)
389        "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
390        "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp9]                \n\t"
391        "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
392        "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]                \n\t"
393        "pcmpgth    %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
394        "pcmpgth    %[ftmp8],   %[ftmp8],       %[ftmp2]                \n\t"
395        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
396        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
397        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
398        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
399        "pmullh     %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
400        "pmullh     %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
401        "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]                \n\t"
402        "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
403        "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
404        "dli        %[tmp0],    0x03                                    \n\t"
405        "pcmpeqh    %[ftmp6] ,  %[ftmp6],       %[ftmp4]                \n\t"
406        "mtc1       %[tmp0],    %[ftmp3]                                \n\t"
407        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
408        "psrah      %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
409        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
410        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
411        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
412        "psubh      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
413        "pandn      %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
414        "pandn      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
415        MMI_SDXC1(%[ftmp5], %[addr0], %[block], 0x00)
416        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x08)
417        PTR_ADDIU  "%[addr0],   %[addr0],       0x10                    \n\t"
418        "blez       %[addr0],   1b                                      \n\t"
419        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
420          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
421          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
422          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
423          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
424          [tmp0]"=&r"(tmp[0]),
425          RESTRICT_ASM_ALL64
426          RESTRICT_ASM_ADDRT
427          [addr0]"=&r"(addr[0])
428        : [block]"r"((mips_reg)(block+nCoeffs)),
429          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
430          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
431          [qscale]"r"(qscale)
432        : "memory"
433    );
434
435    block[0]= block0;
436}
437
438void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
439{
440    const int intra = s->mb_intra;
441    int *sum = s->dct_error_sum[intra];
442    uint16_t *offset = s->dct_offset[intra];
443    double ftmp[8];
444    mips_reg addr[1];
445    DECLARE_VAR_ALL64;
446
447    s->dct_count[intra]++;
448
449    __asm__ volatile(
450        "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
451        "1:                                                             \n\t"
452        MMI_LDC1(%[ftmp1], %[block], 0x00)
453        "pxor       %[ftmp2],   %[ftmp2],       %[ftmp2]                \n\t"
454        MMI_LDC1(%[ftmp3], %[block], 0x08)
455        "pxor       %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
456        "pcmpgth    %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
457        "pcmpgth    %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
458        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
459        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
460        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
461        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
462        MMI_LDC1(%[ftmp6], %[offset], 0x00)
463        "mov.d      %[ftmp5],   %[ftmp1]                                \n\t"
464        "psubush    %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
465        MMI_LDC1(%[ftmp6], %[offset], 0x08)
466        "mov.d      %[ftmp7],   %[ftmp3]                                \n\t"
467        "psubush    %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
468        "pxor       %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
469        "pxor       %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
470        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
471        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
472        MMI_SDC1(%[ftmp1], %[block], 0x00)
473        MMI_SDC1(%[ftmp3], %[block], 0x08)
474        "mov.d      %[ftmp1],   %[ftmp5]                                \n\t"
475        "mov.d      %[ftmp3],   %[ftmp7]                                \n\t"
476        "punpcklhw  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
477        "punpckhhw  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
478        "punpcklhw  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t"
479        "punpckhhw  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
480        MMI_LDC1(%[ftmp2], %[sum], 0x00)
481        "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]                \n\t"
482        MMI_LDC1(%[ftmp2], %[sum], 0x08)
483        "paddw      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
484        MMI_LDC1(%[ftmp2], %[sum], 0x10)
485        "paddw      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
486        MMI_LDC1(%[ftmp2], %[sum], 0x18)
487        "paddw      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
488        MMI_SDC1(%[ftmp5], %[sum], 0x00)
489        MMI_SDC1(%[ftmp1], %[sum], 0x08)
490        MMI_SDC1(%[ftmp7], %[sum], 0x10)
491        MMI_SDC1(%[ftmp3], %[sum], 0x18)
492        PTR_ADDIU  "%[block],   %[block],       0x10                    \n\t"
493        PTR_ADDIU  "%[sum],     %[sum],         0x20                    \n\t"
494        PTR_SUBU   "%[addr0],   %[block1],      %[block]                \n\t"
495        PTR_ADDIU  "%[offset],  %[offset],      0x10                    \n\t"
496        "bgtz       %[addr0],   1b                                      \n\t"
497        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
498          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
499          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
500          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
501          RESTRICT_ASM_ALL64
502          [addr0]"=&r"(addr[0]),
503          [block]"+&r"(block),              [sum]"+&r"(sum),
504          [offset]"+&r"(offset)
505        : [block1]"r"(block+64)
506        : "memory"
507    );
508}
509