1/* 2 * Loongson SIMD optimized mpegvideo 3 * 4 * Copyright (c) 2015 Loongson Technology Corporation Limited 5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6 * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn> 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25#include "mpegvideo_mips.h" 26#include "libavutil/mips/mmiutils.h" 27 28void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, 29 int n, int qscale) 30{ 31 int64_t level, nCoeffs; 32 double ftmp[6]; 33 mips_reg addr[1]; 34 union mmi_intfloat64 qmul_u, qadd_u; 35 DECLARE_VAR_ALL64; 36 37 qmul_u.i = qscale << 1; 38 av_assert2(s->block_last_index[n]>=0 || s->h263_aic); 39 40 if (!s->h263_aic) { 41 if (n<4) 42 level = block[0] * s->y_dc_scale; 43 else 44 level = block[0] * s->c_dc_scale; 45 qadd_u.i = (qscale-1) | 1; 46 } else { 47 qadd_u.i = 0; 48 level = block[0]; 49 } 50 51 if(s->ac_pred) 52 nCoeffs = 63; 53 else 54 nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; 55 56 __asm__ volatile ( 57 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 58 "packsswh %[qmul], %[qmul], %[qmul] \n\t" 59 "packsswh %[qmul], %[qmul], %[qmul] \n\t" 60 "packsswh %[qadd], %[qadd], %[qadd] \n\t" 61 "packsswh %[qadd], %[qadd], %[qadd] \n\t" 62 "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t" 63 "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 64 ".p2align 4 \n\t" 65 66 "1: \n\t" 67 PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t" 68 MMI_LDC1(%[ftmp1], %[addr0], 0x00) 69 MMI_LDC1(%[ftmp2], %[addr0], 0x08) 70 "mov.d %[ftmp3], %[ftmp1] \n\t" 71 "mov.d %[ftmp4], %[ftmp2] \n\t" 72 "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t" 73 "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t" 74 "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 75 "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 76 "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 77 "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 78 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 79 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 80 "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 81 "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t" 82 "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 83 "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 84 "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 85 "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 86 PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t" 87 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 88 MMI_SDC1(%[ftmp2], %[addr0], 0x08) 89 "blez %[nCoeffs], 1b \n\t" 90 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 91 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 92 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 93 RESTRICT_ASM_ALL64 94 [addr0]"=&r"(addr[0]) 95 : [block]"r"((mips_reg)(block+nCoeffs)), 96 [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), 97 [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f) 98 : "memory" 99 ); 100 101 block[0] = level; 102} 103 104void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, 105 int n, int qscale) 106{ 107 int64_t nCoeffs; 108 double ftmp[6]; 109 mips_reg addr[1]; 110 union mmi_intfloat64 qmul_u, qadd_u; 111 DECLARE_VAR_ALL64; 112 113 qmul_u.i = qscale << 1; 114 qadd_u.i = (qscale - 1) | 1; 115 av_assert2(s->block_last_index[n]>=0 || s->h263_aic); 116 nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; 117 118 __asm__ volatile ( 119 "packsswh %[qmul], %[qmul], %[qmul] \n\t" 120 "packsswh %[qmul], %[qmul], %[qmul] \n\t" 121 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 122 "packsswh %[qadd], %[qadd], %[qadd] \n\t" 123 "packsswh %[qadd], %[qadd], %[qadd] \n\t" 124 "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t" 125 "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 126 ".p2align 4 \n\t" 127 "1: \n\t" 128 PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t" 129 MMI_LDC1(%[ftmp1], %[addr0], 0x00) 130 MMI_LDC1(%[ftmp2], %[addr0], 0x08) 131 "mov.d %[ftmp3], %[ftmp1] \n\t" 132 "mov.d %[ftmp4], %[ftmp2] \n\t" 133 "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t" 134 "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t" 135 "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 136 "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 137 "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 138 "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 139 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 140 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 141 "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 142 "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t" 143 "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 144 "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 145 "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 146 "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 147 PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t" 148 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 149 MMI_SDC1(%[ftmp2], %[addr0], 0x08) 150 "blez %[nCoeffs], 1b \n\t" 151 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 152 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 153 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 154 RESTRICT_ASM_ALL64 155 [addr0]"=&r"(addr[0]) 156 : [block]"r"((mips_reg)(block+nCoeffs)), 157 [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), 158 [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f) 159 : "memory" 160 ); 161} 162 163void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block, 164 int n, int qscale) 165{ 166 int64_t nCoeffs; 167 const uint16_t *quant_matrix; 168 int block0; 169 double ftmp[10]; 170 uint64_t tmp[1]; 171 mips_reg addr[1]; 172 DECLARE_VAR_ALL64; 173 DECLARE_VAR_ADDRT; 174 175 av_assert2(s->block_last_index[n]>=0); 176 nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; 177 178 if (n<4) 179 block0 = block[0] * s->y_dc_scale; 180 else 181 block0 = block[0] * s->c_dc_scale; 182 183 /* XXX: only mpeg1 */ 184 quant_matrix = s->intra_matrix; 185 186 __asm__ volatile ( 187 "dli %[tmp0], 0x0f \n\t" 188 "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 189 "dmtc1 %[tmp0], %[ftmp4] \n\t" 190 "dmtc1 %[qscale], %[ftmp1] \n\t" 191 "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 192 "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 193 "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 194 "or %[addr0], %[nCoeffs], $0 \n\t" 195 ".p2align 4 \n\t" 196 197 "1: \n\t" 198 MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00) 199 MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08) 200 "mov.d %[ftmp4], %[ftmp2] \n\t" 201 "mov.d %[ftmp5], %[ftmp3] \n\t" 202 MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00) 203 MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08) 204 "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 205 "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 206 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 207 "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" 208 "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t" 209 "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t" 210 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 211 "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 212 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 213 "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 214 "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 215 "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 216 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 217 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 218 "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 219 "dli %[tmp0], 0x03 \n\t" 220 "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 221 "dmtc1 %[tmp0], %[ftmp4] \n\t" 222 "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 223 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 224 "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 225 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 226 "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 227 "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 228 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 229 "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 230 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 231 "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 232 "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 233 "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 234 MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00) 235 MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08) 236 PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t" 237 "bltz %[addr0], 1b \n\t" 238 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 239 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 240 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 241 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 242 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 243 [tmp0]"=&r"(tmp[0]), 244 RESTRICT_ASM_ALL64 245 RESTRICT_ASM_ADDRT 246 [addr0]"=&r"(addr[0]) 247 : [block]"r"((mips_reg)(block+nCoeffs)), 248 [quant]"r"((mips_reg)(quant_matrix+nCoeffs)), 249 [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), 250 [qscale]"r"(qscale) 251 : "memory" 252 ); 253 254 block[0] = block0; 255} 256 257void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block, 258 int n, int qscale) 259{ 260 int64_t nCoeffs; 261 const uint16_t *quant_matrix; 262 double ftmp[10]; 263 uint64_t tmp[1]; 264 mips_reg addr[1]; 265 DECLARE_VAR_ALL64; 266 DECLARE_VAR_ADDRT; 267 268 av_assert2(s->block_last_index[n] >= 0); 269 nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; 270 quant_matrix = s->inter_matrix; 271 272 __asm__ volatile ( 273 "dli %[tmp0], 0x0f \n\t" 274 "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 275 "dmtc1 %[tmp0], %[ftmp4] \n\t" 276 "dmtc1 %[qscale], %[ftmp1] \n\t" 277 "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 278 "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 279 "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 280 "or %[addr0], %[nCoeffs], $0 \n\t" 281 ".p2align 4 \n\t" 282 283 "1: \n\t" 284 MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00) 285 MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08) 286 "mov.d %[ftmp4], %[ftmp2] \n\t" 287 "mov.d %[ftmp5], %[ftmp3] \n\t" 288 MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00) 289 MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08) 290 "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 291 "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 292 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 293 "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" 294 "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t" 295 "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t" 296 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 297 "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 298 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 299 "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 300 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 301 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 302 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 303 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 304 "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 305 "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 306 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 307 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 308 "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 309 "dli %[tmp0], 0x04 \n\t" 310 "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 311 "dmtc1 %[tmp0], %[ftmp4] \n\t" 312 "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 313 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 314 "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 315 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 316 "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 317 "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 318 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 319 "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 320 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 321 "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 322 "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 323 "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 324 MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00) 325 MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08) 326 PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t" 327 "bltz %[addr0], 1b \n\t" 328 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 329 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 330 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 331 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 332 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 333 [tmp0]"=&r"(tmp[0]), 334 RESTRICT_ASM_ALL64 335 RESTRICT_ASM_ADDRT 336 [addr0]"=&r"(addr[0]) 337 : [block]"r"((mips_reg)(block+nCoeffs)), 338 [quant]"r"((mips_reg)(quant_matrix+nCoeffs)), 339 [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), 340 [qscale]"r"(qscale) 341 : "memory" 342 ); 343} 344 345void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block, 346 int n, int qscale) 347{ 348 uint64_t nCoeffs; 349 const uint16_t *quant_matrix; 350 int block0; 351 double ftmp[10]; 352 uint64_t tmp[1]; 353 mips_reg addr[1]; 354 DECLARE_VAR_ALL64; 355 DECLARE_VAR_ADDRT; 356 357 assert(s->block_last_index[n]>=0); 358 359 if (s->alternate_scan) 360 nCoeffs = 63; 361 else 362 nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; 363 364 if (n < 4) 365 block0 = block[0] * s->y_dc_scale; 366 else 367 block0 = block[0] * s->c_dc_scale; 368 369 quant_matrix = s->intra_matrix; 370 371 __asm__ volatile ( 372 "dli %[tmp0], 0x0f \n\t" 373 "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 374 "mtc1 %[tmp0], %[ftmp3] \n\t" 375 "mtc1 %[qscale], %[ftmp9] \n\t" 376 "psrlh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 377 "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t" 378 "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t" 379 "or %[addr0], %[nCoeffs], $0 \n\t" 380 ".p2align 4 \n\t" 381 382 "1: \n\t" 383 MMI_LDXC1(%[ftmp1], %[addr0], %[block], 0x00) 384 MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x08) 385 "mov.d %[ftmp3], %[ftmp1] \n\t" 386 "mov.d %[ftmp4], %[ftmp2] \n\t" 387 MMI_LDXC1(%[ftmp5], %[addr0], %[quant], 0x00) 388 MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x08) 389 "pmullh %[ftmp5], %[ftmp5], %[ftmp9] \n\t" 390 "pmullh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 391 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 392 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 393 "pcmpgth %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 394 "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t" 395 "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 396 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 397 "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 398 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 399 "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 400 "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 401 "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 402 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 403 "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp3] \n\t" 404 "dli %[tmp0], 0x03 \n\t" 405 "pcmpeqh %[ftmp6] , %[ftmp6], %[ftmp4] \n\t" 406 "mtc1 %[tmp0], %[ftmp3] \n\t" 407 "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 408 "psrah %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 409 "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 410 "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 411 "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 412 "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 413 "pandn %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 414 "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 415 MMI_SDXC1(%[ftmp5], %[addr0], %[block], 0x00) 416 MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x08) 417 PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t" 418 "blez %[addr0], 1b \n\t" 419 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 420 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 421 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 422 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 423 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 424 [tmp0]"=&r"(tmp[0]), 425 RESTRICT_ASM_ALL64 426 RESTRICT_ASM_ADDRT 427 [addr0]"=&r"(addr[0]) 428 : [block]"r"((mips_reg)(block+nCoeffs)), 429 [quant]"r"((mips_reg)(quant_matrix+nCoeffs)), 430 [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), 431 [qscale]"r"(qscale) 432 : "memory" 433 ); 434 435 block[0]= block0; 436} 437 438void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block) 439{ 440 const int intra = s->mb_intra; 441 int *sum = s->dct_error_sum[intra]; 442 uint16_t *offset = s->dct_offset[intra]; 443 double ftmp[8]; 444 mips_reg addr[1]; 445 DECLARE_VAR_ALL64; 446 447 s->dct_count[intra]++; 448 449 __asm__ volatile( 450 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 451 "1: \n\t" 452 MMI_LDC1(%[ftmp1], %[block], 0x00) 453 "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 454 MMI_LDC1(%[ftmp3], %[block], 0x08) 455 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 456 "pcmpgth %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 457 "pcmpgth %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 458 "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 459 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 460 "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 461 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 462 MMI_LDC1(%[ftmp6], %[offset], 0x00) 463 "mov.d %[ftmp5], %[ftmp1] \n\t" 464 "psubush %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 465 MMI_LDC1(%[ftmp6], %[offset], 0x08) 466 "mov.d %[ftmp7], %[ftmp3] \n\t" 467 "psubush %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 468 "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 469 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 470 "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 471 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 472 MMI_SDC1(%[ftmp1], %[block], 0x00) 473 MMI_SDC1(%[ftmp3], %[block], 0x08) 474 "mov.d %[ftmp1], %[ftmp5] \n\t" 475 "mov.d %[ftmp3], %[ftmp7] \n\t" 476 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 477 "punpckhhw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 478 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 479 "punpckhhw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 480 MMI_LDC1(%[ftmp2], %[sum], 0x00) 481 "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 482 MMI_LDC1(%[ftmp2], %[sum], 0x08) 483 "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 484 MMI_LDC1(%[ftmp2], %[sum], 0x10) 485 "paddw %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 486 MMI_LDC1(%[ftmp2], %[sum], 0x18) 487 "paddw %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 488 MMI_SDC1(%[ftmp5], %[sum], 0x00) 489 MMI_SDC1(%[ftmp1], %[sum], 0x08) 490 MMI_SDC1(%[ftmp7], %[sum], 0x10) 491 MMI_SDC1(%[ftmp3], %[sum], 0x18) 492 PTR_ADDIU "%[block], %[block], 0x10 \n\t" 493 PTR_ADDIU "%[sum], %[sum], 0x20 \n\t" 494 PTR_SUBU "%[addr0], %[block1], %[block] \n\t" 495 PTR_ADDIU "%[offset], %[offset], 0x10 \n\t" 496 "bgtz %[addr0], 1b \n\t" 497 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 498 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 499 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 500 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 501 RESTRICT_ASM_ALL64 502 [addr0]"=&r"(addr[0]), 503 [block]"+&r"(block), [sum]"+&r"(sum), 504 [offset]"+&r"(offset) 505 : [block1]"r"(block+64) 506 : "memory" 507 ); 508} 509