1/* 2 * Loongson SIMD optimized h264pred 3 * 4 * Copyright (c) 2015 Loongson Technology Corporation Limited 5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6 * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn> 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25#include "h264pred_mips.h" 26#include "libavcodec/bit_depth_template.c" 27#include "libavutil/mips/mmiutils.h" 28#include "constants.h" 29 30void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) 31{ 32 double ftmp[2]; 33 uint64_t tmp[1]; 34 DECLARE_VAR_ALL64; 35 36 __asm__ volatile ( 37 "dli %[tmp0], 0x08 \n\t" 38 MMI_LDC1(%[ftmp0], %[srcA], 0x00) 39 MMI_LDC1(%[ftmp1], %[srcA], 0x08) 40 41 "1: \n\t" 42 MMI_SDC1(%[ftmp0], %[src], 0x00) 43 MMI_SDC1(%[ftmp1], %[src], 0x08) 44 PTR_ADDU "%[src], %[src], %[stride] \n\t" 45 MMI_SDC1(%[ftmp0], %[src], 0x00) 46 MMI_SDC1(%[ftmp1], %[src], 0x08) 47 48 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 49 PTR_ADDU "%[src], %[src], %[stride] \n\t" 50 "bnez %[tmp0], 1b \n\t" 51 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 52 [tmp0]"=&r"(tmp[0]), 53 RESTRICT_ASM_ALL64 54 [src]"+&r"(src) 55 : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride)) 56 : "memory" 57 ); 58} 59 60void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) 61{ 62 uint64_t tmp[3]; 63 mips_reg addr[2]; 64 65 __asm__ volatile ( 66 PTR_ADDI "%[addr0], %[src], -0x01 \n\t" 67 PTR_ADDU "%[addr1], %[src], $0 \n\t" 68 "dli %[tmp2], 0x08 \n\t" 69 "1: \n\t" 70 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 71 "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t" 72 "swl %[tmp1], 0x07(%[addr1]) \n\t" 73 "swr %[tmp1], 0x00(%[addr1]) \n\t" 74 "swl %[tmp1], 0x0f(%[addr1]) \n\t" 75 "swr %[tmp1], 0x08(%[addr1]) \n\t" 76 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 77 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 78 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 79 "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t" 80 "swl %[tmp1], 0x07(%[addr1]) \n\t" 81 "swr %[tmp1], 0x00(%[addr1]) \n\t" 82 "swl %[tmp1], 0x0f(%[addr1]) \n\t" 83 "swr %[tmp1], 0x08(%[addr1]) \n\t" 84 "daddi %[tmp2], %[tmp2], -0x01 \n\t" 85 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 86 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 87 "bnez %[tmp2], 1b \n\t" 88 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 89 [tmp2]"=&r"(tmp[2]), 90 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 91 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride), 92 [ff_pb_1]"r"(ff_pb_1) 93 : "memory" 94 ); 95} 96 97void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride) 98{ 99 uint64_t tmp[4]; 100 mips_reg addr[2]; 101 102 __asm__ volatile ( 103 PTR_ADDI "%[addr0], %[src], -0x01 \n\t" 104 "dli %[tmp0], 0x08 \n\t" 105 "xor %[tmp3], %[tmp3], %[tmp3] \n\t" 106 "1: \n\t" 107 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 108 "daddu %[tmp3], %[tmp3], %[tmp1] \n\t" 109 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 110 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 111 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 112 "daddu %[tmp3], %[tmp3], %[tmp1] \n\t" 113 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 114 "bnez %[tmp0], 1b \n\t" 115 116 "dli %[tmp0], 0x08 \n\t" 117 PTR_SUBU "%[addr0], %[src], %[stride] \n\t" 118 "2: \n\t" 119 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 120 "daddu %[tmp3], %[tmp3], %[tmp1] \n\t" 121 PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t" 122 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 123 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 124 "daddu %[tmp3], %[tmp3], %[tmp1] \n\t" 125 PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t" 126 "bnez %[tmp0], 2b \n\t" 127 128 "daddiu %[tmp3], %[tmp3], 0x10 \n\t" 129 "dsra %[tmp3], 0x05 \n\t" 130 "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t" 131 PTR_ADDU "%[addr0], %[src], $0 \n\t" 132 "dli %[tmp0], 0x08 \n\t" 133 "3: \n\t" 134 "swl %[tmp2], 0x07(%[addr0]) \n\t" 135 "swr %[tmp2], 0x00(%[addr0]) \n\t" 136 "swl %[tmp2], 0x0f(%[addr0]) \n\t" 137 "swr %[tmp2], 0x08(%[addr0]) \n\t" 138 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 139 "swl %[tmp2], 0x07(%[addr0]) \n\t" 140 "swr %[tmp2], 0x00(%[addr0]) \n\t" 141 "swl %[tmp2], 0x0f(%[addr0]) \n\t" 142 "swr %[tmp2], 0x08(%[addr0]) \n\t" 143 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 144 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 145 "bnez %[tmp0], 3b \n\t" 146 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 147 [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]), 148 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 149 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride), 150 [ff_pb_1]"r"(ff_pb_1) 151 : "memory" 152 ); 153} 154 155void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, 156 int has_topright, ptrdiff_t stride) 157{ 158 double ftmp[11]; 159 mips_reg tmp[3]; 160 union av_intfloat64 dc; 161 DECLARE_VAR_ALL64; 162 DECLARE_VAR_ADDRT; 163 164 __asm__ volatile ( 165 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 166 MMI_ULDC1(%[ftmp10], %[srcA], 0x00) 167 MMI_ULDC1(%[ftmp9], %[src0], 0x00) 168 MMI_ULDC1(%[ftmp8], %[src1], 0x00) 169 170 "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t" 171 "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t" 172 "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t" 173 "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t" 174 "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t" 175 "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t" 176 "bnez %[has_topleft], 1f \n\t" 177 "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 178 179 "1: \n\t" 180 "bnez %[has_topright], 2f \n\t" 181 "dli %[tmp0], 0xa4 \n\t" 182 "mtc1 %[tmp0], %[ftmp1] \n\t" 183 "pshufh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 184 185 "2: \n\t" 186 "dli %[tmp0], 0x02 \n\t" 187 "mtc1 %[tmp0], %[ftmp1] \n\t" 188 "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t" 189 "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t" 190 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 191 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 192 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 193 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 194 "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t" 195 "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t" 196 "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 197 "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 198 "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t" 199 "biadd %[ftmp10], %[ftmp9] \n\t" 200 "mfc1 %[tmp1], %[ftmp10] \n\t" 201 "addiu %[tmp1], %[tmp1], 0x04 \n\t" 202 "srl %[tmp1], %[tmp1], 0x03 \n\t" 203 "mul %[dc], %[tmp1], %[ff_pb_1] \n\t" 204 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 205 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 206 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 207 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 208 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 209 [ftmp10]"=&f"(ftmp[10]), 210 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 211 RESTRICT_ASM_ALL64 212 [dc]"=r"(dc.i) 213 : [srcA]"r"((mips_reg)(src-stride-1)), 214 [src0]"r"((mips_reg)(src-stride)), 215 [src1]"r"((mips_reg)(src-stride+1)), 216 [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright), 217 [ff_pb_1]"r"(ff_pb_1.i), [ff_pw_2]"f"(ff_pw_2.f) 218 : "memory" 219 ); 220 221 __asm__ volatile ( 222 "dli %[tmp0], 0x02 \n\t" 223 "punpcklwd %[ftmp0], %[dc], %[dc] \n\t" 224 225 "1: \n\t" 226 MMI_SDC1(%[ftmp0], %[src], 0x00) 227 MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00) 228 PTR_ADDU "%[src], %[src], %[stride] \n\t" 229 PTR_ADDU "%[src], %[src], %[stride] \n\t" 230 MMI_SDC1(%[ftmp0], %[src], 0x00) 231 MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00) 232 233 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 234 PTR_ADDU "%[src], %[src], %[stride] \n\t" 235 PTR_ADDU "%[src], %[src], %[stride] \n\t" 236 "bnez %[tmp0], 1b \n\t" 237 : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]), 238 RESTRICT_ASM_ALL64 239 RESTRICT_ASM_ADDRT 240 [src]"+&r"(src) 241 : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride) 242 : "memory" 243 ); 244} 245 246void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, 247 ptrdiff_t stride) 248{ 249 uint32_t dc1, dc2; 250 double ftmp[14]; 251 mips_reg tmp[1]; 252 union av_intfloat64 dc; 253 254 const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2; 255 const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2; 256 const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2; 257 const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2; 258 const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2; 259 const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2; 260 const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2; 261 const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2; 262 263 DECLARE_VAR_ALL64; 264 DECLARE_VAR_ADDRT; 265 266 __asm__ volatile ( 267 MMI_ULDC1(%[ftmp4], %[srcA], 0x00) 268 MMI_ULDC1(%[ftmp5], %[src0], 0x00) 269 MMI_ULDC1(%[ftmp6], %[src1], 0x00) 270 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 271 "dli %[tmp0], 0x03 \n\t" 272 "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" 273 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 274 "mtc1 %[tmp0], %[ftmp1] \n\t" 275 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 276 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 277 "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t" 278 "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" 279 "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t" 280 "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t" 281 "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t" 282 "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t" 283 "bnez %[has_topleft], 1f \n\t" 284 "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 285 286 "1: \n\t" 287 "bnez %[has_topright], 2f \n\t" 288 "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t" 289 "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t" 290 291 "2: \n\t" 292 "dli %[tmp0], 0x02 \n\t" 293 "mtc1 %[tmp0], %[ftmp1] \n\t" 294 "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" 295 "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t" 296 "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t" 297 "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 298 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 299 "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 300 "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t" 301 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 302 "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" 303 "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 304 "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t" 305 "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t" 306 "biadd %[ftmp4], %[ftmp5] \n\t" 307 "mfc1 %[dc2], %[ftmp4] \n\t" 308 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 309 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 310 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 311 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 312 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 313 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 314 [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 315 [tmp0]"=&r"(tmp[0]), 316 RESTRICT_ASM_ALL64 317 [dc2]"=r"(dc2) 318 : [srcA]"r"((mips_reg)(src-stride-1)), 319 [src0]"r"((mips_reg)(src-stride)), 320 [src1]"r"((mips_reg)(src-stride+1)), 321 [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright) 322 : "memory" 323 ); 324 325 dc1 = l0+l1+l2+l3+l4+l5+l6+l7; 326 dc.i = ((dc1+dc2+8)>>4)*0x01010101U; 327 328 __asm__ volatile ( 329 "dli %[tmp0], 0x02 \n\t" 330 "punpcklwd %[ftmp0], %[dc], %[dc] \n\t" 331 332 "1: \n\t" 333 MMI_SDC1(%[ftmp0], %[src], 0x00) 334 MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00) 335 PTR_ADDU "%[src], %[src], %[stride] \n\t" 336 PTR_ADDU "%[src], %[src], %[stride] \n\t" 337 MMI_SDC1(%[ftmp0], %[src], 0x00) 338 MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00) 339 340 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 341 PTR_ADDU "%[src], %[src], %[stride] \n\t" 342 PTR_ADDU "%[src], %[src], %[stride] \n\t" 343 "bnez %[tmp0], 1b \n\t" 344 : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]), 345 RESTRICT_ASM_ALL64 346 RESTRICT_ASM_ADDRT 347 [src]"+&r"(src) 348 : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride) 349 : "memory" 350 ); 351} 352 353void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, 354 int has_topright, ptrdiff_t stride) 355{ 356 double ftmp[12]; 357 mips_reg tmp[1]; 358 DECLARE_VAR_ALL64; 359 360 __asm__ volatile ( 361 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 362 MMI_LDC1(%[ftmp3], %[srcA], 0x00) 363 MMI_LDC1(%[ftmp4], %[src0], 0x00) 364 MMI_LDC1(%[ftmp5], %[src1], 0x00) 365 "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t" 366 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 367 "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 368 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t" 369 "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 370 "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t" 371 "bnez %[has_topleft], 1f \n\t" 372 "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 373 374 "1: \n\t" 375 "bnez %[has_topright], 2f \n\t" 376 "dli %[tmp0], 0xa4 \n\t" 377 "mtc1 %[tmp0], %[ftmp1] \n\t" 378 "pshufh %[ftmp11], %[ftmp11], %[ftmp1] \n\t" 379 380 "2: \n\t" 381 "dli %[tmp0], 0x02 \n\t" 382 "mtc1 %[tmp0], %[ftmp1] \n\t" 383 "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" 384 "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" 385 "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t" 386 "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 387 "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 388 "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 389 "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 390 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 391 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 392 "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 393 "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 394 "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t" 395 MMI_SDC1(%[ftmp4], %[src], 0x00) 396 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 397 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 398 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 399 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 400 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 401 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 402 [tmp0]"=&r"(tmp[0]), 403 RESTRICT_ASM_ALL64 404 [src]"=r"(src) 405 : [srcA]"r"((mips_reg)(src-stride-1)), 406 [src0]"r"((mips_reg)(src-stride)), 407 [src1]"r"((mips_reg)(src-stride+1)), 408 [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright) 409 : "memory" 410 ); 411 412 __asm__ volatile ( 413 "dli %[tmp0], 0x02 \n\t" 414 415 "1: \n\t" 416 MMI_SDC1(%[ftmp0], %[src], 0x00) 417 PTR_ADDU "%[src], %[src], %[stride] \n\t" 418 MMI_SDC1(%[ftmp0], %[src], 0x00) 419 PTR_ADDU "%[src], %[src], %[stride] \n\t" 420 MMI_SDC1(%[ftmp0], %[src], 0x00) 421 PTR_ADDU "%[src], %[src], %[stride] \n\t" 422 MMI_SDC1(%[ftmp0], %[src], 0x00) 423 424 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 425 PTR_ADDU "%[src], %[src], %[stride] \n\t" 426 "bnez %[tmp0], 1b \n\t" 427 : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]), 428 RESTRICT_ASM_ALL64 429 [src]"+&r"(src) 430 : [stride]"r"((mips_reg)stride) 431 : "memory" 432 ); 433} 434 435void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, 436 ptrdiff_t stride) 437{ 438 const int dc = (src[-stride] + src[1-stride] + src[2-stride] 439 + src[3-stride] + src[-1+0*stride] + src[-1+1*stride] 440 + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; 441 uint64_t tmp[2]; 442 mips_reg addr[1]; 443 DECLARE_VAR_ADDRT; 444 445 __asm__ volatile ( 446 PTR_ADDU "%[tmp0], %[dc], $0 \n\t" 447 "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t" 448 "xor %[addr0], %[addr0], %[addr0] \n\t" 449 MMI_SWX(%[tmp1], %[src], %[addr0], 0x00) 450 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 451 MMI_SWX(%[tmp1], %[src], %[addr0], 0x00) 452 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 453 MMI_SWX(%[tmp1], %[src], %[addr0], 0x00) 454 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 455 MMI_SWX(%[tmp1], %[src], %[addr0], 0x00) 456 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 457 RESTRICT_ASM_ADDRT 458 [addr0]"=&r"(addr[0]) 459 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride), 460 [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1) 461 : "memory" 462 ); 463} 464 465void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) 466{ 467 uint64_t tmp[2]; 468 mips_reg addr[2]; 469 470 __asm__ volatile ( 471 PTR_SUBU "%[addr0], %[src], %[stride] \n\t" 472 PTR_ADDU "%[addr1], %[src], $0 \n\t" 473 "ldl %[tmp0], 0x07(%[addr0]) \n\t" 474 "ldr %[tmp0], 0x00(%[addr0]) \n\t" 475 "dli %[tmp1], 0x04 \n\t" 476 "1: \n\t" 477 "sdl %[tmp0], 0x07(%[addr1]) \n\t" 478 "sdr %[tmp0], 0x00(%[addr1]) \n\t" 479 PTR_ADDU "%[addr1], %[stride] \n\t" 480 "sdl %[tmp0], 0x07(%[addr1]) \n\t" 481 "sdr %[tmp0], 0x00(%[addr1]) \n\t" 482 "daddi %[tmp1], -0x01 \n\t" 483 PTR_ADDU "%[addr1], %[stride] \n\t" 484 "bnez %[tmp1], 1b \n\t" 485 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 486 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 487 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride) 488 : "memory" 489 ); 490} 491 492void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) 493{ 494 uint64_t tmp[3]; 495 mips_reg addr[2]; 496 497 __asm__ volatile ( 498 PTR_ADDI "%[addr0], %[src], -0x01 \n\t" 499 PTR_ADDU "%[addr1], %[src], $0 \n\t" 500 "dli %[tmp0], 0x04 \n\t" 501 "1: \n\t" 502 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 503 "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t" 504 "swl %[tmp2], 0x07(%[addr1]) \n\t" 505 "swr %[tmp2], 0x00(%[addr1]) \n\t" 506 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 507 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 508 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 509 "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t" 510 "swl %[tmp2], 0x07(%[addr1]) \n\t" 511 "swr %[tmp2], 0x00(%[addr1]) \n\t" 512 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 513 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 514 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 515 "bnez %[tmp0], 1b \n\t" 516 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 517 [tmp2]"=&r"(tmp[2]), 518 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 519 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride), 520 [ff_pb_1]"r"(ff_pb_1) 521 : "memory" 522 ); 523} 524 525void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride) 526{ 527 double ftmp[4]; 528 uint64_t tmp[1]; 529 mips_reg addr[1]; 530 DECLARE_VAR_ALL64; 531 532 __asm__ volatile ( 533 "dli %[tmp0], 0x02 \n\t" 534 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 535 PTR_SUBU "%[addr0], %[src], %[stride] \n\t" 536 MMI_LDC1(%[ftmp1], %[addr0], 0x00) 537 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" 538 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 539 "biadd %[ftmp2], %[ftmp2] \n\t" 540 "biadd %[ftmp3], %[ftmp3] \n\t" 541 "mtc1 %[tmp0], %[ftmp1] \n\t" 542 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 543 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 544 "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 545 "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 546 "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 547 "mtc1 %[tmp0], %[ftmp1] \n\t" 548 "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 549 "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 550 "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t" 551 MMI_SDC1(%[ftmp1], %[src], 0x00) 552 PTR_ADDU "%[src], %[src], %[stride] \n\t" 553 MMI_SDC1(%[ftmp1], %[src], 0x00) 554 PTR_ADDU "%[src], %[src], %[stride] \n\t" 555 MMI_SDC1(%[ftmp1], %[src], 0x00) 556 PTR_ADDU "%[src], %[src], %[stride] \n\t" 557 MMI_SDC1(%[ftmp1], %[src], 0x00) 558 PTR_ADDU "%[src], %[src], %[stride] \n\t" 559 MMI_SDC1(%[ftmp1], %[src], 0x00) 560 PTR_ADDU "%[src], %[src], %[stride] \n\t" 561 MMI_SDC1(%[ftmp1], %[src], 0x00) 562 PTR_ADDU "%[src], %[src], %[stride] \n\t" 563 MMI_SDC1(%[ftmp1], %[src], 0x00) 564 PTR_ADDU "%[src], %[src], %[stride] \n\t" 565 MMI_SDC1(%[ftmp1], %[src], 0x00) 566 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 567 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 568 [tmp0]"=&r"(tmp[0]), 569 RESTRICT_ASM_ALL64 570 [addr0]"=&r"(addr[0]), 571 [src]"+&r"(src) 572 : [stride]"r"((mips_reg)stride) 573 : "memory" 574 ); 575} 576 577void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride) 578{ 579 double ftmp[5]; 580 mips_reg addr[7]; 581 582 __asm__ volatile ( 583 "negu %[addr0], %[stride] \n\t" 584 PTR_ADDU "%[addr0], %[addr0], %[src] \n\t" 585 PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t" 586 "lbu %[addr2], 0x00(%[addr0]) \n\t" 587 PTR_ADDU "%[addr3], $0, %[addr2] \n\t" 588 PTR_ADDIU "%[addr0], 0x01 \n\t" 589 "lbu %[addr2], 0x00(%[addr1]) \n\t" 590 PTR_ADDU "%[addr4], $0, %[addr2] \n\t" 591 PTR_ADDIU "%[addr1], 0x01 \n\t" 592 "lbu %[addr2], 0x00(%[addr0]) \n\t" 593 PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t" 594 PTR_ADDIU "%[addr0], 0x01 \n\t" 595 "lbu %[addr2], 0x00(%[addr1]) \n\t" 596 PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t" 597 PTR_ADDIU "%[addr1], 0x01 \n\t" 598 "lbu %[addr2], 0x00(%[addr0]) \n\t" 599 PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t" 600 PTR_ADDIU "%[addr0], 0x01 \n\t" 601 "lbu %[addr2], 0x00(%[addr1]) \n\t" 602 PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t" 603 PTR_ADDIU "%[addr1], 0x01 \n\t" 604 "lbu %[addr2], 0x00(%[addr0]) \n\t" 605 PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t" 606 PTR_ADDIU "%[addr0], 0x01 \n\t" 607 "lbu %[addr2], 0x00(%[addr1]) \n\t" 608 PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t" 609 PTR_ADDIU "%[addr1], 0x01 \n\t" 610 "dli %[addr2], -0x01 \n\t" 611 PTR_ADDU "%[addr2], %[addr2], %[src] \n\t" 612 "lbu %[addr1], 0x00(%[addr2]) \n\t" 613 PTR_ADDU "%[addr5], $0, %[addr1] \n\t" 614 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 615 "lbu %[addr1], 0x00(%[addr2]) \n\t" 616 PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t" 617 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 618 "lbu %[addr1], 0x00(%[addr2]) \n\t" 619 PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t" 620 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 621 "lbu %[addr1], 0x00(%[addr2]) \n\t" 622 PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t" 623 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 624 "lbu %[addr1], 0x00(%[addr2]) \n\t" 625 PTR_ADDU "%[addr6], $0, %[addr1] \n\t" 626 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 627 "lbu %[addr1], 0x00(%[addr2]) \n\t" 628 PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t" 629 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 630 "lbu %[addr1], 0x00(%[addr2]) \n\t" 631 PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t" 632 PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t" 633 "lbu %[addr1], 0x00(%[addr2]) \n\t" 634 PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t" 635 PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t" 636 PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t" 637 PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t" 638 PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t" 639 PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t" 640 PTR_SRL "%[addr3], 0x03 \n\t" 641 PTR_SRL "%[addr4], 0x02 \n\t" 642 PTR_SRL "%[addr1], 0x02 \n\t" 643 PTR_SRL "%[addr2], 0x03 \n\t" 644 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 645 "dmtc1 %[addr3], %[ftmp1] \n\t" 646 "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 647 "dmtc1 %[addr4], %[ftmp2] \n\t" 648 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 649 "dmtc1 %[addr1], %[ftmp3] \n\t" 650 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 651 "dmtc1 %[addr2], %[ftmp4] \n\t" 652 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 653 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 654 "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t" 655 PTR_ADDU "%[addr0], $0, %[src] \n\t" 656 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 657 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 658 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 659 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 660 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 661 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 662 MMI_SDC1(%[ftmp1], %[addr0], 0x00) 663 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 664 MMI_SDC1(%[ftmp2], %[addr0], 0x00) 665 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 666 MMI_SDC1(%[ftmp2], %[addr0], 0x00) 667 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 668 MMI_SDC1(%[ftmp2], %[addr0], 0x00) 669 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 670 MMI_SDC1(%[ftmp2], %[addr0], 0x00) 671 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 672 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 673 [ftmp4]"=&f"(ftmp[4]), 674 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 675 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]), 676 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), 677 [addr6]"=&r"(addr[6]) 678 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride) 679 : "memory" 680 ); 681} 682 683void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) 684{ 685 double ftmp[1]; 686 uint64_t tmp[1]; 687 DECLARE_VAR_ALL64; 688 689 __asm__ volatile ( 690 MMI_LDC1(%[ftmp0], %[srcA], 0x00) 691 "dli %[tmp0], 0x04 \n\t" 692 693 "1: \n\t" 694 MMI_SDC1(%[ftmp0], %[src], 0x00) 695 PTR_ADDU "%[src], %[src], %[stride] \n\t" 696 MMI_SDC1(%[ftmp0], %[src], 0x00) 697 PTR_ADDU "%[src], %[src], %[stride] \n\t" 698 MMI_SDC1(%[ftmp0], %[src], 0x00) 699 PTR_ADDU "%[src], %[src], %[stride] \n\t" 700 MMI_SDC1(%[ftmp0], %[src], 0x00) 701 702 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 703 PTR_ADDU "%[src], %[src], %[stride] \n\t" 704 "bnez %[tmp0], 1b \n\t" 705 : [ftmp0]"=&f"(ftmp[0]), 706 [tmp0]"=&r"(tmp[0]), 707 RESTRICT_ASM_ALL64 708 [src]"+&r"(src) 709 : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride)) 710 : "memory" 711 ); 712} 713 714void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) 715{ 716 uint64_t tmp[3]; 717 mips_reg addr[2]; 718 719 __asm__ volatile ( 720 PTR_ADDI "%[addr0], %[src], -0x01 \n\t" 721 PTR_ADDU "%[addr1], %[src], $0 \n\t" 722 "dli %[tmp0], 0x08 \n\t" 723 "1: \n\t" 724 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 725 "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t" 726 "swl %[tmp2], 0x07(%[addr1]) \n\t" 727 "swr %[tmp2], 0x00(%[addr1]) \n\t" 728 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 729 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 730 "lbu %[tmp1], 0x00(%[addr0]) \n\t" 731 "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t" 732 "swl %[tmp2], 0x07(%[addr1]) \n\t" 733 "swr %[tmp2], 0x00(%[addr1]) \n\t" 734 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 735 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 736 PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t" 737 "bnez %[tmp0], 1b \n\t" 738 : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 739 [tmp2]"=&r"(tmp[2]), 740 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 741 : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride), 742 [ff_pb_1]"r"(ff_pb_1) 743 : "memory" 744 ); 745} 746 747static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride, 748 const int svq3, const int rv40) 749{ 750 double ftmp[11]; 751 uint64_t tmp[6]; 752 mips_reg addr[1]; 753 DECLARE_VAR_ALL64; 754 755 __asm__ volatile( 756 PTR_SUBU "%[addr0], %[src], %[stride] \n\t" 757 "dli %[tmp0], 0x20 \n\t" 758 "dmtc1 %[tmp0], %[ftmp4] \n\t" 759 MMI_ULDC1(%[ftmp0], %[addr0], -0x01) 760 MMI_ULDC1(%[ftmp2], %[addr0], 0x08) 761 "ssrld %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 762 "ssrld %[ftmp3], %[ftmp2], %[ftmp4] \n\t" 763 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 764 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 765 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 766 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 767 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 768 "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t" 769 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t" 770 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t" 771 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t" 772 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 773 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 774 "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 775 "dli %[tmp0], 0x0e \n\t" 776 "dmtc1 %[tmp0], %[ftmp4] \n\t" 777 "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 778 "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 779 "dli %[tmp0], 0x01 \n\t" 780 "dmtc1 %[tmp0], %[ftmp4] \n\t" 781 "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 782 "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t" 783 784 PTR_ADDIU "%[addr0], %[src], -0x01 \n\t" 785 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t" 786 "lbu %[tmp2], 0x00(%[addr0]) \n\t" 787 "lbu %[tmp5], 0x10(%[addr0]) \n\t" 788 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 789 "lbu %[tmp3], 0x00(%[addr0]) \n\t" 790 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 791 "lbu %[tmp4], 0x00(%[addr0]) \n\t" 792 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 793 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 794 "dsll %[tmp3], %[tmp3], 0x10 \n\t" 795 "dsll %[tmp4], %[tmp4], 0x20 \n\t" 796 "dsll %[tmp0], %[tmp0], 0x30 \n\t" 797 "or %[tmp4], %[tmp4], %[tmp0] \n\t" 798 "or %[tmp2], %[tmp2], %[tmp3] \n\t" 799 "or %[tmp2], %[tmp2], %[tmp4] \n\t" 800 "dmtc1 %[tmp2], %[ftmp0] \n\t" 801 802 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 803 "lbu %[tmp2], 0x00(%[addr0]) \n\t" 804 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 805 "lbu %[tmp3], 0x00(%[addr0]) \n\t" 806 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 807 "lbu %[tmp4], 0x00(%[addr0]) \n\t" 808 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 809 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 810 "dsll %[tmp3], %[tmp3], 0x10 \n\t" 811 "dsll %[tmp4], %[tmp4], 0x20 \n\t" 812 "dsll %[tmp0], %[tmp0], 0x30 \n\t" 813 "or %[tmp4], %[tmp4], %[tmp0] \n\t" 814 "or %[tmp2], %[tmp2], %[tmp3] \n\t" 815 "or %[tmp2], %[tmp2], %[tmp4] \n\t" 816 "dmtc1 %[tmp2], %[ftmp1] \n\t" 817 818 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 819 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 820 "lbu %[tmp2], 0x00(%[addr0]) \n\t" 821 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 822 "lbu %[tmp3], 0x00(%[addr0]) \n\t" 823 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 824 "lbu %[tmp4], 0x00(%[addr0]) \n\t" 825 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 826 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 827 "dsll %[tmp3], %[tmp3], 0x10 \n\t" 828 "dsll %[tmp4], %[tmp4], 0x20 \n\t" 829 "dsll %[tmp0], %[tmp0], 0x30 \n\t" 830 "or %[tmp4], %[tmp4], %[tmp0] \n\t" 831 "or %[tmp2], %[tmp2], %[tmp3] \n\t" 832 "or %[tmp2], %[tmp2], %[tmp4] \n\t" 833 "dmtc1 %[tmp2], %[ftmp2] \n\t" 834 835 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 836 "lbu %[tmp2], 0x00(%[addr0]) \n\t" 837 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 838 "lbu %[tmp3], 0x00(%[addr0]) \n\t" 839 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 840 "lbu %[tmp4], 0x00(%[addr0]) \n\t" 841 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 842 "lbu %[tmp0], 0x00(%[addr0]) \n\t" 843 "daddu %[tmp5], %[tmp5], %[tmp0] \n\t" 844 "daddiu %[tmp5], %[tmp5], 0x01 \n\t" 845 "dsll %[tmp5], %[tmp5], 0x04 \n\t" 846 847 "dsll %[tmp3], %[tmp3], 0x10 \n\t" 848 "dsll %[tmp4], %[tmp4], 0x20 \n\t" 849 "dsll %[tmp0], %[tmp0], 0x30 \n\t" 850 "or %[tmp4], %[tmp4], %[tmp0] \n\t" 851 "or %[tmp2], %[tmp2], %[tmp3] \n\t" 852 "or %[tmp2], %[tmp2], %[tmp4] \n\t" 853 "dmtc1 %[tmp2], %[ftmp3] \n\t" 854 855 "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t" 856 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t" 857 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t" 858 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t" 859 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 860 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 861 "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 862 "dli %[tmp0], 0x0e \n\t" 863 "dmtc1 %[tmp0], %[ftmp4] \n\t" 864 "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 865 "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 866 867 "dli %[tmp0], 0x01 \n\t" 868 "dmtc1 %[tmp0], %[ftmp4] \n\t" 869 "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 870 "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 871 872 "dmfc1 %[tmp0], %[ftmp5] \n\t" 873 "dsll %[tmp0], %[tmp0], 0x30 \n\t" 874 "dsra %[tmp0], %[tmp0], 0x30 \n\t" 875 "dmfc1 %[tmp1], %[ftmp6] \n\t" 876 "dsll %[tmp1], %[tmp1], 0x30 \n\t" 877 "dsra %[tmp1], %[tmp1], 0x30 \n\t" 878 879 "beqz %[svq3], 1f \n\t" 880 "dli %[tmp2], 0x04 \n\t" 881 "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t" 882 "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t" 883 "dli %[tmp2], 0x05 \n\t" 884 "dmul %[tmp0], %[tmp0], %[tmp2] \n\t" 885 "dmul %[tmp1], %[tmp1], %[tmp2] \n\t" 886 "dli %[tmp2], 0x10 \n\t" 887 "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t" 888 "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t" 889 "daddu %[tmp2], %[tmp0], $0 \n\t" 890 "daddu %[tmp0], %[tmp1], $0 \n\t" 891 "daddu %[tmp1], %[tmp2], $0 \n\t" 892 "b 2f \n\t" 893 894 "1: \n\t" 895 "beqz %[rv40], 1f \n\t" 896 "dsra %[tmp2], %[tmp0], 0x02 \n\t" 897 "daddu %[tmp0], %[tmp0], %[tmp2] \n\t" 898 "dsra %[tmp2], %[tmp1], 0x02 \n\t" 899 "daddu %[tmp1], %[tmp1], %[tmp2] \n\t" 900 "dsra %[tmp0], %[tmp0], 0x04 \n\t" 901 "dsra %[tmp1], %[tmp1], 0x04 \n\t" 902 "b 2f \n\t" 903 904 "1: \n\t" 905 "dli %[tmp2], 0x05 \n\t" 906 "dmul %[tmp0], %[tmp0], %[tmp2] \n\t" 907 "dmul %[tmp1], %[tmp1], %[tmp2] \n\t" 908 "daddiu %[tmp0], %[tmp0], 0x20 \n\t" 909 "daddiu %[tmp1], %[tmp1], 0x20 \n\t" 910 "dsra %[tmp0], %[tmp0], 0x06 \n\t" 911 "dsra %[tmp1], %[tmp1], 0x06 \n\t" 912 913 "2: \n\t" 914 "daddu %[tmp3], %[tmp0], %[tmp1] \n\t" 915 "dli %[tmp2], 0x07 \n\t" 916 "dmul %[tmp3], %[tmp3], %[tmp2] \n\t" 917 "dsubu %[tmp5], %[tmp5], %[tmp3] \n\t" 918 919 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 920 "dmtc1 %[tmp0], %[ftmp0] \n\t" 921 "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 922 "dmtc1 %[tmp1], %[ftmp5] \n\t" 923 "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 924 "dmtc1 %[tmp5], %[ftmp6] \n\t" 925 "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 926 "dli %[tmp0], 0x05 \n\t" 927 "dmtc1 %[tmp0], %[ftmp7] \n\t" 928 "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t" 929 "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t" 930 "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 931 "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t" 932 "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 933 "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t" 934 "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 935 936 "dli %[tmp0], 0x10 \n\t" 937 PTR_ADDU "%[addr0], %[src], $0 \n\t" 938 "1: \n\t" 939 "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t" 940 "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 941 "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t" 942 "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t" 943 "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t" 944 MMI_SDC1(%[ftmp0], %[addr0], 0x00) 945 946 "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t" 947 "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 948 "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t" 949 "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t" 950 "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t" 951 MMI_SDC1(%[ftmp0], %[addr0], 0x08) 952 953 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 954 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 955 "daddiu %[tmp0], %[tmp0], -0x01 \n\t" 956 "bnez %[tmp0], 1b \n\t" 957 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 958 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 959 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 960 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 961 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 962 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 963 [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]), 964 [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]), 965 RESTRICT_ASM_ALL64 966 [addr0]"=&r"(addr[0]) 967 : [src]"r"(src), [stride]"r"((mips_reg)stride), 968 [svq3]"r"(svq3), [rv40]"r"(rv40), 969 [ff_pw_m8tom5]"f"(ff_pw_m8tom5.f),[ff_pw_m4tom1]"f"(ff_pw_m4tom1.f), 970 [ff_pw_1to4]"f"(ff_pw_1to4.f), [ff_pw_5to8]"f"(ff_pw_5to8.f), 971 [ff_pw_0to3]"f"(ff_pw_0to3.f), [ff_pw_4to7]"r"(ff_pw_4to7.i), 972 [ff_pw_8tob]"r"(ff_pw_8tob.i), [ff_pw_ctof]"r"(ff_pw_ctof.i) 973 : "memory" 974 ); 975} 976 977void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride) 978{ 979 pred16x16_plane_compat_mmi(src, stride, 0, 0); 980} 981 982void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride) 983{ 984 pred16x16_plane_compat_mmi(src, stride, 1, 0); 985} 986 987void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride) 988{ 989 pred16x16_plane_compat_mmi(src, stride, 0, 1); 990} 991