1/* 2 * Loongson SIMD optimized h264qpel 3 * 4 * Copyright (c) 2015 Loongson Technology Corporation Limited 5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include "h264dsp_mips.h" 25#include "hpeldsp_mips.h" 26#include "libavcodec/bit_depth_template.c" 27#include "libavutil/mips/mmiutils.h" 28 29static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src, 30 int dstStride, int srcStride, int h) 31{ 32 double ftmp[1]; 33 DECLARE_VAR_LOW32; 34 35 __asm__ volatile ( 36 "1: \n\t" 37 MMI_ULWC1(%[ftmp0], %[src], 0x00) 38 MMI_SWC1(%[ftmp0], %[dst], 0x00) 39 "addi %[h], %[h], -0x01 \n\t" 40 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 41 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 42 "bnez %[h], 1b \n\t" 43 : [ftmp0]"=&f"(ftmp[0]), 44 [dst]"+&r"(dst), [src]"+&r"(src), 45 RESTRICT_ASM_LOW32 46 [h]"+&r"(h) 47 : [dstStride]"r"((mips_reg)dstStride), 48 [srcStride]"r"((mips_reg)srcStride) 49 : "memory" 50 ); 51} 52 53static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src, 54 int dstStride, int srcStride, int h) 55{ 56 double ftmp[1]; 57 DECLARE_VAR_ALL64; 58 59 __asm__ volatile ( 60 "1: \n\t" 61 MMI_ULDC1(%[ftmp0], %[src], 0x00) 62 MMI_SDC1(%[ftmp0], %[dst], 0x00) 63 "addi %[h], %[h], -0x01 \n\t" 64 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 65 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 66 "bnez %[h], 1b \n\t" 67 : [ftmp0]"=&f"(ftmp[0]), 68 RESTRICT_ASM_ALL64 69 [dst]"+&r"(dst), [src]"+&r"(src), 70 [h]"+&r"(h) 71 : [dstStride]"r"((mips_reg)dstStride), 72 [srcStride]"r"((mips_reg)srcStride) 73 : "memory" 74 ); 75} 76 77static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src, 78 int dstStride, int srcStride, int h) 79{ 80 double ftmp[1]; 81 uint64_t tmp[1]; 82 DECLARE_VAR_ALL64; 83 84 __asm__ volatile ( 85 "1: \n\t" 86 MMI_ULDC1(%[ftmp0], %[src], 0x00) 87 "ldl %[tmp0], 0x0f(%[src]) \n\t" 88 "ldr %[tmp0], 0x08(%[src]) \n\t" 89 MMI_SDC1(%[ftmp0], %[dst], 0x00) 90 "sdl %[tmp0], 0x0f(%[dst]) \n\t" 91 "sdr %[tmp0], 0x08(%[dst]) \n\t" 92 "addi %[h], %[h], -0x01 \n\t" 93 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 94 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 95 "bnez %[h], 1b \n\t" 96 : [ftmp0]"=&f"(ftmp[0]), 97 [tmp0]"=&r"(tmp[0]), 98 RESTRICT_ASM_ALL64 99 [dst]"+&r"(dst), [src]"+&r"(src), 100 [h]"+&r"(h) 101 : [dstStride]"r"((mips_reg)dstStride), 102 [srcStride]"r"((mips_reg)srcStride) 103 : "memory" 104 ); 105} 106 107#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1) 108#define op2_put(a, b) a = CLIP(((b) + 512)>>10) 109static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 110 int dstStride, int srcStride) 111{ 112 double ftmp[10]; 113 uint64_t tmp[1]; 114 DECLARE_VAR_LOW32; 115 116 __asm__ volatile ( 117 "pxor %[ftmp0], %[ftmp0], 
%[ftmp0] \n\t" 118 "dli %[tmp0], 0x04 \n\t" 119 "1: \n\t" 120 MMI_ULWC1(%[ftmp1], %[src], -0x02) 121 MMI_ULWC1(%[ftmp2], %[src], -0x01) 122 MMI_ULWC1(%[ftmp3], %[src], 0x00) 123 MMI_ULWC1(%[ftmp4], %[src], 0x01) 124 MMI_ULWC1(%[ftmp5], %[src], 0x02) 125 MMI_ULWC1(%[ftmp6], %[src], 0x03) 126 127 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 128 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 129 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 130 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 131 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 132 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 133 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 134 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t" 135 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t" 136 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t" 137 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t" 138 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 139 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t" 140 "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t" 141 "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t" 142 "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 143 MMI_SWC1(%[ftmp9], %[dst], 0x00) 144 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 145 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 146 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 147 "bnez %[tmp0], 1b \n\t" 148 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 149 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 150 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 151 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 152 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 153 [tmp0]"=&r"(tmp[0]), 154 RESTRICT_ASM_LOW32 155 [dst]"+&r"(dst), [src]"+&r"(src) 156 : [dstStride]"r"((mips_reg)dstStride), 157 [srcStride]"r"((mips_reg)srcStride), 158 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f), 159 [ff_pw_16]"f"(ff_pw_16.f) 160 : "memory" 161 ); 162} 163 164static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 165 int dstStride, int srcStride) 166{ 167 double ftmp[11]; 168 uint64_t tmp[1]; 169 DECLARE_VAR_ALL64; 170 171 __asm__ volatile ( 172 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 173 "dli %[tmp0], 0x08 \n\t" 174 "1: \n\t" 175 MMI_ULDC1(%[ftmp1], %[src], -0x02) 176 MMI_ULDC1(%[ftmp2], %[src], -0x01) 177 MMI_ULDC1(%[ftmp3], %[src], 0x00) 178 MMI_ULDC1(%[ftmp4], %[src], 0x01) 179 MMI_ULDC1(%[ftmp5], %[src], 0x02) 180 MMI_ULDC1(%[ftmp6], %[src], 0x03) 181 "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 182 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" 183 "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t" 184 "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t" 185 "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t" 186 "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t" 187 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t" 188 "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t" 189 "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 190 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 191 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 192 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 193 "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t" 194 "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t" 195 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t" 196 "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t" 197 "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" 198 "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t" 199 "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t" 200 "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t" 201 "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t" 202 "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t" 203 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 204 "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 205 "paddsh %[ftmp3], %[ftmp3], 
%[ftmp1] \n\t" 206 "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 207 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 208 "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 209 "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t" 210 "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t" 211 "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t" 212 MMI_SDC1(%[ftmp9], %[dst], 0x00) 213 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 214 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 215 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 216 "bnez %[tmp0], 1b \n\t" 217 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 218 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 219 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 220 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 221 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 222 [ftmp10]"=&f"(ftmp[10]), 223 [tmp0]"=&r"(tmp[0]), 224 RESTRICT_ASM_ALL64 225 [dst]"+&r"(dst), [src]"+&r"(src) 226 : [dstStride]"r"((mips_reg)dstStride), 227 [srcStride]"r"((mips_reg)srcStride), 228 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f), 229 [ff_pw_16]"f"(ff_pw_16.f) 230 : "memory" 231 ); 232} 233 234static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 235 int dstStride, int srcStride) 236{ 237 put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride); 238 put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 239 src += 8*srcStride; 240 dst += 8*dstStride; 241 put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride); 242 put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 243} 244 245static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 246 int dstStride, int srcStride) 247{ 248 double ftmp[11]; 249 uint64_t tmp[1]; 250 DECLARE_VAR_LOW32; 251 252 __asm__ volatile ( 253 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 254 "dli %[tmp0], 0x04 \n\t" 255 "1: \n\t" 256 MMI_ULWC1(%[ftmp1], %[src], -0x02) 257 MMI_ULWC1(%[ftmp2], %[src], -0x01) 258 MMI_ULWC1(%[ftmp3], %[src], 0x00) 259 MMI_ULWC1(%[ftmp4], %[src], 0x01) 260 MMI_ULWC1(%[ftmp5], %[src], 0x02) 261 MMI_ULWC1(%[ftmp6], %[src], 0x03) 262 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 263 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 264 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 265 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 266 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 267 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 268 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 269 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t" 270 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t" 271 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t" 272 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t" 273 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 274 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t" 275 "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t" 276 "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t" 277 "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 278 MMI_LWC1(%[ftmp10], %[dst], 0x00) 279 "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t" 280 MMI_SWC1(%[ftmp9], %[dst], 0x00) 281 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 282 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 283 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 284 "bnez %[tmp0], 1b \n\t" 285 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 286 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 287 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 288 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 289 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 290 [ftmp10]"=&f"(ftmp[10]), 291 [tmp0]"=&r"(tmp[0]), 292 RESTRICT_ASM_LOW32 293 [dst]"+&r"(dst), [src]"+&r"(src) 294 : [dstStride]"r"((mips_reg)dstStride), 295 
[srcStride]"r"((mips_reg)srcStride), 296 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f), 297 [ff_pw_16]"f"(ff_pw_16.f) 298 : "memory" 299 ); 300} 301 302static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 303 int dstStride, int srcStride) 304{ 305 double ftmp[11]; 306 uint64_t tmp[1]; 307 DECLARE_VAR_ALL64; 308 309 __asm__ volatile ( 310 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 311 "dli %[tmp0], 0x08 \n\t" 312 "1: \n\t" 313 MMI_ULDC1(%[ftmp1], %[src], -0x02) 314 MMI_ULDC1(%[ftmp2], %[src], -0x01) 315 MMI_ULDC1(%[ftmp3], %[src], 0x00) 316 MMI_ULDC1(%[ftmp4], %[src], 0x01) 317 MMI_ULDC1(%[ftmp5], %[src], 0x02) 318 MMI_ULDC1(%[ftmp6], %[src], 0x03) 319 "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 320 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" 321 "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t" 322 "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t" 323 "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t" 324 "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t" 325 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t" 326 "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t" 327 "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 328 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 329 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 330 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 331 "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t" 332 "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t" 333 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t" 334 "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t" 335 "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" 336 "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t" 337 "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t" 338 "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t" 339 "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t" 340 "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t" 341 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 342 "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 343 "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 344 "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 345 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 346 "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 347 "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t" 348 "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t" 349 "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t" 350 MMI_LDC1(%[ftmp10], %[dst], 0x00) 351 "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t" 352 MMI_SDC1(%[ftmp9], %[dst], 0x00) 353 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 354 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 355 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 356 "bnez %[tmp0], 1b \n\t" 357 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 358 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 359 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 360 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 361 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 362 [ftmp10]"=&f"(ftmp[10]), 363 [tmp0]"=&r"(tmp[0]), 364 RESTRICT_ASM_ALL64 365 [dst]"+&r"(dst), [src]"+&r"(src) 366 : [dstStride]"r"((mips_reg)dstStride), 367 [srcStride]"r"((mips_reg)srcStride), 368 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f), 369 [ff_pw_16]"f"(ff_pw_16.f) 370 : "memory" 371 ); 372} 373 374static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, 375 int dstStride, int srcStride) 376{ 377 avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride); 378 avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 379 src += 8*srcStride; 380 dst += 8*dstStride; 381 avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride); 382 avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 383} 384 385static void put_h264_qpel4_v_lowpass_mmi(uint8_t 
*dst, const uint8_t *src, 386 int dstStride, int srcStride) 387{ 388 double ftmp[12]; 389 uint64_t tmp[1]; 390 DECLARE_VAR_LOW32; 391 392 src -= 2 * srcStride; 393 394 __asm__ volatile ( 395 ".set push \n\t" 396 ".set noreorder \n\t" 397 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 398 "dli %[tmp0], 0x02 \n\t" 399 MMI_LWC1(%[ftmp1], %[src], 0x00) 400 "mtc1 %[tmp0], %[ftmp10] \n\t" 401 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 402 "dli %[tmp0], 0x05 \n\t" 403 MMI_LWC1(%[ftmp2], %[src], 0x00) 404 "mtc1 %[tmp0], %[ftmp11] \n\t" 405 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 406 MMI_LWC1(%[ftmp3], %[src], 0x00) 407 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 408 MMI_LWC1(%[ftmp4], %[src], 0x00) 409 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 410 MMI_LWC1(%[ftmp5], %[src], 0x00) 411 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 412 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 413 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 414 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 415 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 416 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 417 MMI_LWC1(%[ftmp6], %[src], 0x00) 418 "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 419 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 420 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 421 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 422 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 423 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t" 424 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 425 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 426 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 427 "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 428 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 429 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 430 MMI_SWC1(%[ftmp7], %[dst], 0x00) 431 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 432 MMI_LWC1(%[ftmp1], %[src], 0x00) 433 "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t" 434 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 435 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 436 "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 437 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 438 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t" 439 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 440 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 441 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 442 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 443 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 444 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 445 MMI_SWC1(%[ftmp7], %[dst], 0x00) 446 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 447 MMI_LWC1(%[ftmp2], %[src], 0x00) 448 "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t" 449 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 450 "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 451 "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 452 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 453 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t" 454 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 455 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 456 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 457 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 458 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 459 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 460 MMI_SWC1(%[ftmp7], %[dst], 0x00) 461 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 462 MMI_LWC1(%[ftmp3], %[src], 0x00) 463 "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t" 464 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 465 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 466 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 467 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 468 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t" 469 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 470 PTR_ADDU 
"%[src], %[src], %[srcStride] \n\t" 471 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 472 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 473 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 474 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 475 MMI_SWC1(%[ftmp7], %[dst], 0x00) 476 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 477 ".set pop \n\t" 478 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 479 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 480 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 481 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 482 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 483 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 484 [tmp0]"=&r"(tmp[0]), 485 RESTRICT_ASM_LOW32 486 [dst]"+&r"(dst), [src]"+&r"(src) 487 : [dstStride]"r"((mips_reg)dstStride), 488 [srcStride]"r"((mips_reg)srcStride), 489 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 490 : "memory" 491 ); 492} 493 494static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, 495 int dstStride, int srcStride) 496{ 497 int w = 2; 498 int h = 8; 499 double ftmp[10]; 500 uint64_t tmp[1]; 501 DECLARE_VAR_LOW32; 502 503 src -= 2 * srcStride; 504 505 while (w--) { 506 __asm__ volatile ( 507 ".set push \n\t" 508 ".set noreorder \n\t" 509 "dli %[tmp0], 0x02 \n\t" 510 MMI_LWC1(%[ftmp0], %[src], 0x00) 511 "mtc1 %[tmp0], %[ftmp8] \n\t" 512 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 513 "dli %[tmp0], 0x05 \n\t" 514 MMI_LWC1(%[ftmp1], %[src], 0x00) 515 "mtc1 %[tmp0], %[ftmp9] \n\t" 516 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 517 MMI_LWC1(%[ftmp2], %[src], 0x00) 518 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 519 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 520 MMI_LWC1(%[ftmp3], %[src], 0x00) 521 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 522 MMI_LWC1(%[ftmp4], %[src], 0x00) 523 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 524 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 525 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 526 MMI_LWC1(%[ftmp5], %[src], 0x00) 527 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 528 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 529 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 530 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 531 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 532 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 533 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 534 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 535 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 536 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 537 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 538 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 539 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 540 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 541 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 542 MMI_SWC1(%[ftmp6], %[dst], 0x00) 543 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 544 MMI_LWC1(%[ftmp0], %[src], 0x00) 545 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 546 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 547 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 548 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 549 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 550 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 551 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 552 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 553 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 554 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 555 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 556 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 557 MMI_SWC1(%[ftmp6], %[dst], 0x00) 558 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 559 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 560 MMI_LWC1(%[ftmp1], %[src], 0x00) 561 "psllh 
%[ftmp6], %[ftmp6], %[ftmp8] \n\t" 562 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 563 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 564 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 565 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 566 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 567 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 568 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 569 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 570 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 571 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 572 MMI_SWC1(%[ftmp6], %[dst], 0x00) 573 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 574 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 575 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 576 MMI_LWC1(%[ftmp2], %[src], 0x00) 577 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 578 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 579 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 580 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 581 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 582 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 583 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 584 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 585 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 586 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 587 MMI_SWC1(%[ftmp6], %[dst], 0x00) 588 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 589 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 590 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 591 MMI_LWC1(%[ftmp3], %[src], 0x00) 592 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 593 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 594 "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t" 595 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 596 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 597 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 598 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 599 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 600 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 601 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 602 MMI_SWC1(%[ftmp6], %[dst], 0x00) 603 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 604 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 605 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 606 MMI_LWC1(%[ftmp4], %[src], 0x00) 607 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 608 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 609 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 610 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 611 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 612 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 613 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 614 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 615 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 616 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 617 MMI_SWC1(%[ftmp6], %[dst], 0x00) 618 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 619 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 620 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 621 MMI_LWC1(%[ftmp5], %[src], 0x00) 622 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 623 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 624 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 625 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 626 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 627 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 628 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 629 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 630 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 631 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 632 MMI_SWC1(%[ftmp6], %[dst], 0x00) 633 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 634 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 635 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 636 MMI_LWC1(%[ftmp0], %[src], 0x00) 637 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 638 "psubh 
%[ftmp6], %[ftmp6], %[ftmp5] \n\t" 639 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 640 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 641 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 642 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 643 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 644 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 645 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 646 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 647 MMI_SWC1(%[ftmp6], %[dst], 0x00) 648 "bne %[h], 0x10, 2f \n\t" 649 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 650 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 651 MMI_LWC1(%[ftmp1], %[src], 0x00) 652 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 653 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 654 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 655 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 656 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 657 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 658 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 659 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 660 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 661 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 662 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 663 MMI_SWC1(%[ftmp6], %[dst], 0x00) 664 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 665 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 666 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 667 MMI_LWC1(%[ftmp2], %[src], 0x00) 668 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 669 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 670 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 671 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 672 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 673 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 674 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 675 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 676 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 677 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 678 MMI_SWC1(%[ftmp6], %[dst], 0x00) 679 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 680 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 681 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 682 MMI_LWC1(%[ftmp3], %[src], 0x00) 683 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 684 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 685 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 686 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 687 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 688 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 689 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 690 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 691 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 692 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 693 MMI_SWC1(%[ftmp6], %[dst], 0x00) 694 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 695 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 696 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 697 MMI_LWC1(%[ftmp4], %[src], 0x00) 698 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 699 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 700 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 701 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 702 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 703 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 704 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 705 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 706 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 707 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 708 MMI_SWC1(%[ftmp6], %[dst], 0x00) 709 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 710 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 711 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 712 MMI_LWC1(%[ftmp5], %[src], 0x00) 713 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 714 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 715 "punpcklbh %[ftmp5], %[ftmp5], 
%[ftmp7] \n\t" 716 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 717 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 718 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 719 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 720 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 721 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 722 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 723 MMI_SWC1(%[ftmp6], %[dst], 0x00) 724 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 725 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 726 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 727 MMI_LWC1(%[ftmp0], %[src], 0x00) 728 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 729 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 730 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 731 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 732 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 733 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 734 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 735 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 736 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 737 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 738 MMI_SWC1(%[ftmp6], %[dst], 0x00) 739 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 740 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 741 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 742 MMI_LWC1(%[ftmp1], %[src], 0x00) 743 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 744 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 745 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 746 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 747 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 748 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 749 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 750 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 751 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 752 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 753 MMI_SWC1(%[ftmp6], %[dst], 0x00) 754 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 755 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 756 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 757 MMI_LWC1(%[ftmp2], %[src], 0x00) 758 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 759 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 760 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 761 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 762 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 763 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 764 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 765 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 766 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 767 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 768 MMI_SWC1(%[ftmp6], %[dst], 0x00) 769 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 770 "2: \n\t" 771 ".set pop \n\t" 772 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 773 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 774 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 775 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 776 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 777 [tmp0]"=&r"(tmp[0]), 778 RESTRICT_ASM_LOW32 779 [src]"+&r"(src), [dst]"+&r"(dst), 780 [h]"+&r"(h) 781 : [dstStride]"r"((mips_reg)dstStride), 782 [srcStride]"r"((mips_reg)srcStride), 783 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 784 : "memory" 785 ); 786 787 src += 4 - (h + 5) * srcStride; 788 dst += 4 - h * dstStride; 789 } 790} 791 792static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, 793 int dstStride, int srcStride) 794{ 795 put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride); 796 put_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 797 src += 8*srcStride; 798 dst += 8*dstStride; 799 put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride); 800 put_h264_qpel8_v_lowpass_mmi(dst+8, 
src+8, dstStride, srcStride); 801} 802 803static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, 804 int dstStride, int srcStride) 805{ 806 double ftmp[10]; 807 uint64_t tmp[1]; 808 809 src -= 2 * srcStride; 810 811 __asm__ volatile ( 812 ".set push \n\t" 813 ".set noreorder \n\t" 814 "dli %[tmp0], 0x02 \n\t" 815 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 816 "mtc1 %[tmp0], %[ftmp9] \n\t" 817 "dli %[tmp0], 0x05 \n\t" 818 MMI_LWC1(%[ftmp0], %[src], 0x00) 819 "mtc1 %[tmp0], %[ftmp8] \n\t" 820 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 821 MMI_LWC1(%[ftmp1], %[src], 0x00) 822 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 823 MMI_LWC1(%[ftmp2], %[src], 0x00) 824 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 825 MMI_LWC1(%[ftmp3], %[src], 0x00) 826 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 827 MMI_LWC1(%[ftmp4], %[src], 0x00) 828 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 829 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 830 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 831 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 832 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 833 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 834 MMI_LWC1(%[ftmp5], %[src], 0x00) 835 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 836 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 837 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 838 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 839 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 840 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 841 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 842 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 843 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 844 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 845 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 846 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 847 MMI_LWC1(%[ftmp0], %[dst], 0x00) 848 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 849 MMI_SWC1(%[ftmp6], %[dst], 0x00) 850 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 851 MMI_LWC1(%[ftmp0], %[src], 0x00) 852 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 853 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 854 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 855 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 856 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 857 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 858 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 859 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 860 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 861 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 862 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 863 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 864 MMI_LWC1(%[ftmp1], %[dst], 0x00) 865 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 866 MMI_SWC1(%[ftmp6], %[dst], 0x00) 867 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 868 MMI_LWC1(%[ftmp1], %[src], 0x00) 869 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 870 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 871 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 872 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 873 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 874 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 875 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 876 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 877 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 878 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 879 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 880 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 881 MMI_LWC1(%[ftmp2], %[dst], 0x00) 882 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 883 MMI_SWC1(%[ftmp6], %[dst], 0x00) 884 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 885 MMI_LWC1(%[ftmp2], %[src], 0x00) 886 "paddh %[ftmp6], %[ftmp5], %[ftmp0] 
\n\t" 887 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 888 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 889 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 890 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 891 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 892 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 893 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 894 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 895 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 896 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 897 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 898 MMI_LWC1(%[ftmp3], %[dst], 0x00) 899 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 900 MMI_SWC1(%[ftmp6], %[dst], 0x00) 901 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 902 ".set pop \n\t" 903 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 904 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 905 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 906 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 907 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 908 [tmp0]"=&r"(tmp[0]), 909 [src]"+&r"(src), [dst]"+&r"(dst) 910 : [dstStride]"r"((mips_reg)dstStride), 911 [srcStride]"r"((mips_reg)srcStride), 912 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 913 : "memory" 914 ); 915} 916 917static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, 918 int dstStride, int srcStride) 919{ 920 int w = 2; 921 int h = 8; 922 double ftmp[10]; 923 uint64_t tmp[1]; 924 DECLARE_VAR_LOW32; 925 926 src -= 2 * srcStride; 927 928 while (w--) { 929 __asm__ volatile ( 930 ".set push \n\t" 931 ".set noreorder \n\t" 932 "dli %[tmp0], 0x02 \n\t" 933 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 934 "mtc1 %[tmp0], %[ftmp9] \n\t" 935 "dli %[tmp0], 0x05 \n\t" 936 MMI_LWC1(%[ftmp0], %[src], 0x00) 937 "mtc1 %[tmp0], %[ftmp8] \n\t" 938 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 939 MMI_LWC1(%[ftmp1], %[src], 0x00) 940 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 941 MMI_LWC1(%[ftmp2], %[src], 0x00) 942 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 943 MMI_LWC1(%[ftmp3], %[src], 0x00) 944 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 945 MMI_LWC1(%[ftmp4], %[src], 0x00) 946 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 947 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 948 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 949 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 950 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 951 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 952 MMI_LWC1(%[ftmp5], %[src], 0x00) 953 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 954 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 955 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 956 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 957 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 958 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 959 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 960 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 961 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 962 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 963 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 964 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 965 MMI_LWC1(%[ftmp0], %[dst], 0x00) 966 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 967 MMI_SWC1(%[ftmp6], %[dst], 0x00) 968 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 969 MMI_LWC1(%[ftmp0], %[src], 0x00) 970 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 971 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 972 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 973 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 974 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 975 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 976 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 977 PTR_ADDU "%[src], 
%[src], %[srcStride] \n\t" 978 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 979 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 980 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 981 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 982 MMI_LWC1(%[ftmp1], %[dst], 0x00) 983 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 984 MMI_SWC1(%[ftmp6], %[dst], 0x00) 985 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 986 MMI_LWC1(%[ftmp1], %[src], 0x00) 987 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 988 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 989 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 990 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 991 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 992 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 993 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 994 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 995 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 996 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 997 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 998 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 999 MMI_LWC1(%[ftmp2], %[dst], 0x00) 1000 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1001 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1002 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1003 MMI_LWC1(%[ftmp2], %[src], 0x00) 1004 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1005 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1006 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1007 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1008 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1009 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1010 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1011 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1012 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1013 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1014 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1015 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1016 MMI_LWC1(%[ftmp3], %[dst], 0x00) 1017 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1018 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1019 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1020 MMI_LWC1(%[ftmp3], %[src], 0x00) 1021 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 1022 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1023 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1024 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1025 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1026 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1027 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 1028 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1029 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 1030 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1031 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1032 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1033 MMI_LWC1(%[ftmp4], %[dst], 0x00) 1034 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1035 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1036 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1037 MMI_LWC1(%[ftmp4], %[src], 0x00) 1038 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1039 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1040 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1041 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1042 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1043 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1044 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 1045 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1046 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1047 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1048 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1049 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1050 MMI_LWC1(%[ftmp5], %[dst], 0x00) 1051 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1052 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1053 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1054 
MMI_LWC1(%[ftmp5], %[src], 0x00) 1055 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1056 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1057 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1058 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1059 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1060 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1061 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 1062 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1063 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1064 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1065 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1066 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1067 MMI_LWC1(%[ftmp0], %[dst], 0x00) 1068 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1069 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1070 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1071 MMI_LWC1(%[ftmp0], %[src], 0x00) 1072 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 1073 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1074 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1075 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1076 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1077 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1078 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 1079 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1080 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1081 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1082 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1083 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1084 MMI_LWC1(%[ftmp1], %[dst], 0x00) 1085 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1086 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1087 "bne %[h], 0x10, 2f \n\t" 1088 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1089 MMI_LWC1(%[ftmp1], %[src], 0x00) 1090 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 1091 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1092 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1093 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1094 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1095 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1096 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 1097 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1098 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1099 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1100 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1101 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1102 MMI_LWC1(%[ftmp2], %[dst], 0x00) 1103 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1104 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1105 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1106 MMI_LWC1(%[ftmp2], %[src], 0x00) 1107 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1108 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1109 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1110 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1111 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1112 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1113 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1114 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1115 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1116 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1117 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1118 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1119 MMI_LWC1(%[ftmp3], %[dst], 0x00) 1120 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1121 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1122 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1123 MMI_LWC1(%[ftmp3], %[src], 0x00) 1124 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 1125 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1126 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1127 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1128 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1129 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1130 "paddh %[ftmp4], %[ftmp4], 
%[ff_pw_16] \n\t" 1131 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1132 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 1133 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1134 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1135 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1136 MMI_LWC1(%[ftmp4], %[dst], 0x00) 1137 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1138 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1139 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1140 MMI_LWC1(%[ftmp4], %[src], 0x00) 1141 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1142 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1143 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1144 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1145 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1146 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1147 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 1148 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1149 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1150 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1151 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1152 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1153 MMI_LWC1(%[ftmp5], %[dst], 0x00) 1154 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1155 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1156 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1157 MMI_LWC1(%[ftmp5], %[src], 0x00) 1158 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1159 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1160 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1161 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1162 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1163 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1164 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 1165 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1166 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1167 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1168 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1169 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1170 MMI_LWC1(%[ftmp0], %[dst], 0x00) 1171 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1172 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1173 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1174 MMI_LWC1(%[ftmp0], %[src], 0x00) 1175 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 1176 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1177 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1178 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1179 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1180 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1181 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 1182 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1183 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1184 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1185 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1186 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1187 MMI_LWC1(%[ftmp1], %[dst], 0x00) 1188 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1189 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1190 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1191 MMI_LWC1(%[ftmp1], %[src], 0x00) 1192 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 1193 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1194 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1195 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1196 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1197 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1198 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 1199 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1200 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1201 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1202 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1203 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1204 MMI_LWC1(%[ftmp2], %[dst], 0x00) 1205 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1206 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1207 
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1208 MMI_LWC1(%[ftmp2], %[src], 0x00) 1209 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1210 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 1211 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1212 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1213 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1214 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1215 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1216 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1217 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1218 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1219 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1220 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1221 MMI_LWC1(%[ftmp3], %[dst], 0x00) 1222 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1223 MMI_SWC1(%[ftmp6], %[dst], 0x00) 1224 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1225 "2: \n\t" 1226 ".set pop \n\t" 1227 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1228 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1229 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1230 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1231 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1232 [tmp0]"=&r"(tmp[0]), 1233 RESTRICT_ASM_LOW32 1234 [src]"+&r"(src), [dst]"+&r"(dst), 1235 [h]"+&r"(h) 1236 : [dstStride]"r"((mips_reg)dstStride), 1237 [srcStride]"r"((mips_reg)srcStride), 1238 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 1239 : "memory" 1240 ); 1241 1242 src += 4 - (h + 5) * srcStride; 1243 dst += 4 - h * dstStride; 1244 } 1245} 1246 1247static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, 1248 int dstStride, int srcStride) 1249{ 1250 avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride); 1251 avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 1252 src += 8*srcStride; 1253 dst += 8*dstStride; 1254 avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride); 1255 avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride); 1256} 1257 1258static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, 1259 int dstStride, int srcStride) 1260{ 1261 INIT_CLIP 1262 int i; 1263 int16_t _tmp[36]; 1264 int16_t *tmp = _tmp; 1265 double ftmp[10]; 1266 uint64_t tmp0; 1267 DECLARE_VAR_LOW32; 1268 1269 src -= 2*srcStride; 1270 1271 __asm__ volatile ( 1272 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1273 "dli %[tmp0], 0x09 \n\t" 1274 "1: \n\t" 1275 MMI_ULWC1(%[ftmp1], %[src], -0x02) 1276 MMI_ULWC1(%[ftmp2], %[src], -0x01) 1277 MMI_ULWC1(%[ftmp3], %[src], 0x00) 1278 MMI_ULWC1(%[ftmp4], %[src], 0x01) 1279 MMI_ULWC1(%[ftmp5], %[src], 0x02) 1280 MMI_ULWC1(%[ftmp6], %[src], 0x03) 1281 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1282 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1283 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1284 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1285 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1286 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1287 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1288 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t" 1289 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t" 1290 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t" 1291 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t" 1292 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1293 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t" 1294 MMI_SDC1(%[ftmp9], %[tmp], 0x00) 1295 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 1296 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1297 PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t" 1298 "bnez %[tmp0], 1b \n\t" 1299 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1300 [ftmp2]"=&f"(ftmp[2]), 
[ftmp3]"=&f"(ftmp[3]), 1301 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1302 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1303 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1304 [tmp0]"=&r"(tmp0), 1305 RESTRICT_ASM_LOW32 1306 [tmp]"+&r"(tmp), [src]"+&r"(src) 1307 : [tmpStride]"r"(8), 1308 [srcStride]"r"((mips_reg)srcStride), 1309 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f) 1310 : "memory" 1311 ); 1312 1313 tmp -= 28; 1314 1315 for (i=0; i<4; i++) { 1316 const int16_t tmpB= tmp[-8]; 1317 const int16_t tmpA= tmp[-4]; 1318 const int16_t tmp0= tmp[ 0]; 1319 const int16_t tmp1= tmp[ 4]; 1320 const int16_t tmp2= tmp[ 8]; 1321 const int16_t tmp3= tmp[12]; 1322 const int16_t tmp4= tmp[16]; 1323 const int16_t tmp5= tmp[20]; 1324 const int16_t tmp6= tmp[24]; 1325 op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); 1326 op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); 1327 op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); 1328 op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); 1329 dst++; 1330 tmp++; 1331 } 1332} 1333 1334static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp, 1335 const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size) 1336{ 1337 int w = (size + 8) >> 2; 1338 double ftmp[11]; 1339 uint64_t tmp0; 1340 DECLARE_VAR_LOW32; 1341 1342 src -= 2 * srcStride + 2; 1343 1344 while (w--) { 1345 __asm__ volatile ( 1346 "dli %[tmp0], 0x02 \n\t" 1347 MMI_ULWC1(%[ftmp0], %[src], 0x00) 1348 "mtc1 %[tmp0], %[ftmp10] \n\t" 1349 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1350 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1351 MMI_ULWC1(%[ftmp1], %[src], 0x00) 1352 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1353 MMI_ULWC1(%[ftmp2], %[src], 0x00) 1354 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1355 MMI_ULWC1(%[ftmp3], %[src], 0x00) 1356 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1357 MMI_ULWC1(%[ftmp4], %[src], 0x00) 1358 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1359 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1360 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1361 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1362 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1363 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1364 MMI_ULWC1(%[ftmp5], %[src], 0x00) 1365 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1366 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1367 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 1368 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1369 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1370 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1371 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1372 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1373 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1374 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1375 MMI_SDC1(%[ftmp6], %[tmp], 0x00) 1376 MMI_ULWC1(%[ftmp0], %[src], 0x00) 1377 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 1378 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1379 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 1380 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1381 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1382 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1383 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1384 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1385 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1386 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1387 MMI_SDC1(%[ftmp6], %[tmp], 0x30) 1388 MMI_ULWC1(%[ftmp1], %[src], 0x00) 1389 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 1390 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1391 "paddh %[ftmp2], %[ftmp2], 
%[ff_pw_16] \n\t" 1392 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1393 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1394 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1395 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1396 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1397 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1398 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1399 MMI_SDC1(%[ftmp6], %[tmp], 0x60) 1400 MMI_ULWC1(%[ftmp2], %[src], 0x00) 1401 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1402 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1403 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1404 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1405 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1406 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1407 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1408 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1409 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1410 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1411 MMI_SDC1(%[ftmp6], %[tmp], 0x90) 1412 MMI_ULWC1(%[ftmp3], %[src], 0x00) 1413 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 1414 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1415 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 1416 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1417 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1418 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1419 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1420 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 1421 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1422 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1423 MMI_SDC1(%[ftmp6], %[tmp], 0xc0) 1424 MMI_ULWC1(%[ftmp4], %[src], 0x00) 1425 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1426 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1427 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 1428 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1429 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1430 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1431 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1432 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1433 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1434 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1435 MMI_SDC1(%[ftmp6], %[tmp], 0xf0) 1436 MMI_ULWC1(%[ftmp5], %[src], 0x00) 1437 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1438 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1439 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 1440 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1441 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1442 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1443 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1444 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1445 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1446 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1447 MMI_SDC1(%[ftmp6], %[tmp], 0x120) 1448 MMI_ULWC1(%[ftmp0], %[src], 0x00) 1449 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 1450 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1451 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 1452 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1453 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1454 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1455 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1456 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1457 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1458 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1459 MMI_SDC1(%[ftmp6], %[tmp], 0x150) 1460 "bne %[size], 0x10, 2f \n\t" 1461 1462 MMI_ULWC1(%[ftmp1], %[src], 0x00) 1463 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 1464 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1465 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 1466 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1467 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1468 "punpcklbh %[ftmp1], %[ftmp1], 
%[ftmp7] \n\t" 1469 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1470 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1471 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1472 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1473 MMI_SDC1(%[ftmp6], %[tmp], 0x180) 1474 MMI_ULWC1(%[ftmp2], %[src], 0x00) 1475 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1476 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1477 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1478 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1479 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1480 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1481 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1482 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1483 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1484 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1485 MMI_SDC1(%[ftmp6], %[tmp], 0x1b0) 1486 MMI_ULWC1(%[ftmp3], %[src], 0x00) 1487 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t" 1488 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1489 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t" 1490 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1491 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1492 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1493 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1494 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 1495 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1496 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1497 MMI_SDC1(%[ftmp6], %[tmp], 0x1e0) 1498 MMI_ULWC1(%[ftmp4], %[src], 0x00) 1499 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1500 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1501 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 1502 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1503 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1504 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1505 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1506 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1507 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1508 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1509 MMI_SDC1(%[ftmp6], %[tmp], 0x210) 1510 MMI_ULWC1(%[ftmp5], %[src], 0x00) 1511 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1512 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1513 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t" 1514 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1515 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1516 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1517 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1518 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1519 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1520 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1521 MMI_SDC1(%[ftmp6], %[tmp], 0x240) 1522 MMI_ULWC1(%[ftmp0], %[src], 0x00) 1523 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t" 1524 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1525 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t" 1526 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1527 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1528 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1529 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1530 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1531 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1532 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1533 MMI_SDC1(%[ftmp6], %[tmp], 0x270) 1534 MMI_ULWC1(%[ftmp1], %[src], 0x00) 1535 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t" 1536 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1537 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 1538 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1539 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1540 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1541 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1542 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1543 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1544 "paddh 
%[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1545 MMI_SDC1(%[ftmp6], %[tmp], 0x2a0) 1546 MMI_ULWC1(%[ftmp2], %[src], 0x00) 1547 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1548 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 1549 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1550 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1551 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1552 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1553 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t" 1554 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 1555 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1556 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1557 MMI_SDC1(%[ftmp6], %[tmp], 0x2d0) 1558 "2: \n\t" 1559 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1560 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1561 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1562 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1563 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1564 [ftmp10]"=&f"(ftmp[10]), 1565 [tmp0]"=&r"(tmp0), 1566 RESTRICT_ASM_LOW32 1567 [src]"+&r"(src) 1568 : [tmp]"r"(tmp), [size]"r"(size), 1569 [srcStride]"r"((mips_reg)srcStride), 1570 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 1571 : "memory" 1572 ); 1573 1574 tmp += 4; 1575 src += 4 - (size + 5) * srcStride; 1576 } 1577} 1578 1579static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, 1580 int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size) 1581{ 1582 int w = size >> 4; 1583 double ftmp[10]; 1584 uint64_t tmp0; 1585 DECLARE_VAR_ALL64; 1586 1587 do { 1588 int h = size; 1589 1590 __asm__ volatile ( 1591 "dli %[tmp0], 0x02 \n\t" 1592 "mtc1 %[tmp0], %[ftmp8] \n\t" 1593 "dli %[tmp0], 0x06 \n\t" 1594 "mtc1 %[tmp0], %[ftmp9] \n\t" 1595 "1: \n\t" 1596 MMI_LDC1(%[ftmp0], %[tmp], 0x00) 1597 MMI_LDC1(%[ftmp3], %[tmp], 0x08) 1598 MMI_LDC1(%[ftmp6], %[tmp], 0x10) 1599 MMI_ULDC1(%[ftmp1], %[tmp], 0x02) 1600 MMI_ULDC1(%[ftmp4], %[tmp], 0x0a) 1601 MMI_ULDC1(%[ftmp5], %[tmp], 0x12) 1602 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 1603 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1604 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1605 "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1606 MMI_ULDC1(%[ftmp2], %[tmp], 0x04) 1607 MMI_ULDC1(%[ftmp6], %[tmp], 0x06) 1608 MMI_ULDC1(%[ftmp5], %[tmp], 0x0c) 1609 MMI_ULDC1(%[ftmp7], %[tmp], 0x0e) 1610 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1611 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1612 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1613 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1614 "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t" 1615 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 1616 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1617 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1618 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 1619 "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t" 1620 "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t" 1621 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 1622 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 1623 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1624 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 1625 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 1626 "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 1627 "addi %[h], %[h], -0x01 \n\t" 1628 MMI_SDC1(%[ftmp0], %[dst], 0x00) 1629 PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t" 1630 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1631 "bnez %[h], 1b \n\t" 1632 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1633 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1634 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1635 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1636 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1637 [tmp0]"=&r"(tmp0), 1638 
RESTRICT_ASM_ALL64 1639 [tmp]"+&r"(tmp), [dst]"+&r"(dst), 1640 [h]"+&r"(h) 1641 : [dstStride]"r"((mips_reg)dstStride) 1642 : "memory" 1643 ); 1644 1645 tmp += 8 - size * 24; 1646 dst += 8 - size * dstStride; 1647 } while (w--); 1648} 1649 1650static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1651 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1652 ptrdiff_t srcStride, int size) 1653{ 1654 put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size); 1655 put_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size); 1656} 1657 1658static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1659 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1660 ptrdiff_t srcStride) 1661{ 1662 put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride, 1663 srcStride, 8); 1664} 1665 1666static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1667 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1668 ptrdiff_t srcStride) 1669{ 1670 put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride, 1671 srcStride, 16); 1672} 1673 1674static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, 1675 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride) 1676{ 1677 int h = 8; 1678 double ftmp[9]; 1679 uint64_t tmp[1]; 1680 DECLARE_VAR_LOW32; 1681 DECLARE_VAR_ALL64; 1682 1683 __asm__ volatile ( 1684 "dli %[tmp0], 0x02 \n\t" 1685 "mtc1 %[tmp0], %[ftmp7] \n\t" 1686 "dli %[tmp0], 0x05 \n\t" 1687 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1688 "mtc1 %[tmp0], %[ftmp8] \n\t" 1689 "1: \n\t" 1690 MMI_ULDC1(%[ftmp1], %[src], 0x00) 1691 MMI_ULDC1(%[ftmp3], %[src], 0x01) 1692 "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" 1693 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1694 "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" 1695 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1696 "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1697 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1698 "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1699 "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1700 MMI_ULDC1(%[ftmp3], %[src], -0x01) 1701 MMI_ULDC1(%[ftmp5], %[src], 0x02) 1702 "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" 1703 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1704 "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 1705 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1706 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1707 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1708 "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1709 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1710 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t" 1711 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t" 1712 MMI_ULWC1(%[ftmp3], %[src], -0x02) 1713 MMI_ULWC1(%[ftmp6], %[src], 0x07) 1714 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1715 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1716 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1717 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1718 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t" 1719 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 1720 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1721 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 1722 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 1723 "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 1724 MMI_LDC1(%[ftmp5], %[src2], 0x00) 1725 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1726 PTR_ADDU "%[src], %[src], %[dstStride] \n\t" 1727 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1728 PTR_ADDU "%[h], %[h], -0x01 \n\t" 1729 MMI_SDC1(%[ftmp1], %[dst], 0x00) 1730 PTR_ADDU "%[dst], %[dst], %[dstStride] 
\n\t" 1731 PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t" 1732 "bgtz %[h], 1b \n\t" 1733 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1734 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1735 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1736 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1737 [ftmp8]"=&f"(ftmp[8]), 1738 [tmp0]"=&r"(tmp[0]), 1739 RESTRICT_ASM_LOW32 1740 RESTRICT_ASM_ALL64 1741 [src]"+&r"(src), [dst]"+&r"(dst), 1742 [src2]"+&r"(src2), [h]"+&r"(h) 1743 : [src2Stride]"r"((mips_reg)src2Stride), 1744 [dstStride]"r"((mips_reg)dstStride), 1745 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 1746 : "memory" 1747 ); 1748} 1749 1750static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, 1751 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h) 1752{ 1753 double ftmp[7]; 1754 uint64_t tmp0; 1755 DECLARE_VAR_ALL64; 1756 DECLARE_VAR_ADDRT; 1757 1758 do { 1759 __asm__ volatile ( 1760 "dli %[tmp0], 0x05 \n\t" 1761 MMI_ULDC1(%[ftmp0], %[src16], 0x00) 1762 "mtc1 %[tmp0], %[ftmp6] \n\t" 1763 MMI_ULDC1(%[ftmp1], %[src16], 0x08) 1764 MMI_ULDC1(%[ftmp2], %[src16], 0x30) 1765 MMI_ULDC1(%[ftmp3], %[src16], 0x38) 1766 "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 1767 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1768 "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1769 "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 1770 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1771 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 1772 MMI_LDC1(%[ftmp5], %[src8], 0x00) 1773 MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00) 1774 "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 1775 "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1776 MMI_SDC1(%[ftmp0], %[dst], 0x00) 1777 MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00) 1778 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1779 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1780 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1781 [ftmp6]"=&f"(ftmp[6]), 1782 RESTRICT_ASM_ALL64 1783 RESTRICT_ASM_ADDRT 1784 [tmp0]"=&r"(tmp0) 1785 : [src8]"r"(src8), [src16]"r"(src16), 1786 [dst]"r"(dst), 1787 [src8Stride]"r"((mips_reg)src8Stride), 1788 [dstStride]"r"((mips_reg)dstStride) 1789 : "memory" 1790 ); 1791 1792 src8 += 2 * src8Stride; 1793 src16 += 48; 1794 dst += 2 * dstStride; 1795 } while (h -= 2); 1796} 1797 1798static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, 1799 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride) 1800{ 1801 put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride); 1802 put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride, 1803 src2Stride); 1804 1805 src += 8 * dstStride; 1806 dst += 8 * dstStride; 1807 src2 += 8 * src2Stride; 1808 1809 put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride); 1810 put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride, 1811 src2Stride); 1812} 1813 1814static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, 1815 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h) 1816{ 1817 put_pixels8_l2_shift5_mmi(dst, src16, src8, dstStride, src8Stride, h); 1818 put_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8, dstStride, 1819 src8Stride, h); 1820} 1821 1822static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, 1823 int dstStride, int srcStride) 1824{ 1825 INIT_CLIP 1826 int i; 1827 int16_t _tmp[36]; 1828 int16_t *tmp = _tmp; 1829 double ftmp[10]; 1830 uint64_t tmp0; 1831 DECLARE_VAR_LOW32; 1832 1833 src -= 2*srcStride; 1834 1835 __asm__ volatile ( 1836 "pxor %[ftmp0], 
%[ftmp0], %[ftmp0] \n\t" 1837 "dli %[tmp0], 0x09 \n\t" 1838 "1: \n\t" 1839 MMI_ULWC1(%[ftmp1], %[src], -0x02) 1840 MMI_ULWC1(%[ftmp2], %[src], -0x01) 1841 MMI_ULWC1(%[ftmp3], %[src], 0x00) 1842 MMI_ULWC1(%[ftmp4], %[src], 0x01) 1843 MMI_ULWC1(%[ftmp5], %[src], 0x02) 1844 MMI_ULWC1(%[ftmp6], %[src], 0x03) 1845 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1846 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1847 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1848 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1849 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1850 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1851 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1852 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t" 1853 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t" 1854 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t" 1855 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t" 1856 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1857 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t" 1858 MMI_SDC1(%[ftmp9], %[tmp], 0x00) 1859 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 1860 PTR_ADDU "%[src], %[src], %[srcStride] \n\t" 1861 PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t" 1862 "bnez %[tmp0], 1b \n\t" 1863 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1864 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1865 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1866 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1867 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1868 [tmp0]"=&r"(tmp0), 1869 RESTRICT_ASM_LOW32 1870 [tmp]"+&r"(tmp), [src]"+&r"(src) 1871 : [tmpStride]"r"(8), 1872 [srcStride]"r"((mips_reg)srcStride), 1873 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f) 1874 : "memory" 1875 ); 1876 1877 tmp -= 28; 1878 1879 for (i=0; i<4; i++) { 1880 const int16_t tmpB= tmp[-8]; 1881 const int16_t tmpA= tmp[-4]; 1882 const int16_t tmp0= tmp[ 0]; 1883 const int16_t tmp1= tmp[ 4]; 1884 const int16_t tmp2= tmp[ 8]; 1885 const int16_t tmp3= tmp[12]; 1886 const int16_t tmp4= tmp[16]; 1887 const int16_t tmp5= tmp[20]; 1888 const int16_t tmp6= tmp[24]; 1889 op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); 1890 op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); 1891 op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); 1892 op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); 1893 dst++; 1894 tmp++; 1895 } 1896} 1897 1898static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, 1899 int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size) 1900{ 1901 int w = size >> 4; 1902 double ftmp[11]; 1903 uint64_t tmp0; 1904 DECLARE_VAR_ALL64; 1905 1906 do { 1907 int h = size; 1908 __asm__ volatile ( 1909 "dli %[tmp0], 0x02 \n\t" 1910 "mtc1 %[tmp0], %[ftmp9] \n\t" 1911 "dli %[tmp0], 0x06 \n\t" 1912 "mtc1 %[tmp0], %[ftmp10] \n\t" 1913 "1: \n\t" 1914 MMI_LDC1(%[ftmp0], %[tmp], 0x00) 1915 MMI_LDC1(%[ftmp3], %[tmp], 0x08) 1916 MMI_ULDC1(%[ftmp1], %[tmp], 0x02) 1917 MMI_ULDC1(%[ftmp4], %[tmp], 0x0a) 1918 MMI_LDC1(%[ftmp7], %[tmp], 0x10) 1919 MMI_ULDC1(%[ftmp8], %[tmp], 0x12) 1920 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 1921 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1922 "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 1923 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1924 MMI_ULDC1(%[ftmp2], %[tmp], 0x04) 1925 MMI_ULDC1(%[ftmp5], %[tmp], 0x0c) 1926 MMI_ULDC1(%[ftmp7], %[tmp], 0x06) 1927 MMI_ULDC1(%[ftmp8], %[tmp], 0x0e) 1928 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1929 "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 1930 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1931 "psubh %[ftmp3], %[ftmp3], 
%[ftmp4] \n\t" 1932 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 1933 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 1934 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1935 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1936 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 1937 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1938 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 1939 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 1940 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 1941 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1942 "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t" 1943 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 1944 "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 1945 MMI_LDC1(%[ftmp6], %[dst], 0x00) 1946 "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 1947 MMI_SDC1(%[ftmp0], %[dst], 0x00) 1948 "addi %[h], %[h], -0x01 \n\t" 1949 PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t" 1950 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 1951 "bnez %[h], 1b \n\t" 1952 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1953 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1954 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1955 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1956 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1957 [ftmp10]"=&f"(ftmp[10]), 1958 [tmp0]"=&r"(tmp0), 1959 RESTRICT_ASM_ALL64 1960 [tmp]"+&r"(tmp), [dst]"+&r"(dst), 1961 [h]"+&r"(h) 1962 : [dstStride]"r"((mips_reg)dstStride) 1963 : "memory" 1964 ); 1965 1966 tmp += 8 - size * 24; 1967 dst += 8 - size * dstStride; 1968 } while (w--); 1969} 1970 1971static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1972 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1973 ptrdiff_t srcStride, int size) 1974{ 1975 put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size); 1976 avg_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size); 1977} 1978 1979static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1980 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1981 ptrdiff_t srcStride) 1982{ 1983 avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride, 1984 srcStride, 8); 1985} 1986 1987static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, 1988 const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, 1989 ptrdiff_t srcStride) 1990{ 1991 avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride, 1992 srcStride, 16); 1993} 1994 1995static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, 1996 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride) 1997{ 1998 double ftmp[10]; 1999 uint64_t tmp[2]; 2000 DECLARE_VAR_LOW32; 2001 DECLARE_VAR_ALL64; 2002 2003 __asm__ volatile ( 2004 "dli %[tmp1], 0x02 \n\t" 2005 "ori %[tmp0], $0, 0x8 \n\t" 2006 "mtc1 %[tmp1], %[ftmp7] \n\t" 2007 "dli %[tmp1], 0x05 \n\t" 2008 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2009 "mtc1 %[tmp1], %[ftmp8] \n\t" 2010 "1: \n\t" 2011 MMI_ULDC1(%[ftmp1], %[src], 0x00) 2012 MMI_ULDC1(%[ftmp2], %[src], 0x01) 2013 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 2014 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 2015 "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 2016 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 2017 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2018 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 2019 "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 2020 "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 2021 MMI_ULDC1(%[ftmp2], %[src], -0x01) 2022 MMI_ULDC1(%[ftmp5], %[src], 0x02) 2023 "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 2024 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 2025 "punpckhbh 
%[ftmp6], %[ftmp5], %[ftmp0] \n\t" 2026 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 2027 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 2028 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 2029 "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2030 "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 2031 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t" 2032 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t" 2033 MMI_ULWC1(%[ftmp2], %[src], -0x02) 2034 MMI_ULWC1(%[ftmp6], %[src], 0x07) 2035 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 2036 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 2037 "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 2038 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 2039 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t" 2040 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t" 2041 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2042 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 2043 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 2044 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 2045 MMI_LDC1(%[ftmp5], %[src2], 0x00) 2046 "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 2047 MMI_LDC1(%[ftmp9], %[dst], 0x00) 2048 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 2049 "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 2050 PTR_ADDU "%[src], %[src], %[dstStride] \n\t" 2051 MMI_SDC1(%[ftmp1], %[dst], 0x00) 2052 "daddi %[tmp0], %[tmp0], -0x01 \n\t" 2053 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t" 2054 PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t" 2055 "bgtz %[tmp0], 1b \n\t" 2056 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2057 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2058 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2059 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2060 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 2061 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 2062 RESTRICT_ASM_LOW32 2063 RESTRICT_ASM_ALL64 2064 [dst]"+&r"(dst), [src]"+&r"(src), 2065 [src2]"+&r"(src2) 2066 : [dstStride]"r"((mips_reg)dstStride), 2067 [src2Stride]"r"((mips_reg)src2Stride), 2068 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f) 2069 : "memory" 2070 ); 2071} 2072 2073static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, 2074 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride) 2075{ 2076 avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride); 2077 avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride, 2078 src2Stride); 2079 2080 src += 8 * dstStride; 2081 dst += 8 * dstStride; 2082 src2 += 8 * src2Stride; 2083 2084 avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride); 2085 avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride, 2086 src2Stride); 2087} 2088 2089static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, 2090 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b) 2091{ 2092 double ftmp[8]; 2093 uint64_t tmp0; 2094 DECLARE_VAR_ALL64; 2095 DECLARE_VAR_ADDRT; 2096 2097 do { 2098 __asm__ volatile ( 2099 "dli %[tmp0], 0x05 \n\t" 2100 MMI_ULDC1(%[ftmp0], %[src16], 0x00) 2101 "mtc1 %[tmp0], %[ftmp6] \n\t" 2102 MMI_ULDC1(%[ftmp1], %[src16], 0x08) 2103 MMI_ULDC1(%[ftmp2], %[src16], 0x30) 2104 MMI_ULDC1(%[ftmp3], %[src16], 0x38) 2105 "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 2106 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 2107 "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 2108 "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 2109 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2110 MMI_LDC1(%[ftmp4], %[src8], 0x00) 2111 MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00) 2112 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2113 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] 
\n\t" 2114 "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 2115 MMI_LDC1(%[ftmp7], %[dst], 0x00) 2116 "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 2117 MMI_SDC1(%[ftmp0], %[dst], 0x00) 2118 MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00) 2119 "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 2120 MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00) 2121 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2122 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2123 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2124 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2125 RESTRICT_ASM_ALL64 2126 RESTRICT_ASM_ADDRT 2127 [tmp0]"=&r"(tmp0) 2128 : [src8]"r"(src8), [src16]"r"(src16), 2129 [dst]"r"(dst), 2130 [src8Stride]"r"((mips_reg)src8Stride), 2131 [dstStride]"r"((mips_reg)dstStride) 2132 : "memory" 2133 ); 2134 2135 src8 += 2 * src8Stride; 2136 src16 += 48; 2137 dst += 2 * dstStride; 2138 } while (b -= 2); 2139} 2140 2141static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, 2142 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b) 2143{ 2144 avg_pixels8_l2_shift5_mmi(dst, src16, src8, dstStride, src8Stride, b); 2145 avg_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8, dstStride, 2146 src8Stride, b); 2147} 2148 2149//DEF_H264_MC_MMI(put_, 4) 2150void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, 2151 ptrdiff_t stride) 2152{ 2153 ff_put_pixels4_8_mmi(dst, src, stride, 4); 2154} 2155 2156void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, 2157 ptrdiff_t stride) 2158{ 2159 uint8_t half[16]; 2160 put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride); 2161 ff_put_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4); 2162} 2163 2164void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, 2165 ptrdiff_t stride) 2166{ 2167 put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride); 2168} 2169 2170void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, 2171 ptrdiff_t stride) 2172{ 2173 uint8_t half[16]; 2174 put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride); 2175 ff_put_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4); 2176} 2177 2178void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, 2179 ptrdiff_t stride) 2180{ 2181 uint8_t full[36]; 2182 uint8_t * const full_mid= full + 8; 2183 uint8_t half[16]; 2184 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2185 put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4); 2186 ff_put_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4); 2187} 2188 2189void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, 2190 ptrdiff_t stride) 2191{ 2192 uint8_t full[36]; 2193 uint8_t * const full_mid= full + 8; 2194 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2195 put_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4); 2196} 2197 2198void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, 2199 ptrdiff_t stride) 2200{ 2201 uint8_t full[36]; 2202 uint8_t * const full_mid= full + 8; 2203 uint8_t half[16]; 2204 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2205 put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4); 2206 ff_put_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4); 2207} 2208 2209void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, 2210 ptrdiff_t stride) 2211{ 2212 uint8_t full[36]; 2213 uint8_t * const full_mid= full + 8; 2214 uint8_t halfH[16]; 2215 uint8_t halfV[16]; 2216 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2217 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2218 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2219 
ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2220} 2221 2222void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, 2223 ptrdiff_t stride) 2224{ 2225 uint8_t full[36]; 2226 uint8_t * const full_mid= full + 8; 2227 uint8_t halfH[16]; 2228 uint8_t halfV[16]; 2229 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2230 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2231 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2232 ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2233} 2234 2235void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, 2236 ptrdiff_t stride) 2237{ 2238 uint8_t full[36]; 2239 uint8_t * const full_mid= full + 8; 2240 uint8_t halfH[16]; 2241 uint8_t halfV[16]; 2242 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2243 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2244 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2245 ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2246} 2247 2248void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, 2249 ptrdiff_t stride) 2250{ 2251 uint8_t full[36]; 2252 uint8_t * const full_mid= full + 8; 2253 uint8_t halfH[16]; 2254 uint8_t halfV[16]; 2255 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2256 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2257 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2258 ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2259} 2260 2261void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, 2262 ptrdiff_t stride) 2263{ 2264 put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride); 2265} 2266 2267void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, 2268 ptrdiff_t stride) 2269{ 2270 uint8_t halfH[16]; 2271 uint8_t halfHV[16]; 2272 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2273 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2274 ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4); 2275} 2276 2277void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, 2278 ptrdiff_t stride) 2279{ 2280 uint8_t halfH[16]; 2281 uint8_t halfHV[16]; 2282 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2283 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2284 ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4); 2285} 2286 2287void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, 2288 ptrdiff_t stride) 2289{ 2290 uint8_t full[36]; 2291 uint8_t * const full_mid= full + 8; 2292 uint8_t halfV[16]; 2293 uint8_t halfHV[16]; 2294 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2295 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2296 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2297 ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4); 2298} 2299 2300void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, 2301 ptrdiff_t stride) 2302{ 2303 uint8_t full[36]; 2304 uint8_t * const full_mid= full + 8; 2305 uint8_t halfV[16]; 2306 uint8_t halfHV[16]; 2307 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2308 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2309 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2310 ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4); 2311} 2312 2313//DEF_H264_MC_MMI(avg_, 4) 2314void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, 2315 ptrdiff_t stride) 2316{ 2317 ff_avg_pixels4_8_mmi(dst, src, stride, 4); 2318} 2319 2320void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, 2321 
ptrdiff_t stride) 2322{ 2323 uint8_t half[16]; 2324 put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride); 2325 ff_avg_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4); 2326} 2327 2328void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, 2329 ptrdiff_t stride) 2330{ 2331 avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride); 2332} 2333 2334void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, 2335 ptrdiff_t stride) 2336{ 2337 uint8_t half[16]; 2338 put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride); 2339 ff_avg_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4); 2340} 2341 2342void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, 2343 ptrdiff_t stride) 2344{ 2345 uint8_t full[36]; 2346 uint8_t * const full_mid= full + 8; 2347 uint8_t half[16]; 2348 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2349 put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4); 2350 ff_avg_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4); 2351} 2352 2353void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, 2354 ptrdiff_t stride) 2355{ 2356 uint8_t full[36]; 2357 uint8_t * const full_mid= full + 8; 2358 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2359 avg_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4); 2360} 2361 2362void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, 2363 ptrdiff_t stride) 2364{ 2365 uint8_t full[36]; 2366 uint8_t * const full_mid= full + 8; 2367 uint8_t half[16]; 2368 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2369 put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4); 2370 ff_avg_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4); 2371} 2372 2373void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, 2374 ptrdiff_t stride) 2375{ 2376 uint8_t full[36]; 2377 uint8_t * const full_mid= full + 8; 2378 uint8_t halfH[16]; 2379 uint8_t halfV[16]; 2380 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2381 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2382 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2383 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2384} 2385 2386void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, 2387 ptrdiff_t stride) 2388{ 2389 uint8_t full[36]; 2390 uint8_t * const full_mid= full + 8; 2391 uint8_t halfH[16]; 2392 uint8_t halfV[16]; 2393 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2394 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2395 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2396 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2397} 2398 2399void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, 2400 ptrdiff_t stride) 2401{ 2402 uint8_t full[36]; 2403 uint8_t * const full_mid= full + 8; 2404 uint8_t halfH[16]; 2405 uint8_t halfV[16]; 2406 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2407 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2408 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2409 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2410} 2411 2412void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, 2413 ptrdiff_t stride) 2414{ 2415 uint8_t full[36]; 2416 uint8_t * const full_mid= full + 8; 2417 uint8_t halfH[16]; 2418 uint8_t halfV[16]; 2419 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2420 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2421 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2422 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4); 2423} 2424 2425void 
ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, 2426 ptrdiff_t stride) 2427{ 2428 avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride); 2429} 2430 2431void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, 2432 ptrdiff_t stride) 2433{ 2434 uint8_t halfH[16]; 2435 uint8_t halfHV[16]; 2436 put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride); 2437 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2438 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4); 2439} 2440 2441void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, 2442 ptrdiff_t stride) 2443{ 2444 uint8_t halfH[16]; 2445 uint8_t halfHV[16]; 2446 put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride); 2447 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2448 ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4); 2449} 2450 2451void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, 2452 ptrdiff_t stride) 2453{ 2454 uint8_t full[36]; 2455 uint8_t * const full_mid= full + 8; 2456 uint8_t halfV[16]; 2457 uint8_t halfHV[16]; 2458 copy_block4_mmi(full, src - stride*2, 4, stride, 9); 2459 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2460 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2461 ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4); 2462} 2463 2464void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, 2465 ptrdiff_t stride) 2466{ 2467 uint8_t full[36]; 2468 uint8_t * const full_mid= full + 8; 2469 uint8_t halfV[16]; 2470 uint8_t halfHV[16]; 2471 copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9); 2472 put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4); 2473 put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride); 2474 ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4); 2475} 2476 2477//DEF_H264_MC_MMI(put_, 8) 2478void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, 2479 ptrdiff_t stride) 2480{ 2481 ff_put_pixels8_8_mmi(dst, src, stride, 8); 2482} 2483 2484void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, 2485 ptrdiff_t stride) 2486{ 2487 uint8_t half[64]; 2488 put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride); 2489 ff_put_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8); 2490} 2491 2492void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, 2493 ptrdiff_t stride) 2494{ 2495 put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride); 2496} 2497 2498void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, 2499 ptrdiff_t stride) 2500{ 2501 uint8_t half[64]; 2502 put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride); 2503 ff_put_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8); 2504} 2505 2506void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, 2507 ptrdiff_t stride) 2508{ 2509 uint8_t full[104]; 2510 uint8_t * const full_mid= full + 16; 2511 uint8_t half[64]; 2512 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2513 put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8); 2514 ff_put_pixels8_l2_8_mmi(dst, full_mid, half, stride, 8, 8, 8); 2515} 2516 2517void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, 2518 ptrdiff_t stride) 2519{ 2520 uint8_t full[104]; 2521 uint8_t * const full_mid= full + 16; 2522 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2523 put_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8); 2524} 2525 2526void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, 2527 ptrdiff_t stride) 2528{ 2529 uint8_t full[104]; 2530 uint8_t * const full_mid= full + 16; 2531 uint8_t half[64]; 2532 
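    /* The vertical 6-tap filter needs two rows above and three below each
     * output row, so 8 + 5 = 13 source rows are copied into the padded
     * buffer; full_mid (= full + 2 * 8) is the row aligned with src, and
     * full_mid + 8, one row further down, is the integer sample nearest to
     * quarter position (0,3), which is averaged with the half-pel result. */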
copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2533 put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8); 2534 ff_put_pixels8_l2_8_mmi(dst, full_mid+8, half, stride, 8, 8, 8); 2535} 2536 2537void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, 2538 ptrdiff_t stride) 2539{ 2540 uint8_t full[104]; 2541 uint8_t * const full_mid= full + 16; 2542 uint8_t halfH[64]; 2543 uint8_t halfV[64]; 2544 put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride); 2545 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2546 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2547 ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2548} 2549 2550void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, 2551 ptrdiff_t stride) 2552{ 2553 uint8_t full[104]; 2554 uint8_t * const full_mid= full + 16; 2555 uint8_t halfH[64]; 2556 uint8_t halfV[64]; 2557 put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride); 2558 copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13); 2559 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2560 ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2561} 2562 2563void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, 2564 ptrdiff_t stride) 2565{ 2566 uint8_t full[104]; 2567 uint8_t * const full_mid= full + 16; 2568 uint8_t halfH[64]; 2569 uint8_t halfV[64]; 2570 put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride); 2571 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2572 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2573 ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2574} 2575 2576void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, 2577 ptrdiff_t stride) 2578{ 2579 uint8_t full[104]; 2580 uint8_t * const full_mid= full + 16; 2581 uint8_t halfH[64]; 2582 uint8_t halfV[64]; 2583 put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride); 2584 copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13); 2585 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2586 ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2587} 2588 2589void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, 2590 ptrdiff_t stride) 2591{ 2592 uint16_t __attribute__ ((aligned(8))) temp[192]; 2593 2594 put_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride); 2595} 2596 2597void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, 2598 ptrdiff_t stride) 2599{ 2600 uint8_t __attribute__ ((aligned(8))) temp[448]; 2601 uint8_t *const halfHV = temp; 2602 int16_t *const halfV = (int16_t *) (temp + 64); 2603 2604 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2605 put_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8); 2606} 2607 2608void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, 2609 ptrdiff_t stride) 2610{ 2611 uint8_t __attribute__ ((aligned(8))) temp[448]; 2612 uint8_t *const halfHV = temp; 2613 int16_t *const halfV = (int16_t *) (temp + 64); 2614 2615 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2616 put_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8); 2617} 2618 2619void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, 2620 ptrdiff_t stride) 2621{ 2622 uint8_t __attribute__ ((aligned(8))) temp[448]; 2623 uint8_t *const halfHV = temp; 2624 int16_t *const halfV = (int16_t *) (temp + 64); 2625 2626 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2627 put_pixels8_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 8, 8); 2628} 2629 2630void ff_put_h264_qpel8_mc32_mmi(uint8_t 
*dst, const uint8_t *src, 2631 ptrdiff_t stride) 2632{ 2633 uint8_t __attribute__ ((aligned(8))) temp[448]; 2634 uint8_t *const halfHV = temp; 2635 int16_t *const halfV = (int16_t *) (temp + 64); 2636 2637 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2638 put_pixels8_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 8, 8); 2639} 2640 2641//DEF_H264_MC_MMI(avg_, 8) 2642void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, 2643 ptrdiff_t stride) 2644{ 2645 ff_avg_pixels8_8_mmi(dst, src, stride, 8); 2646} 2647 2648void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, 2649 ptrdiff_t stride) 2650{ 2651 uint8_t half[64]; 2652 put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride); 2653 ff_avg_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8); 2654} 2655 2656void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, 2657 ptrdiff_t stride) 2658{ 2659 avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride); 2660} 2661 2662void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, 2663 ptrdiff_t stride) 2664{ 2665 uint8_t half[64]; 2666 put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride); 2667 ff_avg_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8); 2668} 2669 2670void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, 2671 ptrdiff_t stride) 2672{ 2673 uint8_t full[104]; 2674 uint8_t * const full_mid= full + 16; 2675 uint8_t half[64]; 2676 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2677 put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8); 2678 ff_avg_pixels8_l2_8_mmi(dst, full_mid, half, stride, 8, 8, 8); 2679} 2680 2681void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, 2682 ptrdiff_t stride) 2683{ 2684 uint8_t full[104]; 2685 uint8_t * const full_mid= full + 16; 2686 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2687 avg_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8); 2688} 2689 2690void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, 2691 ptrdiff_t stride) 2692{ 2693 uint8_t full[104]; 2694 uint8_t * const full_mid= full + 16; 2695 uint8_t half[64]; 2696 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2697 put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8); 2698 ff_avg_pixels8_l2_8_mmi(dst, full_mid+8, half, stride, 8, 8, 8); 2699} 2700 2701void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, 2702 ptrdiff_t stride) 2703{ 2704 uint8_t full[104]; 2705 uint8_t * const full_mid= full + 16; 2706 uint8_t halfH[64]; 2707 uint8_t halfV[64]; 2708 put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride); 2709 copy_block8_mmi(full, src - stride*2, 8, stride, 13); 2710 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2711 ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2712} 2713 2714void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, 2715 ptrdiff_t stride) 2716{ 2717 uint8_t full[104]; 2718 uint8_t * const full_mid= full + 16; 2719 uint8_t halfH[64]; 2720 uint8_t halfV[64]; 2721 put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride); 2722 copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13); 2723 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2724 ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2725} 2726 2727void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, 2728 ptrdiff_t stride) 2729{ 2730 uint8_t full[104]; 2731 uint8_t * const full_mid= full + 16; 2732 uint8_t halfH[64]; 2733 uint8_t halfV[64]; 2734 put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride); 2735 copy_block8_mmi(full, src - stride*2, 8, 
stride, 13); 2736 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2737 ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2738} 2739 2740void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, 2741 ptrdiff_t stride) 2742{ 2743 uint8_t full[104]; 2744 uint8_t * const full_mid= full + 16; 2745 uint8_t halfH[64]; 2746 uint8_t halfV[64]; 2747 put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride); 2748 copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13); 2749 put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8); 2750 ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8); 2751} 2752 2753void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, 2754 ptrdiff_t stride) 2755{ 2756 uint16_t __attribute__ ((aligned(8))) temp[192]; 2757 2758 avg_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride); 2759} 2760 2761void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, 2762 ptrdiff_t stride) 2763{ 2764 uint8_t __attribute__ ((aligned(8))) temp[448]; 2765 uint8_t *const halfHV = temp; 2766 int16_t *const halfV = (int16_t *) (temp + 64); 2767 2768 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2769 avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8); 2770} 2771 2772void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, 2773 ptrdiff_t stride) 2774{ 2775 uint8_t __attribute__ ((aligned(8))) temp[448]; 2776 uint8_t *const halfHV = temp; 2777 int16_t *const halfV = (int16_t *) (temp + 64); 2778 2779 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2780 avg_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8); 2781} 2782 2783void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, 2784 ptrdiff_t stride) 2785{ 2786 uint8_t __attribute__ ((aligned(8))) temp[448]; 2787 uint8_t *const halfHV = temp; 2788 int16_t *const halfV = (int16_t *) (temp + 64); 2789 2790 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2791 avg_pixels8_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 8, 8); 2792} 2793 2794void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, 2795 ptrdiff_t stride) 2796{ 2797 uint8_t __attribute__ ((aligned(8))) temp[448]; 2798 uint8_t *const halfHV = temp; 2799 int16_t *const halfV = (int16_t *) (temp + 64); 2800 2801 put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride); 2802 avg_pixels8_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 8, 8); 2803} 2804 2805//DEF_H264_MC_MMI(put_, 16) 2806void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, 2807 ptrdiff_t stride) 2808{ 2809 ff_put_pixels16_8_mmi(dst, src, stride, 16); 2810} 2811 2812void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, 2813 ptrdiff_t stride) 2814{ 2815 uint8_t half[256]; 2816 put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride); 2817 ff_put_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16); 2818} 2819 2820void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, 2821 ptrdiff_t stride) 2822{ 2823 put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride); 2824} 2825 2826void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, 2827 ptrdiff_t stride) 2828{ 2829 uint8_t half[256]; 2830 put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride); 2831 ff_put_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16); 2832} 2833 2834void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, 2835 ptrdiff_t stride) 2836{ 2837 uint8_t full[336]; 2838 uint8_t * const full_mid= full + 32; 2839 uint8_t half[256]; 2840 
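    /* Quarter-pel position (0,1): copy 16 + 5 = 21 padded rows, run the
     * vertical 6-tap filter into half, then average half with the integer
     * rows at full_mid (the row aligned with src), the nearest full-pel
     * samples for this position. */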
copy_block16_mmi(full, src - stride*2, 16, stride, 21); 2841 put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16); 2842 ff_put_pixels16_l2_8_mmi(dst, full_mid, half, stride, 16, 16, 16); 2843} 2844 2845void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, 2846 ptrdiff_t stride) 2847{ 2848 uint8_t full[336]; 2849 uint8_t * const full_mid= full + 32; 2850 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 2851 put_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16); 2852} 2853 2854void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, 2855 ptrdiff_t stride) 2856{ 2857 uint8_t full[336]; 2858 uint8_t * const full_mid= full + 32; 2859 uint8_t half[256]; 2860 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 2861 put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16); 2862 ff_put_pixels16_l2_8_mmi(dst, full_mid+16, half, stride, 16, 16, 16); 2863} 2864 2865void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, 2866 ptrdiff_t stride) 2867{ 2868 uint8_t full[336]; 2869 uint8_t * const full_mid= full + 32; 2870 uint8_t halfH[256]; 2871 uint8_t halfV[256]; 2872 put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride); 2873 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 2874 put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16); 2875 ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16); 2876} 2877 2878void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, 2879 ptrdiff_t stride) 2880{ 2881 uint8_t full[336]; 2882 uint8_t * const full_mid= full + 32; 2883 uint8_t halfH[256]; 2884 uint8_t halfV[256]; 2885 put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride); 2886 copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21); 2887 put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16); 2888 ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16); 2889} 2890 2891void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, 2892 ptrdiff_t stride) 2893{ 2894 uint8_t full[336]; 2895 uint8_t * const full_mid= full + 32; 2896 uint8_t halfH[256]; 2897 uint8_t halfV[256]; 2898 put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride); 2899 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 2900 put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16); 2901 ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16); 2902} 2903 2904void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, 2905 ptrdiff_t stride) 2906{ 2907 uint8_t full[336]; 2908 uint8_t * const full_mid= full + 32; 2909 uint8_t halfH[256]; 2910 uint8_t halfV[256]; 2911 put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride); 2912 copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21); 2913 put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16); 2914 ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16); 2915} 2916 2917void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, 2918 ptrdiff_t stride) 2919{ 2920 uint16_t __attribute__ ((aligned(8))) temp[384]; 2921 2922 put_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride); 2923} 2924 2925void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, 2926 ptrdiff_t stride) 2927{ 2928 uint8_t __attribute__ ((aligned(8))) temp[1024]; 2929 uint8_t *const halfHV = temp; 2930 int16_t *const halfV = (int16_t *) (temp + 256); 2931 2932 put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride); 2933 put_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16); 2934} 2935 2936void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, 
2937 ptrdiff_t stride) 2938{ 2939 uint8_t __attribute__ ((aligned(8))) temp[1024]; 2940 uint8_t *const halfHV = temp; 2941 int16_t *const halfV = (int16_t *) (temp + 256); 2942 2943 put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride); 2944 put_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16); 2945} 2946 2947void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, 2948 ptrdiff_t stride) 2949{ 2950 uint8_t __attribute__ ((aligned(8))) temp[1024]; 2951 uint8_t *const halfHV = temp; 2952 int16_t *const halfV = (int16_t *) (temp + 256); 2953 2954 put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride); 2955 put_pixels16_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 16, 16); 2956} 2957 2958void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, 2959 ptrdiff_t stride) 2960{ 2961 uint8_t __attribute__ ((aligned(8))) temp[1024]; 2962 uint8_t *const halfHV = temp; 2963 int16_t *const halfV = (int16_t *) (temp + 256); 2964 2965 put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride); 2966 put_pixels16_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 16, 16); 2967} 2968 2969//DEF_H264_MC_MMI(avg_, 16) 2970void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, 2971 ptrdiff_t stride) 2972{ 2973 ff_avg_pixels16_8_mmi(dst, src, stride, 16); 2974} 2975 2976void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, 2977 ptrdiff_t stride) 2978{ 2979 uint8_t half[256]; 2980 put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride); 2981 ff_avg_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16); 2982} 2983 2984void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, 2985 ptrdiff_t stride) 2986{ 2987 avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride); 2988} 2989 2990void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, 2991 ptrdiff_t stride) 2992{ 2993 uint8_t half[256]; 2994 put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride); 2995 ff_avg_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16); 2996} 2997 2998void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, 2999 ptrdiff_t stride) 3000{ 3001 uint8_t full[336]; 3002 uint8_t * const full_mid= full + 32; 3003 uint8_t half[256]; 3004 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 3005 put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16); 3006 ff_avg_pixels16_l2_8_mmi(dst, full_mid, half, stride, 16, 16, 16); 3007} 3008 3009void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, 3010 ptrdiff_t stride) 3011{ 3012 uint8_t full[336]; 3013 uint8_t * const full_mid= full + 32; 3014 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 3015 avg_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16); 3016} 3017 3018void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, 3019 ptrdiff_t stride) 3020{ 3021 uint8_t full[336]; 3022 uint8_t * const full_mid= full + 32; 3023 uint8_t half[256]; 3024 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 3025 put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16); 3026 ff_avg_pixels16_l2_8_mmi(dst, full_mid+16, half, stride, 16, 16, 16); 3027} 3028 3029void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, 3030 ptrdiff_t stride) 3031{ 3032 uint8_t full[336]; 3033 uint8_t * const full_mid= full + 32; 3034 uint8_t halfH[256]; 3035 uint8_t halfV[256]; 3036 put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride); 3037 copy_block16_mmi(full, src - stride*2, 16, stride, 21); 3038 put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16); 3039 
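    /* avg_ flavour of (1,1): the l2 helper forms the quarter-pel prediction
     * as the rounded average of halfH and halfV, then averages that result
     * with the pixels already present in dst. */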
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}

void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}

void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}

void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}

void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) temp[384];

    avg_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
}

void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16);
}

void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16);
}

void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 16, 16);
}

void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 16, 16);
}

#undef op2_avg
#undef op2_put
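
/*
 * Illustrative reference (a sketch, not compiled into the build): a plain-C
 * equivalent of one centre (2,2) half-pel output sample as produced by the
 * hv lowpass pairs above.  hv1 applies the 6-tap kernel (1, -5, 20, 20, -5, 1)
 * vertically into 16-bit intermediates and hv2 applies it horizontally; the
 * rounding matches op2_put, i.e. (x + 512) >> 10.  In the MMI version the
 * bias is folded in early: hv1 adds 16 to every intermediate, which becomes
 * the +512 after the second pass, whose taps also sum to 32.  The function
 * name ref_hv_half_pixel is invented for this sketch; av_clip_uint8() is the
 * libavutil/common.h clipping helper.
 *
 *   static uint8_t ref_hv_half_pixel(const uint8_t *src, ptrdiff_t stride)
 *   {
 *       int v[6], i, x;
 *
 *       // vertical 6-tap at columns -2..+3 around the target sample
 *       for (i = 0; i < 6; i++) {
 *           const uint8_t *c = src + (i - 2);
 *           v[i] = 20 * (c[0] + c[stride])
 *                -  5 * (c[-stride] + c[2 * stride])
 *                +      (c[-2 * stride] + c[3 * stride]);
 *       }
 *       // horizontal 6-tap over the intermediates, then op2_put rounding:
 *       // the two passes scale by 32 * 32 = 1024, hence (x + 512) >> 10
 *       x = 20 * (v[2] + v[3]) - 5 * (v[1] + v[4]) + (v[0] + v[5]);
 *       return av_clip_uint8((x + 512) >> 10);
 *   }
 */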