1/* 2 * Loongson SIMD optimized h264dsp 3 * 4 * Copyright (c) 2015 Loongson Technology Corporation Limited 5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6 * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn> 7 * Heiher <r@hev.cc> 8 * 9 * This file is part of FFmpeg. 10 * 11 * FFmpeg is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * FFmpeg is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with FFmpeg; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 */ 25 26#include "libavcodec/bit_depth_template.c" 27#include "h264dsp_mips.h" 28#include "libavutil/mips/mmiutils.h" 29#include "libavutil/mem_internal.h" 30 31void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride) 32{ 33 double ftmp[9]; 34 DECLARE_VAR_LOW32; 35 36 __asm__ volatile ( 37 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 38 MMI_LDC1(%[ftmp1], %[src], 0x00) 39 MMI_LDC1(%[ftmp2], %[src], 0x08) 40 MMI_LDC1(%[ftmp3], %[src], 0x10) 41 MMI_LDC1(%[ftmp4], %[src], 0x18) 42 /* memset(src, 0, 32); */ 43 MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00) 44 MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10) 45 MMI_ULWC1(%[ftmp5], %[dst0], 0x00) 46 MMI_ULWC1(%[ftmp6], %[dst1], 0x00) 47 MMI_ULWC1(%[ftmp7], %[dst2], 0x00) 48 MMI_ULWC1(%[ftmp8], %[dst3], 0x00) 49 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 50 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 51 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 52 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 53 
"paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 54 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 55 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 56 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 57 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 58 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 59 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 60 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 61 MMI_SWC1(%[ftmp1], %[dst0], 0x00) 62 MMI_SWC1(%[ftmp2], %[dst1], 0x00) 63 MMI_SWC1(%[ftmp3], %[dst2], 0x00) 64 MMI_SWC1(%[ftmp4], %[dst3], 0x00) 65 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 66 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 67 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 68 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 69 RESTRICT_ASM_LOW32 70 [ftmp8]"=&f"(ftmp[8]) 71 : [dst0]"r"(dst), [dst1]"r"(dst+stride), 72 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride), 73 [src]"r"(src) 74 : "memory" 75 ); 76 77} 78 79void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride) 80{ 81 double ftmp[12]; 82 uint64_t tmp[1]; 83 DECLARE_VAR_LOW32; 84 DECLARE_VAR_ADDRT; 85 86 __asm__ volatile ( 87 MMI_LDC1(%[ftmp0], %[block], 0x00) 88 MMI_LDC1(%[ftmp1], %[block], 0x08) 89 MMI_LDC1(%[ftmp2], %[block], 0x10) 90 MMI_LDC1(%[ftmp3], %[block], 0x18) 91 /* memset(block, 0, 32) */ 92 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 93 MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00) 94 MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10) 95 "dli %[tmp0], 0x01 \n\t" 96 "mtc1 %[tmp0], %[ftmp8] \n\t" 97 "dli %[tmp0], 0x06 \n\t" 98 "mtc1 %[tmp0], %[ftmp9] \n\t" 99 "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t" 100 "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t" 101 "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 102 "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 103 "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t" 104 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 105 "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t" 106 "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t" 107 "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t" 108 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 
109 "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t" 110 "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t" 111 "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t" 112 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 113 "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t" 114 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 115 "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t" 116 "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t" 117 "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t" 118 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t" 119 "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t" 120 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 121 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 122 "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t" 123 "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 124 "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t" 125 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 126 "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t" 127 "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 128 MMI_ULWC1(%[ftmp2], %[dst], 0x00) 129 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 130 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 131 "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t" 132 "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t" 133 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 134 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 135 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 136 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 137 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 138 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 139 MMI_SWC1(%[ftmp2], %[dst], 0x00) 140 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 141 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 142 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 143 MMI_ULWC1(%[ftmp2], %[dst], 0x00) 144 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t" 145 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 146 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 147 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 148 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 149 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 150 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 151 "packushb 
%[ftmp2], %[ftmp2], %[ftmp7] \n\t" 152 MMI_SWC1(%[ftmp2], %[dst], 0x00) 153 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 154 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 155 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 156 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 157 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 158 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 159 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 160 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 161 RESTRICT_ASM_LOW32 162 RESTRICT_ASM_ADDRT 163 [tmp0]"=&r"(tmp[0]) 164 : [dst]"r"(dst), [block]"r"(block), 165 [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f) 166 : "memory" 167 ); 168 169} 170 171void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride) 172{ 173 double ftmp[16]; 174 uint64_t tmp[7]; 175 mips_reg addr[1]; 176 DECLARE_VAR_LOW32; 177 DECLARE_VAR_ADDRT; 178 179 __asm__ volatile ( 180 "lhu %[tmp0], 0x00(%[block]) \n\t" 181 PTR_ADDI "$sp, $sp, -0x20 \n\t" 182 PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t" 183 MMI_LDC1(%[ftmp1], %[block], 0x10) 184 "sh %[tmp0], 0x00(%[block]) \n\t" 185 MMI_LDC1(%[ftmp2], %[block], 0x20) 186 "dli %[tmp0], 0x01 \n\t" 187 MMI_LDC1(%[ftmp3], %[block], 0x30) 188 "mtc1 %[tmp0], %[ftmp8] \n\t" 189 MMI_LDC1(%[ftmp5], %[block], 0x50) 190 MMI_LDC1(%[ftmp6], %[block], 0x60) 191 MMI_LDC1(%[ftmp7], %[block], 0x70) 192 "mov.d %[ftmp0], %[ftmp1] \n\t" 193 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 194 "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t" 195 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 196 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 197 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 198 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 199 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 200 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 201 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 202 "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t" 203 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 204 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 205 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 206 "psrah 
%[ftmp7], %[ftmp7], %[ftmp8] \n\t" 207 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 208 "dli %[tmp0], 0x02 \n\t" 209 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 210 "mtc1 %[tmp0], %[ftmp9] \n\t" 211 "mov.d %[ftmp7], %[ftmp1] \n\t" 212 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 213 "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t" 214 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 215 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 216 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 217 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t" 218 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 219 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 220 "mov.d %[ftmp5], %[ftmp6] \n\t" 221 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 222 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t" 223 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 224 "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 225 MMI_LDC1(%[ftmp2], %[block], 0x00) 226 MMI_LDC1(%[ftmp5], %[block], 0x40) 227 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 228 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 229 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 230 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 231 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 232 "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t" 233 "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 234 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 235 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 236 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 237 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 238 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 239 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 240 "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 241 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 242 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 243 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 244 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 245 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 246 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 247 MMI_SDC1(%[ftmp6], %[block], 0x00) 248 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 249 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t" 250 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 
251 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t" 252 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 253 "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t" 254 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 255 "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t" 256 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 257 MMI_LDC1(%[ftmp0], %[block], 0x00) 258 MMI_SDC1(%[ftmp7], $sp, 0x00) 259 MMI_SDC1(%[ftmp1], $sp, 0x10) 260 "dmfc1 %[tmp1], %[ftmp6] \n\t" 261 "dmfc1 %[tmp3], %[ftmp3] \n\t" 262 "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t" 263 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 264 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t" 265 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 266 "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t" 267 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 268 "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t" 269 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 270 MMI_SDC1(%[ftmp5], $sp, 0x08) 271 MMI_SDC1(%[ftmp0], $sp, 0x18) 272 "dmfc1 %[tmp2], %[ftmp3] \n\t" 273 "dmfc1 %[tmp4], %[ftmp4] \n\t" 274 MMI_LDC1(%[ftmp1], %[block], 0x18) 275 MMI_LDC1(%[ftmp6], %[block], 0x28) 276 MMI_LDC1(%[ftmp2], %[block], 0x38) 277 MMI_LDC1(%[ftmp0], %[block], 0x58) 278 MMI_LDC1(%[ftmp3], %[block], 0x68) 279 MMI_LDC1(%[ftmp4], %[block], 0x78) 280 "mov.d %[ftmp7], %[ftmp1] \n\t" 281 "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t" 282 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 283 "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 284 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 285 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 286 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 287 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 288 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 289 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 290 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 291 "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 292 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 293 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 294 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 295 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 296 "psubh %[ftmp0], 
%[ftmp0], %[ftmp4] \n\t" 297 "mov.d %[ftmp4], %[ftmp1] \n\t" 298 "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t" 299 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 300 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 301 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 302 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 303 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 304 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 305 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 306 "mov.d %[ftmp0], %[ftmp3] \n\t" 307 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 308 "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t" 309 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 310 "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 311 MMI_LDC1(%[ftmp6], %[block], 0x08) 312 MMI_LDC1(%[ftmp0], %[block], 0x48) 313 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 314 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 315 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 316 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 317 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 318 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 319 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 320 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 321 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 322 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 323 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 324 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 325 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 326 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 327 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 328 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 329 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 330 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 331 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 332 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 333 MMI_SDC1(%[ftmp3], %[block], 0x08) 334 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 335 "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t" 336 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 337 "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t" 338 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 339 "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t" 340 "punpcklwd 
%[ftmp4], %[ftmp4], %[ftmp2] \n\t" 341 "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t" 342 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 343 MMI_LDC1(%[ftmp7], %[block], 0x08) 344 "dmfc1 %[tmp5], %[ftmp4] \n\t" 345 "mov.d %[ftmp10], %[ftmp1] \n\t" 346 "mov.d %[ftmp12], %[ftmp3] \n\t" 347 "mov.d %[ftmp14], %[ftmp2] \n\t" 348 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t" 349 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 350 "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t" 351 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 352 "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t" 353 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 354 "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t" 355 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 356 "dmfc1 %[tmp6], %[ftmp0] \n\t" 357 "mov.d %[ftmp11], %[ftmp7] \n\t" 358 "mov.d %[ftmp13], %[ftmp2] \n\t" 359 "mov.d %[ftmp15], %[ftmp5] \n\t" 360 PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t" 361 "mov.d %[ftmp7], %[ftmp10] \n\t" 362 "dmtc1 %[tmp3], %[ftmp6] \n\t" 363 MMI_LDC1(%[ftmp1], $sp, 0x10) 364 "dmtc1 %[tmp1], %[ftmp3] \n\t" 365 "mov.d %[ftmp4], %[ftmp1] \n\t" 366 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 367 "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t" 368 "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 369 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 370 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 371 "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t" 372 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 373 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 374 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 375 "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 376 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 377 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 378 "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t" 379 "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t" 380 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 381 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 382 "mov.d %[ftmp5], %[ftmp1] \n\t" 383 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 384 "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t" 385 "paddh %[ftmp1], %[ftmp1], %[ftmp7] 
\n\t" 386 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 387 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 388 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 389 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 390 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 391 "mov.d %[ftmp7], %[ftmp12] \n\t" 392 "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t" 393 "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t" 394 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 395 "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 396 MMI_LDC1(%[ftmp3], $sp, 0x00) 397 "dmtc1 %[tmp5], %[ftmp7] \n\t" 398 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 399 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 400 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 401 "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 402 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 403 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 404 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 405 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 406 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 407 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 408 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 409 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 410 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 411 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 412 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 413 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 414 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 415 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 416 "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 417 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 418 MMI_SDC1(%[ftmp3], $sp, 0x00) 419 "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 420 MMI_SDC1(%[ftmp0], $sp, 0x10) 421 "dmfc1 %[tmp1], %[ftmp2] \n\t" 422 "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t" 423 MMI_SDC1(%[ftmp2], %[block], 0x00) 424 MMI_SDC1(%[ftmp2], %[block], 0x08) 425 MMI_SDC1(%[ftmp2], %[block], 0x10) 426 MMI_SDC1(%[ftmp2], %[block], 0x18) 427 MMI_SDC1(%[ftmp2], %[block], 0x20) 428 MMI_SDC1(%[ftmp2], %[block], 0x28) 429 MMI_SDC1(%[ftmp2], %[block], 0x30) 430 MMI_SDC1(%[ftmp2], %[block], 0x38) 431 MMI_SDC1(%[ftmp2], %[block], 0x40) 432 
MMI_SDC1(%[ftmp2], %[block], 0x48) 433 MMI_SDC1(%[ftmp2], %[block], 0x50) 434 MMI_SDC1(%[ftmp2], %[block], 0x58) 435 MMI_SDC1(%[ftmp2], %[block], 0x60) 436 MMI_SDC1(%[ftmp2], %[block], 0x68) 437 MMI_SDC1(%[ftmp2], %[block], 0x70) 438 MMI_SDC1(%[ftmp2], %[block], 0x78) 439 "dli %[tmp3], 0x06 \n\t" 440 "mtc1 %[tmp3], %[ftmp10] \n\t" 441 MMI_ULWC1(%[ftmp3], %[dst], 0x00) 442 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 443 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 444 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 445 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 446 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 447 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 448 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 449 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 450 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 451 MMI_SWC1(%[ftmp3], %[dst], 0x00) 452 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 453 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 454 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 455 MMI_ULWC1(%[ftmp3], %[dst], 0x00) 456 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 457 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 458 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 459 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 460 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 461 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 462 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 463 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 464 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 465 MMI_SWC1(%[ftmp3], %[dst], 0x00) 466 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 467 MMI_LDC1(%[ftmp5], $sp, 0x00) 468 MMI_LDC1(%[ftmp4], $sp, 0x10) 469 "dmtc1 %[tmp1], %[ftmp6] \n\t" 470 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 471 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 472 MMI_ULWC1(%[ftmp3], %[dst], 0x00) 473 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 474 "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 475 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 476 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 477 "punpcklbh %[ftmp0], %[ftmp0], 
%[ftmp2] \n\t" 478 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 479 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" 480 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 481 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 482 MMI_SWC1(%[ftmp3], %[dst], 0x00) 483 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 484 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 485 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 486 MMI_ULWC1(%[ftmp3], %[dst], 0x00) 487 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00) 488 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 489 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 490 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 491 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 492 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 493 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 494 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 495 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 496 MMI_SWC1(%[ftmp3], %[dst], 0x00) 497 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00) 498 "dmtc1 %[tmp4], %[ftmp1] \n\t" 499 "dmtc1 %[tmp2], %[ftmp6] \n\t" 500 MMI_LDC1(%[ftmp4], $sp, 0x18) 501 "mov.d %[ftmp5], %[ftmp4] \n\t" 502 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 503 "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t" 504 "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 505 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 506 "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t" 507 "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t" 508 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 509 "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 510 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 511 "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t" 512 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 513 "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t" 514 "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 515 "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t" 516 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 517 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 518 "mov.d %[ftmp2], %[ftmp4] \n\t" 519 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 520 "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t" 521 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 
522 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 523 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t" 524 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 525 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 526 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 527 "mov.d %[ftmp3], %[ftmp13] \n\t" 528 "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t" 529 "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t" 530 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 531 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 532 MMI_LDC1(%[ftmp6], $sp, 0x08) 533 "dmtc1 %[tmp6], %[ftmp3] \n\t" 534 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 535 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 536 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t" 537 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 538 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 539 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 540 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 541 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 542 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 543 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 544 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 545 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 546 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 547 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 548 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 549 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 550 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 551 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 552 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 553 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 554 MMI_SDC1(%[ftmp6], $sp, 0x08) 555 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 556 MMI_SDC1(%[ftmp7], $sp, 0x18) 557 "dmfc1 %[tmp2], %[ftmp0] \n\t" 558 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 559 MMI_ULWC1(%[ftmp6], %[addr0], 0x00) 560 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 561 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 562 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 563 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 564 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 565 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 566 "paddh %[ftmp7], %[ftmp7], 
%[ftmp5] \n\t" 567 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 568 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 569 MMI_SWC1(%[ftmp6], %[addr0], 0x00) 570 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 571 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 572 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 573 MMI_ULWC1(%[ftmp6], %[addr0], 0x00) 574 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 575 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 576 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 577 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 578 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 579 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 580 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 581 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 582 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 583 MMI_SWC1(%[ftmp6], %[addr0], 0x00) 584 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 585 MMI_LDC1(%[ftmp2], $sp, 0x08) 586 MMI_LDC1(%[ftmp5], $sp, 0x18) 587 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 588 "dmtc1 %[tmp2], %[ftmp1] \n\t" 589 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 590 MMI_ULWC1(%[ftmp6], %[addr0], 0x00) 591 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 592 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 593 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 594 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 595 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 596 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 597 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 598 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 599 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 600 MMI_SWC1(%[ftmp6], %[addr0], 0x00) 601 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 602 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 603 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t" 604 MMI_ULWC1(%[ftmp6], %[addr0], 0x00) 605 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 606 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 607 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 608 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 609 "punpcklbh %[ftmp7], 
%[ftmp7], %[ftmp0] \n\t" 610 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 611 "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 612 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 613 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 614 MMI_SWC1(%[ftmp6], %[addr0], 0x00) 615 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00) 616 PTR_ADDIU "$sp, $sp, 0x20 \n\t" 617 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 618 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 619 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 620 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 621 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 622 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 623 [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 624 [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), 625 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 626 [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]), 627 [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]), 628 [tmp6]"=&r"(tmp[6]), 629 RESTRICT_ASM_LOW32 630 RESTRICT_ASM_ADDRT 631 [addr0]"=&r"(addr[0]) 632 : [dst]"r"(dst), [block]"r"(block), 633 [stride]"r"((mips_reg)stride) 634 : "memory" 635 ); 636 637} 638 639void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride) 640{ 641 int dc = (block[0] + 32) >> 6; 642 double ftmp[6]; 643 DECLARE_VAR_LOW32; 644 645 block[0] = 0; 646 647 __asm__ volatile ( 648 "mtc1 %[dc], %[ftmp5] \n\t" 649 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 650 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 651 MMI_ULWC1(%[ftmp1], %[dst0], 0x00) 652 MMI_ULWC1(%[ftmp2], %[dst1], 0x00) 653 MMI_ULWC1(%[ftmp3], %[dst2], 0x00) 654 MMI_ULWC1(%[ftmp4], %[dst3], 0x00) 655 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 656 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 657 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 658 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 659 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 660 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 661 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 662 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 663 "packushb %[ftmp1], 
%[ftmp1], %[ftmp0] \n\t" 664 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 665 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 666 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 667 MMI_SWC1(%[ftmp1], %[dst0], 0x00) 668 MMI_SWC1(%[ftmp2], %[dst1], 0x00) 669 MMI_SWC1(%[ftmp3], %[dst2], 0x00) 670 MMI_SWC1(%[ftmp4], %[dst3], 0x00) 671 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 672 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 673 [ftmp4]"=&f"(ftmp[4]), 674 RESTRICT_ASM_LOW32 675 [ftmp5]"=&f"(ftmp[5]) 676 : [dst0]"r"(dst), [dst1]"r"(dst+stride), 677 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride), 678 [dc]"r"(dc) 679 : "memory" 680 ); 681} 682 683void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride) 684{ 685 int dc = (block[0] + 32) >> 6; 686 double ftmp[10]; 687 DECLARE_VAR_ALL64; 688 689 block[0] = 0; 690 691 __asm__ volatile ( 692 "mtc1 %[dc], %[ftmp5] \n\t" 693 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 694 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 695 MMI_LDC1(%[ftmp1], %[dst0], 0x00) 696 MMI_LDC1(%[ftmp2], %[dst1], 0x00) 697 MMI_LDC1(%[ftmp3], %[dst2], 0x00) 698 MMI_LDC1(%[ftmp4], %[dst3], 0x00) 699 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 700 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 701 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 702 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 703 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" 704 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 705 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t" 706 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 707 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 708 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 709 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 710 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 711 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 712 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 713 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t" 714 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 715 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 716 "packushb %[ftmp2], %[ftmp2], 
%[ftmp7] \n\t" 717 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 718 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 719 MMI_SDC1(%[ftmp1], %[dst0], 0x00) 720 MMI_SDC1(%[ftmp2], %[dst1], 0x00) 721 MMI_SDC1(%[ftmp3], %[dst2], 0x00) 722 MMI_SDC1(%[ftmp4], %[dst3], 0x00) 723 724 MMI_LDC1(%[ftmp1], %[dst4], 0x00) 725 MMI_LDC1(%[ftmp2], %[dst5], 0x00) 726 MMI_LDC1(%[ftmp3], %[dst6], 0x00) 727 MMI_LDC1(%[ftmp4], %[dst7], 0x00) 728 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 729 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 730 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 731 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 732 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t" 733 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 734 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t" 735 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 736 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 737 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 738 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 739 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 740 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 741 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 742 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t" 743 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 744 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 745 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 746 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 747 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 748 MMI_SDC1(%[ftmp1], %[dst4], 0x00) 749 MMI_SDC1(%[ftmp2], %[dst5], 0x00) 750 MMI_SDC1(%[ftmp3], %[dst6], 0x00) 751 MMI_SDC1(%[ftmp4], %[dst7], 0x00) 752 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 753 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 754 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 755 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 756 [ftmp8]"=&f"(ftmp[8]), 757 RESTRICT_ASM_ALL64 758 [ftmp9]"=&f"(ftmp[9]) 759 : [dst0]"r"(dst), [dst1]"r"(dst+stride), 760 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride), 761 [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride), 762 
[dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride), 763 [dc]"r"(dc) 764 : "memory" 765 ); 766} 767 768void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, 769 int16_t *block, int stride, 770 const uint8_t nnzc[5 * 8]) 771{ 772 int i; 773 for(i=0; i<16; i++){ 774 int nnz = nnzc[ scan8[i] ]; 775 if(nnz){ 776 if(nnz==1 && ((int16_t*)block)[i*16]) 777 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16, 778 stride); 779 else 780 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, 781 stride); 782 } 783 } 784} 785 786void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, 787 int16_t *block, int stride, const uint8_t nnzc[5 * 8]) 788{ 789 int i; 790 for(i=0; i<16; i++){ 791 if(nnzc[ scan8[i] ]) 792 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride); 793 else if(((int16_t*)block)[i*16]) 794 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16, 795 stride); 796 } 797} 798 799void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, 800 int16_t *block, int stride, const uint8_t nnzc[5 * 8]) 801{ 802 int i; 803 for(i=0; i<16; i+=4){ 804 int nnz = nnzc[ scan8[i] ]; 805 if(nnz){ 806 if(nnz==1 && ((int16_t*)block)[i*16]) 807 ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i], 808 block + i*16, stride); 809 else 810 ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16, 811 stride); 812 } 813 } 814} 815 816void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, 817 int16_t *block, int stride, const uint8_t nnzc[15*8]) 818{ 819 int i, j; 820 for(j=1; j<3; j++){ 821 for(i=j*16; i<j*16+4; i++){ 822 if(nnzc[ scan8[i] ]) 823 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i], 824 block + i*16, stride); 825 else if(((int16_t*)block)[i*16]) 826 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i], 827 block + i*16, stride); 828 } 829 } 830} 831 832void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, 833 int16_t *block, int stride, const uint8_t nnzc[15*8]) 
834{ 835 int i, j; 836 837 for(j=1; j<3; j++){ 838 for(i=j*16; i<j*16+4; i++){ 839 if(nnzc[ scan8[i] ]) 840 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i], 841 block + i*16, stride); 842 else if(((int16_t*)block)[i*16]) 843 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i], 844 block + i*16, stride); 845 } 846 } 847 848 for(j=1; j<3; j++){ 849 for(i=j*16+4; i<j*16+8; i++){ 850 if(nnzc[ scan8[i+4] ]) 851 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4], 852 block + i*16, stride); 853 else if(((int16_t*)block)[i*16]) 854 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4], 855 block + i*16, stride); 856 } 857 } 858} 859 860void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, 861 int qmul) 862{ 863 double ftmp[10]; 864 uint64_t tmp[2]; 865 DECLARE_VAR_ALL64; 866 867 __asm__ volatile ( 868 ".set noreorder \n\t" 869 "dli %[tmp0], 0x08 \n\t" 870 MMI_LDC1(%[ftmp3], %[input], 0x18) 871 "mtc1 %[tmp0], %[ftmp8] \n\t" 872 MMI_LDC1(%[ftmp2], %[input], 0x10) 873 "dli %[tmp0], 0x20 \n\t" 874 MMI_LDC1(%[ftmp1], %[input], 0x08) 875 "mtc1 %[tmp0], %[ftmp9] \n\t" 876 MMI_LDC1(%[ftmp0], %[input], 0x00) 877 "mov.d %[ftmp4], %[ftmp3] \n\t" 878 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" 879 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 880 "mov.d %[ftmp4], %[ftmp1] \n\t" 881 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 882 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 883 "mov.d %[ftmp4], %[ftmp3] \n\t" 884 "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 885 "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 886 "mov.d %[ftmp4], %[ftmp2] \n\t" 887 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 888 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 889 "mov.d %[ftmp4], %[ftmp3] \n\t" 890 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 891 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 892 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 893 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 894 "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t" 895 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 896 "mov.d 
%[ftmp0], %[ftmp4] \n\t" 897 "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 898 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 899 "mov.d %[ftmp1], %[ftmp0] \n\t" 900 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 901 "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 902 "mov.d %[ftmp1], %[ftmp2] \n\t" 903 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 904 "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 905 "mov.d %[ftmp1], %[ftmp0] \n\t" 906 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 907 "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 908 "mov.d %[ftmp1], %[ftmp4] \n\t" 909 "daddi %[tmp0], %[qmul], -0x7fff \n\t" 910 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 911 "bgtz %[tmp0], 1f \n\t" 912 "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 913 "ori %[tmp0], $0, 0x80 \n\t" 914 "dsll %[tmp0], %[tmp0], 0x10 \n\t" 915 "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t" 916 "daddu %[qmul], %[qmul], %[tmp0] \n\t" 917 "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t" 918 "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t" 919 "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t" 920 "mtc1 %[qmul], %[ftmp7] \n\t" 921 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 922 "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 923 "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 924 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 925 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 926 "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t" 927 "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 928 "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 929 "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 930 "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 931 "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 932 "dmfc1 %[tmp1], %[ftmp0] \n\t" 933 "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 934 "mfc1 %[input], %[ftmp0] \n\t" 935 "sh %[tmp1], 0x00(%[output]) \n\t" 936 "sh %[input], 0x80(%[output]) \n\t" 937 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 938 PTR_SRL "%[input], %[input], 0x10 \n\t" 939 "sh %[tmp1], 0x20(%[output]) \n\t" 940 "sh %[input], 0xa0(%[output]) \n\t" 941 "dmfc1 %[tmp1], %[ftmp2] \n\t" 942 "ssrld 
%[ftmp2], %[ftmp2], %[ftmp9] \n\t" 943 "mfc1 %[input], %[ftmp2] \n\t" 944 "sh %[tmp1], 0x40(%[output]) \n\t" 945 "sh %[input], 0xc0(%[output]) \n\t" 946 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 947 PTR_SRL "%[input], %[input], 0x10 \n\t" 948 "sh %[tmp1], 0x60(%[output]) \n\t" 949 "sh %[input], 0xe0(%[output]) \n\t" 950 "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t" 951 "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t" 952 "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t" 953 "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t" 954 "mtc1 %[qmul], %[ftmp7] \n\t" 955 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 956 "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 957 "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 958 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 959 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 960 "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 961 "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 962 "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 963 "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 964 "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 965 "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 966 "dmfc1 %[tmp1], %[ftmp3] \n\t" 967 "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 968 "mfc1 %[input], %[ftmp3] \n\t" 969 "sh %[tmp1], 0x100(%[output]) \n\t" 970 "sh %[input], 0x180(%[output]) \n\t" 971 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 972 PTR_SRL "%[input], %[input], 0x10 \n\t" 973 "sh %[tmp1], 0x120(%[output]) \n\t" 974 "sh %[input], 0x1a0(%[output]) \n\t" 975 "dmfc1 %[tmp1], %[ftmp4] \n\t" 976 "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 977 "mfc1 %[input], %[ftmp4] \n\t" 978 "sh %[tmp1], 0x140(%[output]) \n\t" 979 "sh %[input], 0x1c0(%[output]) \n\t" 980 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 981 PTR_SRL "%[input], %[input], 0x10 \n\t" 982 "sh %[tmp1], 0x160(%[output]) \n\t" 983 "j 2f \n\t" 984 "sh %[input], 0x1e0(%[output]) \n\t" 985 "1: \n\t" 986 "ori %[tmp0], $0, 0x1f \n\t" 987#if HAVE_LOONGSON3 988 "clz %[tmp1], %[qmul] \n\t" 989#elif HAVE_LOONGSON2 990#endif 991 "ori %[input], $0, 0x07 \n\t" 992 "dsubu 
%[tmp1], %[tmp0], %[tmp1] \n\t" 993 "ori %[tmp0], $0, 0x80 \n\t" 994 "dsll %[tmp0], %[tmp0], 0x10 \n\t" 995 "daddu %[qmul], %[qmul], %[tmp0] \n\t" 996 "dsubu %[tmp0], %[tmp1], %[input] \n\t" 997 "movn %[tmp1], %[input], %[tmp0] \n\t" 998 PTR_ADDIU "%[input], %[input], 0x01 \n\t" 999 "andi %[tmp0], %[tmp1], 0xff \n\t" 1000 "srlv %[qmul], %[qmul], %[tmp0] \n\t" 1001 PTR_SUBU "%[input], %[input], %[tmp1] \n\t" 1002 "mtc1 %[input], %[ftmp6] \n\t" 1003 "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t" 1004 "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t" 1005 "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t" 1006 "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t" 1007 "mtc1 %[qmul], %[ftmp7] \n\t" 1008 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1009 "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t" 1010 "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1011 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1012 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1013 "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t" 1014 "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1015 "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1016 "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1017 "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1018 "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 1019 "dmfc1 %[tmp1], %[ftmp0] \n\t" 1020 "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 1021 "sh %[tmp1], 0x00(%[output]) \n\t" 1022 "mfc1 %[input], %[ftmp0] \n\t" 1023 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 1024 "sh %[input], 0x80(%[output]) \n\t" 1025 "sh %[tmp1], 0x20(%[output]) \n\t" 1026 PTR_SRL "%[input], %[input], 0x10 \n\t" 1027 "dmfc1 %[tmp1], %[ftmp2] \n\t" 1028 "sh %[input], 0xa0(%[output]) \n\t" 1029 "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 1030 "sh %[tmp1], 0x40(%[output]) \n\t" 1031 "mfc1 %[input], %[ftmp2] \n\t" 1032 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 1033 "sh %[input], 0xc0(%[output]) \n\t" 1034 "sh %[tmp1], 0x60(%[output]) \n\t" 1035 PTR_SRL "%[input], %[input], 0x10 \n\t" 1036 "sh %[input], 0xe0(%[output]) \n\t" 1037 "punpckhhw %[ftmp1], %[ftmp3], 
%[ff_pw_1] \n\t" 1038 "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t" 1039 "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t" 1040 "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t" 1041 "mtc1 %[qmul], %[ftmp7] \n\t" 1042 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1043 "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1044 "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1045 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1046 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1047 "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" 1048 "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1049 "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1050 "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1051 "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 1052 "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1053 "dmfc1 %[tmp1], %[ftmp3] \n\t" 1054 "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 1055 "mfc1 %[input], %[ftmp3] \n\t" 1056 "sh %[tmp1], 0x100(%[output]) \n\t" 1057 "sh %[input], 0x180(%[output]) \n\t" 1058 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 1059 PTR_SRL "%[input], %[input], 0x10 \n\t" 1060 "sh %[tmp1], 0x120(%[output]) \n\t" 1061 "sh %[input], 0x1a0(%[output]) \n\t" 1062 "dmfc1 %[tmp1], %[ftmp4] \n\t" 1063 "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 1064 "mfc1 %[input], %[ftmp4] \n\t" 1065 "sh %[tmp1], 0x140(%[output]) \n\t" 1066 "sh %[input], 0x1c0(%[output]) \n\t" 1067 "dsrl %[tmp1], %[tmp1], 0x10 \n\t" 1068 PTR_SRL "%[input], %[input], 0x10 \n\t" 1069 "sh %[tmp1], 0x160(%[output]) \n\t" 1070 "sh %[input], 0x1e0(%[output]) \n\t" 1071 "2: \n\t" 1072 ".set reorder \n\t" 1073 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1074 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1075 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1076 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1077 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1078 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 1079 RESTRICT_ASM_ALL64 1080 [output]"+&r"(output), [input]"+&r"(input), 1081 [qmul]"+&r"(qmul) 1082 : [ff_pw_1]"f"(ff_pw_1.f) 1083 : "memory" 1084 ); 1085} 1086 
1087void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul) 1088{ 1089 int temp[8]; 1090 int t[8]; 1091 1092 temp[0] = block[0] + block[16]; 1093 temp[1] = block[0] - block[16]; 1094 temp[2] = block[32] + block[48]; 1095 temp[3] = block[32] - block[48]; 1096 temp[4] = block[64] + block[80]; 1097 temp[5] = block[64] - block[80]; 1098 temp[6] = block[96] + block[112]; 1099 temp[7] = block[96] - block[112]; 1100 1101 t[0] = temp[0] + temp[4] + temp[2] + temp[6]; 1102 t[1] = temp[0] - temp[4] + temp[2] - temp[6]; 1103 t[2] = temp[0] - temp[4] - temp[2] + temp[6]; 1104 t[3] = temp[0] + temp[4] - temp[2] - temp[6]; 1105 t[4] = temp[1] + temp[5] + temp[3] + temp[7]; 1106 t[5] = temp[1] - temp[5] + temp[3] - temp[7]; 1107 t[6] = temp[1] - temp[5] - temp[3] + temp[7]; 1108 t[7] = temp[1] + temp[5] - temp[3] - temp[7]; 1109 1110 block[ 0]= (t[0]*qmul + 128) >> 8; 1111 block[ 32]= (t[1]*qmul + 128) >> 8; 1112 block[ 64]= (t[2]*qmul + 128) >> 8; 1113 block[ 96]= (t[3]*qmul + 128) >> 8; 1114 block[ 16]= (t[4]*qmul + 128) >> 8; 1115 block[ 48]= (t[5]*qmul + 128) >> 8; 1116 block[ 80]= (t[6]*qmul + 128) >> 8; 1117 block[112]= (t[7]*qmul + 128) >> 8; 1118} 1119 1120void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul) 1121{ 1122 int a,b,c,d; 1123 1124 d = block[0] - block[16]; 1125 a = block[0] + block[16]; 1126 b = block[32] - block[48]; 1127 c = block[32] + block[48]; 1128 block[0] = ((a+c)*qmul) >> 7; 1129 block[16]= ((d+b)*qmul) >> 7; 1130 block[32]= ((a-c)*qmul) >> 7; 1131 block[48]= ((d-b)*qmul) >> 7; 1132} 1133 1134void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, 1135 int log2_denom, int weight, int offset) 1136{ 1137 int y; 1138 double ftmp[8]; 1139 DECLARE_VAR_ALL64; 1140 1141 offset <<= log2_denom; 1142 1143 if (log2_denom) 1144 offset += 1 << (log2_denom - 1); 1145 1146 for (y=0; y<height; y++, block+=stride) { 1147 __asm__ volatile ( 1148 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1149 MMI_LDC1(%[ftmp1], 
%[block0], 0x00) 1150 MMI_LDC1(%[ftmp2], %[block1], 0x00) 1151 "mtc1 %[weight], %[ftmp3] \n\t" 1152 "mtc1 %[offset], %[ftmp4] \n\t" 1153 "mtc1 %[log2_denom], %[ftmp5] \n\t" 1154 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1155 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1156 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 1157 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 1158 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1159 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1160 "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1161 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 1162 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1163 "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 1164 "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1165 "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 1166 "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 1167 "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1168 "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1169 "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1170 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1171 "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 1172 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1173 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1174 MMI_SDC1(%[ftmp1], %[block0], 0x00) 1175 MMI_SDC1(%[ftmp2], %[block1], 0x00) 1176 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1177 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1178 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1179 [ftmp6]"=&f"(ftmp[6]), 1180 RESTRICT_ASM_ALL64 1181 [ftmp7]"=&f"(ftmp[7]) 1182 : [block0]"r"(block), [block1]"r"(block+8), 1183 [weight]"r"(weight), [offset]"r"(offset), 1184 [log2_denom]"r"(log2_denom) 1185 : "memory" 1186 ); 1187 } 1188} 1189 1190void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, 1191 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, 1192 int offset) 1193{ 1194 int y; 1195 double ftmp[9]; 1196 DECLARE_VAR_ALL64; 1197 1198 offset = ((offset + 1) | 1) << log2_denom; 1199 1200 for (y=0; y<height; y++, dst+=stride, src+=stride) { 1201 __asm__ volatile ( 
1202 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1203 MMI_LDC1(%[ftmp1], %[src0], 0x00) 1204 MMI_LDC1(%[ftmp2], %[dst0], 0x00) 1205 "mtc1 %[weights], %[ftmp3] \n\t" 1206 "mtc1 %[weightd], %[ftmp4] \n\t" 1207 "mtc1 %[offset], %[ftmp5] \n\t" 1208 "mtc1 %[log2_denom], %[ftmp6] \n\t" 1209 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1210 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1211 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1212 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" 1213 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 1214 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1215 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1216 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 1217 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" 1218 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1219 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1220 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1221 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1222 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1223 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1224 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1225 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1226 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1227 MMI_SDC1(%[ftmp1], %[dst0], 0x00) 1228 MMI_LDC1(%[ftmp1], %[src1], 0x00) 1229 MMI_LDC1(%[ftmp2], %[dst1], 0x00) 1230 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" 1231 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 1232 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1233 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1234 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 1235 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" 1236 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1237 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1238 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1239 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1240 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1241 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1242 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1243 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1244 "packushb %[ftmp1], %[ftmp1], %[ftmp7] 
\n\t" 1245 MMI_SDC1(%[ftmp1], %[dst1], 0x00) 1246 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1247 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1248 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1249 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1250 RESTRICT_ASM_ALL64 1251 [ftmp8]"=&f"(ftmp[8]) 1252 : [dst0]"r"(dst), [dst1]"r"(dst+8), 1253 [src0]"r"(src), [src1]"r"(src+8), 1254 [weights]"r"(weights), [weightd]"r"(weightd), 1255 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1) 1256 : "memory" 1257 ); 1258 } 1259} 1260 1261void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, 1262 int log2_denom, int weight, int offset) 1263{ 1264 int y; 1265 double ftmp[6]; 1266 DECLARE_VAR_ALL64; 1267 1268 offset <<= log2_denom; 1269 1270 if (log2_denom) 1271 offset += 1 << (log2_denom - 1); 1272 1273 for (y=0; y<height; y++, block+=stride) { 1274 __asm__ volatile ( 1275 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1276 MMI_LDC1(%[ftmp1], %[block], 0x00) 1277 "mtc1 %[weight], %[ftmp2] \n\t" 1278 "mtc1 %[offset], %[ftmp3] \n\t" 1279 "mtc1 %[log2_denom], %[ftmp5] \n\t" 1280 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1281 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1282 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 1283 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1284 "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t" 1285 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1286 "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t" 1287 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1288 "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1289 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1290 "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 1291 MMI_SDC1(%[ftmp1], %[block], 0x00) 1292 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1293 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1294 [ftmp4]"=&f"(ftmp[4]), 1295 RESTRICT_ASM_ALL64 1296 [ftmp5]"=&f"(ftmp[5]) 1297 : [block]"r"(block), [weight]"r"(weight), 1298 [offset]"r"(offset), [log2_denom]"r"(log2_denom) 1299 : "memory" 1300 ); 1301 } 1302} 1303 
1304void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, 1305 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, 1306 int offset) 1307{ 1308 int y; 1309 double ftmp[9]; 1310 DECLARE_VAR_ALL64; 1311 1312 offset = ((offset + 1) | 1) << log2_denom; 1313 1314 for (y=0; y<height; y++, dst+=stride, src+=stride) { 1315 __asm__ volatile ( 1316 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1317 MMI_LDC1(%[ftmp1], %[src], 0x00) 1318 MMI_LDC1(%[ftmp2], %[dst], 0x00) 1319 "mtc1 %[weights], %[ftmp3] \n\t" 1320 "mtc1 %[weightd], %[ftmp4] \n\t" 1321 "mtc1 %[offset], %[ftmp5] \n\t" 1322 "mtc1 %[log2_denom], %[ftmp6] \n\t" 1323 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1324 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1325 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1326 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" 1327 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 1328 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1329 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1330 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 1331 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" 1332 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1333 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1334 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1335 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1336 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1337 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1338 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1339 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1340 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 1341 MMI_SDC1(%[ftmp1], %[dst], 0x00) 1342 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1343 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1344 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1345 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1346 RESTRICT_ASM_ALL64 1347 [ftmp8]"=&f"(ftmp[8]) 1348 : [dst]"r"(dst), [src]"r"(src), 1349 [weights]"r"(weights), [weightd]"r"(weightd), 1350 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1) 1351 : "memory" 1352 ); 1353 } 1354} 1355 
1356void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, 1357 int log2_denom, int weight, int offset) 1358{ 1359 int y; 1360 double ftmp[5]; 1361 DECLARE_VAR_LOW32; 1362 1363 offset <<= log2_denom; 1364 1365 if (log2_denom) 1366 offset += 1 << (log2_denom - 1); 1367 1368 for (y=0; y<height; y++, block+=stride) { 1369 __asm__ volatile ( 1370 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1371 MMI_ULWC1(%[ftmp1], %[block], 0x00) 1372 "mtc1 %[weight], %[ftmp2] \n\t" 1373 "mtc1 %[offset], %[ftmp3] \n\t" 1374 "mtc1 %[log2_denom], %[ftmp4] \n\t" 1375 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1376 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1377 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1378 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1379 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1380 "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 1381 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1382 MMI_SWC1(%[ftmp1], %[block], 0x00) 1383 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1384 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1385 RESTRICT_ASM_LOW32 1386 [ftmp4]"=&f"(ftmp[4]) 1387 : [block]"r"(block), [weight]"r"(weight), 1388 [offset]"r"(offset), [log2_denom]"r"(log2_denom) 1389 : "memory" 1390 ); 1391 } 1392} 1393 1394void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, 1395 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, 1396 int offset) 1397{ 1398 int y; 1399 double ftmp[7]; 1400 DECLARE_VAR_LOW32; 1401 1402 offset = ((offset + 1) | 1) << log2_denom; 1403 1404 for (y=0; y<height; y++, dst+=stride, src+=stride) { 1405 __asm__ volatile ( 1406 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1407 MMI_ULWC1(%[ftmp1], %[src], 0x00) 1408 MMI_ULWC1(%[ftmp2], %[dst], 0x00) 1409 "mtc1 %[weight], %[ftmp3] \n\t" 1410 "mtc1 %[weightd], %[ftmp4] \n\t" 1411 "mtc1 %[offset], %[ftmp5] \n\t" 1412 "mtc1 %[log2_denom], %[ftmp6] \n\t" 1413 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1414 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1415 "pshufh 
%[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1416 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1417 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1418 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1419 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1420 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1421 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 1422 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 1423 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1424 MMI_SWC1(%[ftmp1], %[dst], 0x00) 1425 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1426 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1427 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1428 RESTRICT_ASM_LOW32 1429 [ftmp6]"=&f"(ftmp[6]) 1430 : [dst]"r"(dst), [src]"r"(src), 1431 [weight]"r"(weights), [weightd]"r"(weightd), 1432 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1) 1433 : "memory" 1434 ); 1435 } 1436} 1437 1438void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, 1439 int8_t *tc0) 1440{ 1441 double ftmp[12]; 1442 mips_reg addr[2]; 1443 DECLARE_VAR_LOW32; 1444 DECLARE_VAR_ALL64; 1445 DECLARE_VAR_ADDRT; 1446 1447 __asm__ volatile ( 1448 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t" 1449 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1450 PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t" 1451 "addi %[alpha], %[alpha], -0x01 \n\t" 1452 PTR_SUBU "%[addr1], $0, %[addr1] \n\t" 1453 "addi %[beta], %[beta], -0x01 \n\t" 1454 PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t" 1455 MMI_LDC1(%[ftmp3], %[pix], 0x00) 1456 MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00) 1457 MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00) 1458 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00) 1459 "mtc1 %[alpha], %[ftmp5] \n\t" 1460 "mtc1 %[beta], %[ftmp6] \n\t" 1461 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1462 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1463 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1464 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1465 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t" 1466 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t" 1467 
"por %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1468 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t" 1469 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1470 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1471 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1472 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1473 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1474 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1475 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t" 1476 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1477 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1478 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1479 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1480 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 1481 MMI_ULWC1(%[ftmp5], %[tc0], 0x00) 1482 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1483 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t" 1484 "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t" 1485 MMI_LDC1(%[ftmp4], %[addr1], 0x00) 1486 "pand %[ftmp10], %[ftmp5], %[ftmp8] \n\t" 1487 "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t" 1488 "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t" 1489 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t" 1490 "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1491 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1492 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1493 "pand %[ftmp5], %[ftmp10], %[ftmp9] \n\t" 1494 "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t" 1495 "pand %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1496 "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t" 1497 MMI_LDC1(%[ftmp11], %[addr1], 0x00) 1498 "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1499 "pxor %[ftmp5], %[ftmp5], %[ftmp11] \n\t" 1500 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t" 1501 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1502 "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t" 1503 "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 1504 "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1505 "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 1506 MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00) 1507 MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00) 1508 "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t" 1509 
"psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t" 1510 "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1511 "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1512 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 1513 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1514 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1515 "pand %[ftmp6], %[ftmp9], %[ftmp7] \n\t" 1516 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00) 1517 "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t" 1518 MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00) 1519 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1520 "pxor %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 1521 "pand %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t" 1522 "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1523 "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t" 1524 "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1525 "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1526 "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1527 MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00) 1528 "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1529 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1530 "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t" 1531 "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1532 "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 1533 "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 1534 "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t" 1535 "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t" 1536 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1537 "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1538 "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t" 1539 "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t" 1540 "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1541 "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 1542 "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1543 "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1544 "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1545 "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1546 MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00) 1547 MMI_SDC1(%[ftmp3], %[pix], 0x00) 1548 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1549 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1550 
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1551 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1552 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1553 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 1554 RESTRICT_ASM_LOW32 1555 RESTRICT_ASM_ALL64 1556 RESTRICT_ASM_ADDRT 1557 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) 1558 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 1559 [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta), 1560 [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f), 1561 [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f) 1562 : "memory" 1563 ); 1564} 1565 1566static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, 1567 int beta) 1568{ 1569 DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]); 1570 double ftmp[16]; 1571 uint64_t tmp[1]; 1572 mips_reg addr[3]; 1573 DECLARE_VAR_ALL64; 1574 DECLARE_VAR_ADDRT; 1575 1576 __asm__ volatile ( 1577 "ori %[tmp0], $0, 0x01 \n\t" 1578 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1579 "mtc1 %[tmp0], %[ftmp9] \n\t" 1580 PTR_SLL "%[addr0], %[stride], 0x02 \n\t" 1581 PTR_ADDU "%[addr2], %[stride], %[stride] \n\t" 1582 PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t" 1583 "sslld %[ftmp11], %[ftmp9], %[ftmp9] \n\t" 1584 "bltz %[alpha], 1f \n\t" 1585 PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t" 1586 PTR_ADDIU "%[beta], %[beta], -0x01 \n\t" 1587 "bltz %[beta], 1f \n\t" 1588 PTR_SUBU "%[addr0], $0, %[addr0] \n\t" 1589 PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t" 1590 MMI_LDC1(%[ftmp3], %[pix], 0x00) 1591 MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00) 1592 MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00) 1593 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00) 1594 "mtc1 %[alpha], %[ftmp5] \n\t" 1595 "mtc1 %[beta], %[ftmp6] \n\t" 1596 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1597 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1598 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1599 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t" 1600 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t" 1601 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1602 
"por %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1603 MMI_SDC1(%[ftmp5], %[stack], 0x10) 1604 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1605 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t" 1606 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1607 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1608 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1609 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1610 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1611 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t" 1612 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1613 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1614 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1615 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1616 MMI_LDC1(%[ftmp5], %[stack], 0x10) 1617 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1618 "ldc1 %[ftmp10], %[ff_pb_1] \n\t" 1619 MMI_SDC1(%[ftmp8], %[stack], 0x20) 1620 "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1621 "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t" 1622 "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 1623 "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t" 1624 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1625 "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1626 MMI_LDC1(%[ftmp15], %[stack], 0x20) 1627 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1628 "pand %[ftmp7], %[ftmp7], %[ftmp15] \n\t" 1629 MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00) 1630 "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t" 1631 "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t" 1632 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t" 1633 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1634 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 1635 "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1636 MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00) 1637 MMI_SDC1(%[ftmp5], %[stack], 0x30) 1638 "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t" 1639 "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t" 1640 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t" 1641 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1642 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 1643 "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1644 MMI_SDC1(%[ftmp5], 
%[stack], 0x40) 1645 "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t" 1646 "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1647 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1648 MMI_SDC1(%[ftmp6], %[stack], 0x10) 1649 "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t" 1650 "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t" 1651 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1652 "mov.d %[ftmp8], %[ftmp7] \n\t" 1653 MMI_SDC1(%[ftmp7], %[stack], 0x00) 1654 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 1655 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1656 "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1657 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1658 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1659 "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t" 1660 "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t" 1661 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 1662 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1663 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1664 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1665 MMI_LDC1(%[ftmp13], %[stack], 0x10) 1666 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" 1667 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t" 1668 "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t" 1669 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1670 "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t" 1671 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 1672 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1673 "pxor %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 1674 "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t" 1675 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 1676 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1677 MMI_LDC1(%[ftmp13], %[stack], 0x30) 1678 "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t" 1679 MMI_LDC1(%[ftmp12], %[stack], 0x20) 1680 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1681 "pxor %[ftmp7], %[ftmp7], %[ftmp2] \n\t" 1682 "pand %[ftmp6], %[ftmp6], %[ftmp13] \n\t" 1683 "pand %[ftmp7], %[ftmp7], %[ftmp12] \n\t" 1684 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1685 "pxor %[ftmp6], %[ftmp6], %[ftmp2] \n\t" 1686 MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00) 1687 MMI_LDC1(%[ftmp6], %[addr0], 0x00) 1688 
"paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t" 1689 "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t" 1690 MMI_LDC1(%[ftmp12], %[stack], 0x00) 1691 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1692 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1693 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t" 1694 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 1695 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1696 "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1697 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1698 MMI_LDC1(%[ftmp12], %[stack], 0x30) 1699 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1700 "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 1701 "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t" 1702 "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t" 1703 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 1704 "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t" 1705 "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t" 1706 MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00) 1707 MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00) 1708 "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t" 1709 "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t" 1710 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1711 MMI_SDC1(%[ftmp6], %[stack], 0x10) 1712 "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t" 1713 "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t" 1714 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1715 "mov.d %[ftmp8], %[ftmp7] \n\t" 1716 MMI_SDC1(%[ftmp7], %[stack], 0x00) 1717 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 1718 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1719 "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t" 1720 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1721 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1722 "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t" 1723 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 1724 "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t" 1725 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1726 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1727 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1728 MMI_LDC1(%[ftmp12], %[stack], 0x10) 1729 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t" 1730 "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 1731 "psrlh 
%[ftmp8], %[ftmp8], %[ftmp11] \n\t" 1732 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1733 "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t" 1734 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 1735 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 1736 "pxor %[ftmp8], %[ftmp3], %[ftmp1] \n\t" 1737 "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t" 1738 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 1739 MMI_LDC1(%[ftmp12], %[stack], 0x40) 1740 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1741 MMI_LDC1(%[ftmp13], %[stack], 0x20) 1742 "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 1743 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1744 "pxor %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 1745 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 1746 "pand %[ftmp7], %[ftmp7], %[ftmp13] \n\t" 1747 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1748 "pxor %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 1749 MMI_SDC1(%[ftmp6], %[pix], 0x00) 1750 MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00) 1751 "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t" 1752 "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t" 1753 MMI_LDC1(%[ftmp12], %[stack], 0x00) 1754 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1755 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1756 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t" 1757 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 1758 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1759 "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 1760 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1761 MMI_LDC1(%[ftmp12], %[stack], 0x40) 1762 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 1763 "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1764 "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t" 1765 "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t" 1766 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 1767 "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t" 1768 "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t" 1769 MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00) 1770 MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00) 1771 "1: \n\t" 1772 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1773 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1774 [ftmp4]"=&f"(ftmp[4]), 
[ftmp5]"=&f"(ftmp[5]), 1775 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1776 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1777 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 1778 [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 1779 [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), 1780 [tmp0]"=&r"(tmp[0]), 1781 RESTRICT_ASM_ALL64 1782 RESTRICT_ASM_ADDRT 1783 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 1784 [addr2]"=&r"(addr[2]), 1785 [alpha]"+&r"(alpha), [beta]"+&r"(beta) 1786 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 1787 [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1) 1788 : "memory" 1789 ); 1790} 1791 1792void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, 1793 int beta, int8_t *tc0) 1794{ 1795 double ftmp[9]; 1796 mips_reg addr[1]; 1797 DECLARE_VAR_LOW32; 1798 DECLARE_VAR_ALL64; 1799 DECLARE_VAR_ADDRT; 1800 1801 __asm__ volatile ( 1802 "addi %[alpha], %[alpha], -0x01 \n\t" 1803 "addi %[beta], %[beta], -0x01 \n\t" 1804 "or %[addr0], $0, %[pix] \n\t" 1805 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t" 1806 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t" 1807 MMI_LDC1(%[ftmp1], %[addr0], 0x00) 1808 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00) 1809 MMI_LDC1(%[ftmp3], %[pix], 0x00) 1810 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00) 1811 1812 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1813 "mtc1 %[alpha], %[ftmp5] \n\t" 1814 "mtc1 %[beta], %[ftmp6] \n\t" 1815 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1816 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1817 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1818 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1819 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t" 1820 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t" 1821 "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1822 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1823 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t" 1824 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1825 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1826 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1827 "por 
%[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1828 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1829 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t" 1830 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1831 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1832 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1833 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1834 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1835 MMI_ULWC1(%[ftmp7], %[tc0], 0x00) 1836 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1837 "pand %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1838 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1839 "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 1840 "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1841 "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t" 1842 "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 1843 "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t" 1844 "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t" 1845 "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t" 1846 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1847 "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1848 "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t" 1849 "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t" 1850 "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 1851 "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 1852 "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 1853 "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1854 "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1855 "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1856 1857 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00) 1858 MMI_SDC1(%[ftmp3], %[pix], 0x00) 1859 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1860 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1861 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1862 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1863 [ftmp8]"=&f"(ftmp[8]), 1864 RESTRICT_ASM_LOW32 1865 RESTRICT_ASM_ALL64 1866 RESTRICT_ASM_ADDRT 1867 [addr0]"=&r"(addr[0]) 1868 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 1869 [alpha]"r"(alpha), [beta]"r"(beta), 1870 [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f), 1871 [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f) 
1872 : "memory" 1873 ); 1874} 1875 1876void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, 1877 int beta) 1878{ 1879 double ftmp[9]; 1880 mips_reg addr[1]; 1881 DECLARE_VAR_ALL64; 1882 DECLARE_VAR_ADDRT; 1883 1884 __asm__ volatile ( 1885 "addi %[alpha], %[alpha], -0x01 \n\t" 1886 "addi %[beta], %[beta], -0x01 \n\t" 1887 "or %[addr0], $0, %[pix] \n\t" 1888 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t" 1889 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t" 1890 MMI_LDC1(%[ftmp1], %[addr0], 0x00) 1891 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00) 1892 MMI_LDC1(%[ftmp3], %[pix], 0x00) 1893 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00) 1894 1895 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1896 "mtc1 %[alpha], %[ftmp5] \n\t" 1897 "mtc1 %[beta], %[ftmp6] \n\t" 1898 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1899 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1900 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 1901 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 1902 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t" 1903 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t" 1904 "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1905 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1906 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t" 1907 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1908 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1909 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1910 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1911 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1912 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t" 1913 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1914 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" 1915 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t" 1916 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" 1917 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t" 1918 "mov.d %[ftmp6], %[ftmp2] \n\t" 1919 "mov.d %[ftmp7], %[ftmp3] \n\t" 1920 "pxor %[ftmp5], %[ftmp2], %[ftmp4] \n\t" 1921 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t" 1922 "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 1923 "psubusb %[ftmp2], %[ftmp2], %[ftmp5] 
\n\t" 1924 "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t" 1925 "pxor %[ftmp5], %[ftmp3], %[ftmp1] \n\t" 1926 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t" 1927 "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t" 1928 "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1929 "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 1930 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1931 "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1932 "pand %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 1933 "pand %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 1934 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1935 "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1936 1937 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00) 1938 MMI_SDC1(%[ftmp3], %[pix], 0x00) 1939 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1940 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1941 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1942 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1943 [ftmp8]"=&f"(ftmp[8]), 1944 RESTRICT_ASM_ALL64 1945 RESTRICT_ASM_ADDRT 1946 [addr0]"=&r"(addr[0]) 1947 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 1948 [alpha]"r"(alpha), [beta]"r"(beta), 1949 [ff_pb_1]"f"(ff_pb_1.f) 1950 : "memory" 1951 ); 1952} 1953 1954void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, 1955 int8_t *tc0) 1956{ 1957 double ftmp[11]; 1958 mips_reg addr[6]; 1959 DECLARE_VAR_LOW32; 1960 1961 __asm__ volatile ( 1962 "addi %[alpha], %[alpha], -0x01 \n\t" 1963 "addi %[beta], %[beta], -0x01 \n\t" 1964 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t" 1965 PTR_ADDI "%[pix], %[pix], -0x02 \n\t" 1966 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t" 1967 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t" 1968 "or %[addr5], $0, %[pix] \n\t" 1969 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t" 1970 MMI_ULWC1(%[ftmp0], %[addr5], 0x00) 1971 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t" 1972 MMI_ULWC1(%[ftmp2], %[addr3], 0x00) 1973 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t" 1974 MMI_ULWC1(%[ftmp1], %[addr4], 0x00) 1975 MMI_ULWC1(%[ftmp3], %[pix], 0x00) 1976 "punpcklbh %[ftmp0], 
%[ftmp0], %[ftmp2] \n\t" 1977 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 1978 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t" 1979 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t" 1980 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 1981 MMI_ULWC1(%[ftmp4], %[addr3], 0x00) 1982 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t" 1983 MMI_ULWC1(%[ftmp6], %[addr4], 0x00) 1984 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t" 1985 MMI_ULWC1(%[ftmp5], %[addr3], 0x00) 1986 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t" 1987 MMI_ULWC1(%[ftmp7], %[addr4], 0x00) 1988 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 1989 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 1990 "mov.d %[ftmp6], %[ftmp4] \n\t" 1991 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1992 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 1993 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 1994 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t" 1995 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 1996 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1997 "mov.d %[ftmp9], %[ftmp0] \n\t" 1998 "mov.d %[ftmp10], %[ftmp3] \n\t" 1999 2000 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 2001 "mtc1 %[alpha], %[ftmp4] \n\t" 2002 "mtc1 %[beta], %[ftmp5] \n\t" 2003 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 2004 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 2005 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 2006 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2007 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t" 2008 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t" 2009 "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 2010 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2011 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 2012 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t" 2013 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2014 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2015 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2016 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 2017 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t" 2018 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2019 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2020 
"por %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2021 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 2022 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 2023 MMI_ULWC1(%[ftmp6], %[tc0], 0x00) 2024 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 2025 "pand %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 2026 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 2027 "pxor %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 2028 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 2029 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t" 2030 "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 2031 "pxor %[ftmp4], %[ftmp4], %[ftmp1] \n\t" 2032 "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t" 2033 "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t" 2034 "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 2035 "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 2036 "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t" 2037 "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t" 2038 "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t" 2039 "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 2040 "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 2041 "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2042 "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 2043 "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 2044 2045 "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t" 2046 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t" 2047 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t" 2048 "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t" 2049 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 2050 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2051 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2052 MMI_USWC1(%[ftmp1], %[addr5], 0x00) 2053 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t" 2054 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 2055 MMI_USWC1(%[ftmp1], %[addr3], 0x00) 2056 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t" 2057 MMI_USWC1(%[ftmp0], %[addr4], 0x00) 2058 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2059 "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t" 2060 MMI_USWC1(%[ftmp0], %[pix], 0x00) 2061 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2062 "punpcklbh %[ftmp6], 
%[ftmp6], %[ftmp3] \n\t" 2063 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t" 2064 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t" 2065 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2066 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2067 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2068 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t" 2069 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t" 2070 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2071 MMI_USWC1(%[ftmp4], %[addr4], 0x00) 2072 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t" 2073 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t" 2074 MMI_USWC1(%[ftmp9], %[addr3], 0x00) 2075 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2076 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2077 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2078 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2079 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 2080 [ftmp10]"=&f"(ftmp[10]), 2081 RESTRICT_ASM_LOW32 2082 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 2083 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]), 2084 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), 2085 [pix]"+&r"(pix) 2086 : [alpha]"r"(alpha), [beta]"r"(beta), 2087 [stride]"r"((mips_reg)stride), [tc0]"r"(tc0), 2088 [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f), 2089 [ff_pb_A1]"f"(ff_pb_A1.f) 2090 : "memory" 2091 ); 2092} 2093 2094void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, 2095 int beta) 2096{ 2097 double ftmp[11]; 2098 mips_reg addr[6]; 2099 DECLARE_VAR_LOW32; 2100 2101 __asm__ volatile ( 2102 "addi %[alpha], %[alpha], -0x01 \n\t" 2103 "addi %[beta], %[beta], -0x01 \n\t" 2104 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t" 2105 PTR_ADDI "%[pix], %[pix], -0x02 \n\t" 2106 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t" 2107 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t" 2108 "or %[addr5], $0, %[pix] \n\t" 2109 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t" 2110 MMI_ULWC1(%[ftmp0], %[addr5], 0x00) 2111 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t" 2112 MMI_ULWC1(%[ftmp2], %[addr3], 0x00) 2113 PTR_ADDU 
"%[addr4], %[addr5], %[addr0] \n\t" 2114 MMI_ULWC1(%[ftmp1], %[addr4], 0x00) 2115 MMI_ULWC1(%[ftmp3], %[pix], 0x00) 2116 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2117 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 2118 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t" 2119 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t" 2120 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2121 MMI_ULWC1(%[ftmp4], %[addr3], 0x00) 2122 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t" 2123 MMI_ULWC1(%[ftmp6], %[addr4], 0x00) 2124 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t" 2125 MMI_ULWC1(%[ftmp5], %[addr3], 0x00) 2126 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t" 2127 MMI_ULWC1(%[ftmp7], %[addr4], 0x00) 2128 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2129 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 2130 "mov.d %[ftmp6], %[ftmp4] \n\t" 2131 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2132 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t" 2133 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t" 2134 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t" 2135 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 2136 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 2137 2138 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" 2139 "mtc1 %[alpha], %[ftmp4] \n\t" 2140 "mtc1 %[beta], %[ftmp5] \n\t" 2141 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 2142 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 2143 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 2144 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2145 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t" 2146 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t" 2147 "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 2148 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2149 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 2150 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t" 2151 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2152 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2153 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2154 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 2155 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t" 2156 "por %[ftmp4], %[ftmp4], 
%[ftmp6] \n\t" 2157 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2158 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t" 2159 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" 2160 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t" 2161 "mov.d %[ftmp5], %[ftmp1] \n\t" 2162 "mov.d %[ftmp6], %[ftmp2] \n\t" 2163 "pxor %[ftmp4], %[ftmp1], %[ftmp3] \n\t" 2164 "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t" 2165 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 2166 "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 2167 "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 2168 "pxor %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 2169 "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t" 2170 "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 2171 "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 2172 "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2173 "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 2174 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 2175 "pand %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 2176 "pand %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 2177 "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 2178 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 2179 2180 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t" 2181 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t" 2182 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t" 2183 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2184 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2185 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2186 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2187 MMI_USWC1(%[ftmp1], %[addr5], 0x00) 2188 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t" 2189 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 2190 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t" 2191 MMI_USWC1(%[ftmp1], %[addr3], 0x00) 2192 MMI_USWC1(%[ftmp0], %[addr4], 0x00) 2193 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2194 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 2195 MMI_USWC1(%[ftmp0], %[pix], 0x00) 2196 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2197 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 2198 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t" 2199 "punpcklhw %[ftmp5], %[ftmp4], 
%[ftmp6] \n\t" 2200 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2201 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2202 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2203 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t" 2204 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t" 2205 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2206 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t" 2207 MMI_USWC1(%[ftmp4], %[addr4], 0x00) 2208 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t" 2209 MMI_USWC1(%[ftmp9], %[addr3], 0x00) 2210 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2211 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2212 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2213 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2214 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 2215 [ftmp10]"=&f"(ftmp[10]), 2216 RESTRICT_ASM_LOW32 2217 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 2218 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]), 2219 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), 2220 [pix]"+&r"(pix) 2221 : [alpha]"r"(alpha), [beta]"r"(beta), 2222 [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f) 2223 : "memory" 2224 ); 2225} 2226 2227void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, 2228 int8_t *tc0) 2229{ 2230 if ((tc0[0] & tc0[1]) >= 0) 2231 ff_deblock_v8_luma_8_mmi(pix + 0, stride, alpha, beta, tc0); 2232 if ((tc0[2] & tc0[3]) >= 0) 2233 ff_deblock_v8_luma_8_mmi(pix + 8, stride, alpha, beta, tc0 + 2); 2234} 2235 2236void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, 2237 int beta) 2238{ 2239 deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta); 2240 deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta); 2241} 2242 2243void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, 2244 int8_t *tc0) 2245{ 2246 DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]); 2247 double ftmp[9]; 2248 mips_reg addr[8]; 2249 DECLARE_VAR_LOW32; 2250 DECLARE_VAR_ALL64; 2251 2252 __asm__ volatile ( 2253 PTR_ADDU "%[addr0], %[stride], %[stride] 
\n\t" 2254 PTR_ADDI "%[addr1], %[pix], -0x4 \n\t" 2255 PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t" 2256 MMI_ULDC1(%[ftmp0], %[addr1], 0x00) 2257 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t" 2258 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t" 2259 MMI_ULDC1(%[ftmp1], %[addr3], 0x00) 2260 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t" 2261 MMI_ULDC1(%[ftmp2], %[addr5], 0x00) 2262 MMI_ULDC1(%[ftmp3], %[addr4], 0x00) 2263 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t" 2264 MMI_ULDC1(%[ftmp4], %[addr3], 0x00) 2265 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t" 2266 MMI_ULDC1(%[ftmp5], %[addr5], 0x00) 2267 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t" 2268 MMI_ULDC1(%[ftmp6], %[addr3], 0x00) 2269 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t" 2270 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t" 2271 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2272 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t" 2273 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2274 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t" 2275 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2276 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t" 2277 MMI_SDC1(%[ftmp1], %[stack], 0x10) 2278 MMI_ULDC1(%[ftmp8], %[addr3], 0x00) 2279 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t" 2280 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t" 2281 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 2282 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2283 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2284 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 2285 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2286 MMI_LDC1(%[ftmp8], %[stack], 0x10) 2287 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 2288 MMI_SDC1(%[ftmp0], %[stack], 0x00) 2289 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t" 2290 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 2291 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t" 2292 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 2293 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 2294 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t" 2295 "punpcklwd %[ftmp7], 
%[ftmp7], %[ftmp3] \n\t" 2296 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t" 2297 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2298 MMI_SDC1(%[ftmp1], %[stack], 0x10) 2299 MMI_SDC1(%[ftmp3], %[stack], 0x20) 2300 MMI_SDC1(%[ftmp7], %[stack], 0x30) 2301 MMI_SDC1(%[ftmp5], %[stack], 0x40) 2302 MMI_SDC1(%[ftmp6], %[stack], 0x50) 2303 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t" 2304 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t" 2305 MMI_ULDC1(%[ftmp0], %[addr1], 0x00) 2306 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t" 2307 MMI_ULDC1(%[ftmp1], %[addr3], 0x00) 2308 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t" 2309 MMI_ULDC1(%[ftmp2], %[addr5], 0x00) 2310 MMI_ULDC1(%[ftmp3], %[addr4], 0x00) 2311 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t" 2312 MMI_ULDC1(%[ftmp4], %[addr3], 0x00) 2313 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t" 2314 MMI_ULDC1(%[ftmp5], %[addr5], 0x00) 2315 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t" 2316 MMI_ULDC1(%[ftmp6], %[addr3], 0x00) 2317 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t" 2318 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2319 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t" 2320 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2321 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t" 2322 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2323 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t" 2324 MMI_SDC1(%[ftmp1], %[stack], 0x18) 2325 MMI_ULDC1(%[ftmp8], %[addr3], 0x00) 2326 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2327 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t" 2328 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 2329 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2330 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 2331 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2332 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 2333 MMI_LDC1(%[ftmp8], %[stack], 0x18) 2334 MMI_SDC1(%[ftmp0], %[stack], 0x08) 2335 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t" 2336 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 2337 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t" 2338 
"punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 2339 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t" 2340 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t" 2341 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t" 2342 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2343 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 2344 MMI_SDC1(%[ftmp1], %[stack], 0x18) 2345 MMI_SDC1(%[ftmp3], %[stack], 0x28) 2346 MMI_SDC1(%[ftmp7], %[stack], 0x38) 2347 MMI_SDC1(%[ftmp5], %[stack], 0x48) 2348 MMI_SDC1(%[ftmp6], %[stack], 0x58) 2349 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2350 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2351 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2352 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2353 [ftmp8]"=&f"(ftmp[8]), 2354 RESTRICT_ASM_ALL64 2355 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 2356 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]), 2357 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), 2358 [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7]) 2359 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 2360 [stack]"r"(stack) 2361 : "memory" 2362 ); 2363 2364 ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0); 2365 2366 __asm__ volatile ( 2367 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t" 2368 PTR_ADDI "%[addr1], %[pix], -0x02 \n\t" 2369 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t" 2370 PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t" 2371 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t" 2372 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t" 2373 MMI_LDC1(%[ftmp0], %[stack], 0x10) 2374 MMI_LDC1(%[ftmp1], %[stack], 0x20) 2375 MMI_LDC1(%[ftmp2], %[stack], 0x30) 2376 MMI_LDC1(%[ftmp3], %[stack], 0x40) 2377 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t" 2378 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t" 2379 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t" 2380 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2381 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2382 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2383 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2384 
MMI_USWC1(%[ftmp1], %[addr1], 0x00) 2385 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t" 2386 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 2387 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t" 2388 MMI_USWC1(%[ftmp1], %[addr3], 0x00) 2389 MMI_USWC1(%[ftmp0], %[addr5], 0x00) 2390 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2391 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 2392 MMI_USWC1(%[ftmp0], %[addr4], 0x00) 2393 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2394 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 2395 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t" 2396 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t" 2397 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2398 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2399 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t" 2400 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2401 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t" 2402 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2403 MMI_USWC1(%[ftmp4], %[addr5], 0x00) 2404 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t" 2405 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 2406 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t" 2407 MMI_USWC1(%[ftmp4], %[addr3], 0x00) 2408 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t" 2409 MMI_LDC1(%[ftmp0], %[stack], 0x18) 2410 MMI_LDC1(%[ftmp1], %[stack], 0x28) 2411 MMI_LDC1(%[ftmp2], %[stack], 0x38) 2412 MMI_LDC1(%[ftmp3], %[stack], 0x48) 2413 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t" 2414 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t" 2415 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t" 2416 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t" 2417 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t" 2418 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" 2419 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" 2420 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t" 2421 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t" 2422 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 2423 MMI_USWC1(%[ftmp1], %[addr1], 0x00) 2424 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" 2425 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t" 
2426 MMI_USWC1(%[ftmp1], %[addr3], 0x00) 2427 MMI_USWC1(%[ftmp0], %[addr5], 0x00) 2428 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2429 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 2430 MMI_USWC1(%[ftmp0], %[addr4], 0x00) 2431 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 2432 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" 2433 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t" 2434 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t" 2435 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 2436 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2437 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t" 2438 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 2439 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t" 2440 MMI_USWC1(%[ftmp5], %[addr3], 0x00) 2441 MMI_USWC1(%[ftmp4], %[addr5], 0x00) 2442 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t" 2443 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 2444 MMI_USWC1(%[ftmp4], %[addr3], 0x00) 2445 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2446 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2447 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2448 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2449 [ftmp8]"=&f"(ftmp[8]), 2450 RESTRICT_ASM_LOW32 2451 RESTRICT_ASM_ALL64 2452 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), 2453 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]), 2454 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), 2455 [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7]) 2456 : [pix]"r"(pix), [stride]"r"((mips_reg)stride), 2457 [stack]"r"(stack) 2458 : "memory" 2459 ); 2460} 2461

/*
 * Intra luma deblocking in the horizontal ("h") direction, implemented on
 * top of ff_deblock_v_luma_intra_8_mmi().
 *
 * The function works in three steps:
 *  1. Load 16 rows of 8 bytes starting at pix - 4 and transpose them into
 *     the scratch buffer ptmp, which then holds 8 rows of 16 bytes
 *     (row pitch 0x10).
 *  2. Run ff_deblock_v_luma_intra_8_mmi() on ptmp, pointing it at row 4
 *     (&ptmp[8]) with a stride of 0x10.
 *  3. Transpose ptmp back and store it over the same 16 rows of 8 bytes.
 *
 * pix    - base pointer; bytes pix - 4 .. pix + 3 of 16 consecutive rows
 *          are read and overwritten
 * stride - byte distance between consecutive rows at pix
 * alpha  - passed through unchanged to ff_deblock_v_luma_intra_8_mmi()
 * beta   - passed through unchanged to ff_deblock_v_luma_intra_8_mmi()
 */
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
        int beta)
{
    /*
     * Scratch storage. Both arrays are written by the asm statements below
     * (and ptmp also by the callee), so they must not be const-qualified:
     * modifying an object defined const is undefined behaviour.
     */
    DECLARE_ALIGNED(8, uint64_t, ptmp[0x11]); /* transposed pixels       */
    DECLARE_ALIGNED(8, uint64_t, pdat[0x04]); /* addresses saved for the */
                                              /* second asm statement    */
    double ftmp[9];
    mips_reg addr[7];
    DECLARE_VAR_ALL64;

    /* Step 1: transpose 16 rows x 8 bytes at pix - 4 into ptmp. */
    __asm__ volatile (
        /* addr0 = 2 * stride, addr2 = 3 * stride, addr3 = 4 * stride,
         * addr1 = pix - 4 (row 0), addr4 = row 3 */
        PTR_ADDU   "%[addr0], %[stride], %[stride] \n\t"
        PTR_ADDI   "%[addr1], %[pix], -0x04 \n\t"
        PTR_ADDU   "%[addr2], %[addr0], %[stride] \n\t"
        PTR_ADDU   "%[addr3], %[addr0], %[addr0] \n\t"
        PTR_ADDU   "%[addr4], %[addr1], %[addr2] \n\t"
        /* Load rows 0..6 into ftmp0..ftmp6 (row 7 goes to ftmp8 later). */
        PTR_ADDU   "%[addr5], %[addr1], %[stride] \n\t"
        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
        PTR_ADDU   "%[addr6], %[addr1], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[stride] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr2] \n\t"
        MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
        MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr3] \n\t"
        /* Byte, halfword and word interleaves: together they transpose
         * the 8x8 byte tile. ptmp offsets 0x00 and 0x20 are used as
         * temporary spill slots before receiving their final values. */
        "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
        "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
        MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
        "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
        "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
        MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
        "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
        "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        /* Transposed rows of the first tile go to ptmp + k * 0x10. */
        MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
        MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
        MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
        MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
        MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
        "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
        "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
        "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        /* addr5 = 8 * stride: advance both row pointers to rows 8..15. */
        PTR_ADDU   "%[addr5], %[addr3], %[addr3] \n\t"
        MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
        MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
        MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
        MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
        PTR_ADDU   "%[addr1], %[addr1], %[addr5] \n\t"
        PTR_ADDU   "%[addr4], %[addr4], %[addr5] \n\t"
        /* Second tile: same sequence for rows 8..15, stored to
         * ptmp + 0x08 + k * 0x10 (spill slots are 0x08 and 0x28). */
        PTR_ADDU   "%[addr5], %[addr1], %[stride] \n\t"
        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
        PTR_ADDU   "%[addr6], %[addr1], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[stride] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr2] \n\t"
        MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
        MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr3] \n\t"
        "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
        "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
        MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
        "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
        "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
        MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
        "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
        "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
        MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
        MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
        MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
        MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
        "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
        "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
        "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
        MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
        MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
        MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
        /* Save row-8 pointer, 3 * stride, 2 * stride and 4 * stride in
         * pdat; the second asm statement reloads them from there. */
        PTR_S      "%[addr1], 0x00(%[pdat]) \n\t"
        PTR_S      "%[addr2], 0x08(%[pdat]) \n\t"
        PTR_S      "%[addr0], 0x10(%[pdat]) \n\t"
        PTR_S      "%[addr3], 0x18(%[pdat]) \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
          [addr6]"=&r"(addr[6])
        : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
          [ptmp]"r"(ptmp), [pdat]"r"(pdat)
        : "memory"
    );

    /* Step 2: filter the transposed block; byte offset 0x40 is row 4. */
    ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);

    /* Step 3: transpose ptmp back and store it over the source rows. */
    __asm__ volatile (
        /* Reload the values saved in pdat; addr1 points at row 8. */
        PTR_L      "%[addr1], 0x00(%[pdat]) \n\t"
        PTR_L      "%[addr2], 0x08(%[pdat]) \n\t"
        PTR_L      "%[addr0], 0x10(%[pdat]) \n\t"
        PTR_L      "%[addr3], 0x18(%[pdat]) \n\t"
        PTR_ADDU   "%[addr4], %[addr1], %[addr2] \n\t"
        /* Right half of ptmp (offsets 0x08 + k * 0x10) -> rows 8..15. */
        MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
        MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
        MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
        MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
        MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
        MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
        MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
        "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
        "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
        /* The destination rows at addr1 and addr1 + 2 * stride double as
         * spill slots; they are overwritten with final data further down. */
        MMI_USDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU   "%[addr5], %[addr1], %[addr0] \n\t"
        "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
        "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        MMI_USDC1(%[ftmp2], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
        "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
        "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        PTR_ADDU   "%[addr5], %[addr1], %[stride] \n\t"
        MMI_USDC1(%[ftmp0], %[addr1], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[stride] \n\t"
        MMI_USDC1(%[ftmp5], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr0] \n\t"
        MMI_USDC1(%[ftmp7], %[addr6], 0x00)
        PTR_ADDU   "%[addr6], %[addr1], %[addr0] \n\t"
        MMI_USDC1(%[ftmp4], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
        PTR_ADDU   "%[addr5], %[addr1], %[addr0] \n\t"
        "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
        "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
        "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        MMI_USDC1(%[ftmp3], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr2] \n\t"
        MMI_USDC1(%[ftmp0], %[addr4], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[addr3] \n\t"
        MMI_USDC1(%[ftmp6], %[addr5], 0x00)
        /* addr5 = 8 * stride: step both row pointers back to rows 0..7. */
        PTR_ADDU   "%[addr5], %[addr3], %[addr3] \n\t"
        MMI_USDC1(%[ftmp5], %[addr6], 0x00)
        PTR_SUBU   "%[addr1], %[addr1], %[addr5] \n\t"
        PTR_SUBU   "%[addr4], %[addr4], %[addr5] \n\t"
        /* Left half of ptmp (offsets k * 0x10) -> rows 0..7. */
        MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
        MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
        MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
        MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
        MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
        MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
        MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
        "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
        "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
        MMI_USDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU   "%[addr5], %[addr1], %[addr0] \n\t"
        "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
        "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        MMI_USDC1(%[ftmp2], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
        "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
        "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        PTR_ADDU   "%[addr5], %[addr1], %[stride] \n\t"
        MMI_USDC1(%[ftmp0], %[addr1], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[stride] \n\t"
        MMI_USDC1(%[ftmp5], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr0] \n\t"
        MMI_USDC1(%[ftmp7], %[addr6], 0x00)
        PTR_ADDU   "%[addr6], %[addr1], %[addr0] \n\t"
        MMI_USDC1(%[ftmp4], %[addr5], 0x00)
        MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
        PTR_ADDU   "%[addr5], %[addr1], %[addr0] \n\t"
        "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
        "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
        "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        MMI_USDC1(%[ftmp3], %[addr5], 0x00)
        PTR_ADDU   "%[addr5], %[addr4], %[addr2] \n\t"
        MMI_USDC1(%[ftmp0], %[addr4], 0x00)
        PTR_ADDU   "%[addr6], %[addr4], %[addr3] \n\t"
        MMI_USDC1(%[ftmp6], %[addr5], 0x00)
        MMI_USDC1(%[ftmp5], %[addr6], 0x00)
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
          [addr6]"=&r"(addr[6])
        : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
          [ptmp]"r"(ptmp), [pdat]"r"(pdat)
        : "memory"
    );
}
2729