/*
 * Loongson SIMD optimized h264chroma
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "h264chroma_mips.h"
#include "constants.h"
#include "libavutil/mips/mmiutils.h"

/**
 * 8-pixel-wide H.264 chroma "put" (dst is overwritten) with bilinear
 * eighth-pel interpolation, Loongson MMI version.
 *
 * The bilinear weights are derived incrementally below and equal
 *   A = (8-x)(8-y), B = x(8-y), C = (8-x)y, D = xy,
 * so each output byte is
 *   (A*s[0] + B*s[1] + C*s[stride] + D*s[stride+1] + 32) >> 6,
 * where the rounding is done by adding ff_pw_32 and shifting right by the
 * 0x06 count moved into an FP register with mtc1.
 *
 * @param dst    destination; 8 bytes stored per row (MMI_SDC1)
 * @param src    source pixels; loaded unaligned (MMI_ULDC1)
 * @param stride byte stride used for both src and dst
 * @param h      number of rows; the loops decrement by 4 (x==y==0 branch),
 *               2 (x&&y and y-only branches) or 1 (x-only branch) and exit
 *               only on exactly zero, so h must be a multiple of that step
 * @param x      horizontal fractional position, 0..7
 * @param y      vertical fractional position, 0..7
 */
void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                int h, int x, int y)
{
    double ftmp[12];
    union mmi_intfloat64 A, B, C, D, E;
    DECLARE_VAR_ALL64;

    A.i = 64;

    if (!(x || y)) {
        /* x=0, y=0, A.i=64: pure copy, 4 rows per iteration */
        __asm__ volatile (
            "1:                                                 \n\t"
            MMI_ULDC1(%[ftmp0], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp2], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp3], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"

            "addi       %[h],       %[h],       -0x04           \n\t"

            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp2], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp3], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride)
            : "memory"
        );
    } else if (x && y) {
        /* x!=0, y!=0: full 4-tap bilinear filter.
         * D = xy, B = 8x - xy = x(8-y), C = 8y - xy = (8-x)y,
         * A = 64 - B - C - D = (8-x)(8-y). */
        D.i = x * y;
        B.i = (x << 3) - D.i;
        C.i = (y << 3) - D.i;
        A.i = 64 - D.i - B.i - C.i;

        __asm__ volatile (
            /* broadcast the four 16-bit weights; ftmp9 = shift count 6 */
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[B],       %[B],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp9]                    \n\t"
            "pshufh     %[C],       %[C],       %[ftmp0]        \n\t"
            "pshufh     %[D],       %[D],       %[ftmp0]        \n\t"

            "1:                                                 \n\t"
            /* three source rows per iteration (row n, n+1, n+2), two output
             * rows; src is advanced only twice, so the third row is reloaded
             * as the first row of the next iteration */
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            MMI_ULDC1(%[ftmp2], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp3], %[src], 0x00)
            MMI_ULDC1(%[ftmp4], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp10], %[src], 0x00)
            MMI_ULDC1(%[ftmp11], %[src], 0x01)
            "addi       %[h],       %[h],       -0x02           \n\t"

            /* first output row: A*row0 + B*row0+1px + C*row1 + D*row1+1px */
            "punpcklbh  %[ftmp5],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[A]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[B]            \n\t"
            "paddh      %[ftmp1],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[A]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[B]            \n\t"
            "paddh      %[ftmp2],   %[ftmp6],   %[ftmp8]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp4],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp4],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[C]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[D]            \n\t"
            "paddh      %[ftmp5],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[C]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[D]            \n\t"
            "paddh      %[ftmp6],   %[ftmp6],   %[ftmp8]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ftmp5]        \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ftmp6]        \n\t"
            /* round: +32, >>6, saturate back to bytes */
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp9]        \n\t"
            "psrlh      %[ftmp2],   %[ftmp2],   %[ftmp9]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"

            /* second output row: A*row1 + B*row1+1px + C*row2 + D*row2+1px */
            "punpcklbh  %[ftmp5],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp4],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp4],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[A]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[B]            \n\t"
            "paddh      %[ftmp3],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[A]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[B]            \n\t"
            "paddh      %[ftmp4],   %[ftmp6],   %[ftmp8]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp10],  %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp10],  %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp11],  %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp11],  %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[C]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[D]            \n\t"
            "paddh      %[ftmp5],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[C]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[D]            \n\t"
            "paddh      %[ftmp6],   %[ftmp6],   %[ftmp8]        \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ftmp5]        \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ftmp6]        \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp3],   %[ftmp3],   %[ftmp9]        \n\t"
            "psrlh      %[ftmp4],   %[ftmp4],   %[ftmp9]        \n\t"
            "packushb   %[ftmp3],   %[ftmp3],   %[ftmp4]        \n\t"

            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp3], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
              [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
              [A]"f"(A.f),                  [B]"f"(B.f),
              [C]"f"(C.f),                  [D]"f"(D.f),
              [tmp0]"r"(0x06)
            : "memory"
        );
    } else if (x) {
        /* x!=0, y==0: horizontal 2-tap only, E = 8x, A = 64 - 8x */
        E.i = x << 3;
        A.i = 64 - E.i;

        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"

            "1:                                                 \n\t"
            /* one output row per iteration: A*s[0] + E*s[1] */
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            MMI_ULDC1(%[ftmp2], %[src], 0x01)
            "addi       %[h],       %[h],       -0x01           \n\t"
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"

            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp4],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[E]            \n\t"
            "paddh      %[ftmp1],   %[ftmp3],   %[ftmp5]        \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[E]            \n\t"
            "paddh      %[ftmp2],   %[ftmp4],   %[ftmp6]        \n\t"

            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp7]        \n\t"
            "psrlh      %[ftmp2],   %[ftmp2],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),
              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
              [A]"f"(A.f),                  [E]"f"(E.f)
            : "memory"
        );
    } else {
        /* x==0, y!=0: vertical 2-tap only, E = 8y, A = 64 - 8y */
        E.i = y << 3;
        A.i = 64 - E.i;

        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"

            "1:                                                 \n\t"
            /* three rows loaded, two rows produced; src advanced twice so
             * the third row is re-read as row 0 of the next iteration */
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp2], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp8], %[src], 0x00)
            "addi       %[h],       %[h],       -0x02           \n\t"

            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp4],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[E]            \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ftmp5]        \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[E]            \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ftmp6]        \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp3],   %[ftmp3],   %[ftmp7]        \n\t"
            "psrlh      %[ftmp4],   %[ftmp4],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp3],   %[ftmp4]        \n\t"

            "punpcklbh  %[ftmp3],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp4],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp8],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp8],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[E]            \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ftmp5]        \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[E]            \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ftmp6]        \n\t"
            "paddh      %[ftmp3],   %[ftmp3],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp4],   %[ftmp4],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp3],   %[ftmp3],   %[ftmp7]        \n\t"
            "psrlh      %[ftmp4],   %[ftmp4],   %[ftmp7]        \n\t"
            "packushb   %[ftmp2],   %[ftmp3],   %[ftmp4]        \n\t"

            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp2], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [ftmp8]"=&f"(ftmp[8]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),
              [ff_pw_32]"f"(ff_pw_32.f),    [A]"f"(A.f),
              [E]"f"(E.f),                  [tmp0]"r"(0x06)
            : "memory"
        );
    }
}

/**
 * 8-pixel-wide H.264 chroma "avg" variant: identical interpolation to
 * ff_put_h264_chroma_mc8_mmi, but the interpolated row is combined with the
 * bytes already in dst using pavgb (per-byte rounded average) before storing.
 * All non-copy branches produce one row per loop iteration, so here h only
 * needs to be even in the x==y==0 branch (which decrements by 2).
 */
void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                int h, int x, int y)
{
    double ftmp[10];
    union mmi_intfloat64 A, B, C, D, E;
    DECLARE_VAR_ALL64;

    A.i = 64;

    if(!(x || y)){
        /* x=0, y=0, A.i=64: average src rows straight into dst, 2 rows/iter */
        __asm__ volatile (
            "1:                                                 \n\t"
            MMI_ULDC1(%[ftmp0], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            /* peek at both dst rows (dst is assumed 8-byte aligned here:
             * MMI_LDC1, not the unaligned ULDC1), then step back one row */
            MMI_LDC1(%[ftmp2], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_LDC1(%[ftmp3], %[dst], 0x00)
            PTR_SUBU   "%[dst],     %[dst],     %[stride]       \n\t"
            "pavgb      %[ftmp0],   %[ftmp0],   %[ftmp2]        \n\t"
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp3]        \n\t"
            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x02           \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride)
            : "memory"
        );
    } else if (x && y) {
        /* x!=0, y!=0: 4-tap bilinear, same weight derivation as the put
         * version, one row per iteration (both source rows reloaded) */
        D.i = x * y;
        B.i = (x << 3) - D.i;
        C.i = (y << 3) - D.i;
        A.i = 64 - D.i - B.i - C.i;
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[B],       %[B],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp9]                    \n\t"
            "pshufh     %[C],       %[C],       %[ftmp0]        \n\t"
            "pshufh     %[D],       %[D],       %[ftmp0]        \n\t"

            "1:                                                 \n\t"
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            MMI_ULDC1(%[ftmp2], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp3], %[src], 0x00)
            MMI_ULDC1(%[ftmp4], %[src], 0x01)
            "addi       %[h],       %[h],       -0x01           \n\t"

            "punpcklbh  %[ftmp5],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[A]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[B]            \n\t"
            "paddh      %[ftmp1],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[A]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[B]            \n\t"
            "paddh      %[ftmp2],   %[ftmp6],   %[ftmp8]        \n\t"

            "punpcklbh  %[ftmp5],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp7],   %[ftmp4],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp8],   %[ftmp4],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[C]            \n\t"
            "pmullh     %[ftmp7],   %[ftmp7],   %[D]            \n\t"
            "paddh      %[ftmp3],   %[ftmp5],   %[ftmp7]        \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[C]            \n\t"
            "pmullh     %[ftmp8],   %[ftmp8],   %[D]            \n\t"
            "paddh      %[ftmp4],   %[ftmp6],   %[ftmp8]        \n\t"

            "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]        \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ftmp4]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp9]        \n\t"
            "psrlh      %[ftmp2],   %[ftmp2],   %[ftmp9]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            /* blend with the existing dst row */
            MMI_LDC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
              [A]"f"(A.f),                  [B]"f"(B.f),
              [C]"f"(C.f),                  [D]"f"(D.f),
              [tmp0]"r"(0x06)
            : "memory"
        );
    } else if (x) {
        /* x!=0, y==0: horizontal 2-tap, then pavgb with dst */
        E.i = x << 3;
        A.i = 64 - E.i;
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"

            "1:                                                 \n\t"
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            MMI_ULDC1(%[ftmp2], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x01           \n\t"

            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp4],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[E]            \n\t"
            "paddh      %[ftmp1],   %[ftmp3],   %[ftmp5]        \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[E]            \n\t"
            "paddh      %[ftmp2],   %[ftmp4],   %[ftmp6]        \n\t"

            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp7]        \n\t"
            "psrlh      %[ftmp2],   %[ftmp2],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_LDC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),
              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
              [A]"f"(A.f),                  [E]"f"(E.f)
            : "memory"
        );
    } else {
        /* x==0, y!=0: vertical 2-tap, then pavgb with dst; src is advanced
         * once per iteration so the second row is reloaded next time */
        E.i = y << 3;
        A.i = 64 - E.i;
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"

            "1:                                                 \n\t"
            MMI_ULDC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULDC1(%[ftmp2], %[src], 0x00)
            "addi       %[h],       %[h],       -0x01           \n\t"

            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp4],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp2],   %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[E]            \n\t"
            "paddh      %[ftmp1],   %[ftmp3],   %[ftmp5]        \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[E]            \n\t"
            "paddh      %[ftmp2],   %[ftmp4],   %[ftmp6]        \n\t"

            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "paddh      %[ftmp2],   %[ftmp2],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp7]        \n\t"
            "psrlh      %[ftmp2],   %[ftmp2],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_LDC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_SDC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : RESTRICT_ASM_ALL64
              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),
              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
              [A]"f"(A.f),                  [E]"f"(E.f)
            : "memory"
        );
    }
}

/**
 * 4-pixel-wide H.264 chroma "put" with bilinear eighth-pel interpolation,
 * Loongson MMI version. Works on the low 32 bits of the FP registers
 * (MMI_ULWC1 / MMI_SWC1) instead of full 64-bit rows.
 *
 * Weights are computed directly: A = (8-x)(8-y), B = x(8-y), C = (8-x)y,
 * D = xy, E = B + C. Dispatch:
 *   - D != 0 (x && y):  full 4-tap filter;
 *   - E != 0 (one of x,y): 2-tap filter, second tap at src+step where
 *     step = C.i ? stride : 1 selects the vertical or horizontal neighbour;
 *   - otherwise:        plain copy, 2 rows per iteration (h must be even
 *     in that branch; the other loops decrement h by 1).
 * Each filtered byte is (sum + 32) >> 6 (ff_pw_32 plus the 0x06 shift).
 */
void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                int h, int x, int y)
{
    double ftmp[8];
    mips_reg addr[1];
    union mmi_intfloat64 A, B, C, D, E;
    DECLARE_VAR_LOW32;
    A.i = (8 - x) * (8 - y);
    B.i = x * (8 - y);
    C.i = (8 - x) * y;
    D.i = x * y;
    E.i = B.i + C.i;

    if (D.i) {
        /* x && y: 4-tap bilinear, one 4-byte row per iteration; src is
         * advanced once, so row 1 becomes row 0 of the next iteration */
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[B],       %[B],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"
            "pshufh     %[C],       %[C],       %[ftmp0]        \n\t"
            "pshufh     %[D],       %[D],       %[ftmp0]        \n\t"

            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            MMI_ULWC1(%[ftmp2], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULWC1(%[ftmp3], %[src], 0x00)
            MMI_ULWC1(%[ftmp4], %[src], 0x01)

            /* A*row0 + B*row0+1px + C*row1 + D*row1+1px, +32, >>6 */
            "punpcklbh  %[ftmp5],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[B]            \n\t"
            "paddh      %[ftmp1],   %[ftmp5],   %[ftmp6]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp6],   %[ftmp4],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[C]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[D]            \n\t"
            "paddh      %[ftmp2],   %[ftmp5],   %[ftmp6]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"

            "addi       %[h],       %[h],       -0x01           \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              RESTRICT_ASM_LOW32
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
              [A]"f"(A.f),                  [B]"f"(B.f),
              [C]"f"(C.f),                  [D]"f"(D.f),
              [tmp0]"r"(0x06)
            : "memory"
        );
    } else if (E.i) {
        /* exactly one of x, y is non-zero: 2-tap filter A*s[0] + E*s[step],
         * step picks the vertical (stride) or horizontal (1) neighbour */
        const int step = C.i ? stride : 1;
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp5]                    \n\t"

            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[addr0],   %[src],     %[step]         \n\t"
            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x01           \n\t"
            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp4],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[E]            \n\t"
            "paddh      %[ftmp1],   %[ftmp3],   %[ftmp4]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp5]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              RESTRICT_ASM_LOW32
              [addr0]"=&r"(addr[0]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
              [A]"f"(A.f),                  [E]"f"(E.f)
            : "memory"
        );
    } else {
        /* x==0, y==0: straight 4-byte copy, 2 rows per iteration */
        __asm__ volatile (
            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp0], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x02           \n\t"
            MMI_SWC1(%[ftmp0], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              RESTRICT_ASM_LOW32
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride)
            : "memory"
        );
    }
}

/**
 * 4-pixel-wide H.264 chroma "avg" variant: same interpolation and dispatch
 * as ff_put_h264_chroma_mc4_mmi, but the result is combined with the bytes
 * already in dst via pavgb (per-byte rounded average) before being stored.
 */
void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                int h, int x, int y)
{
    double ftmp[8];
    mips_reg addr[1];
    union mmi_intfloat64 A, B, C, D, E;
    DECLARE_VAR_LOW32;
    A.i = (8 - x) *(8 - y);
    B.i = x * (8 - y);
    C.i = (8 - x) * y;
    D.i = x * y;
    E.i = B.i + C.i;

    if (D.i) {
        /* x && y: 4-tap bilinear plus pavgb with dst, one row per iter */
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[B],       %[B],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                    \n\t"
            "pshufh     %[C],       %[C],       %[ftmp0]        \n\t"
            "pshufh     %[D],       %[D],       %[ftmp0]        \n\t"

            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            MMI_ULWC1(%[ftmp2], %[src], 0x01)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULWC1(%[ftmp3], %[src], 0x00)
            MMI_ULWC1(%[ftmp4], %[src], 0x01)

            "punpcklbh  %[ftmp5],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp6],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[A]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[B]            \n\t"
            "paddh      %[ftmp1],   %[ftmp5],   %[ftmp6]        \n\t"
            "punpcklbh  %[ftmp5],   %[ftmp3],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp6],   %[ftmp4],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp5],   %[ftmp5],   %[C]            \n\t"
            "pmullh     %[ftmp6],   %[ftmp6],   %[D]            \n\t"
            "paddh      %[ftmp2],   %[ftmp5],   %[ftmp6]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp7]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
            /* blend with the existing dst row */
            MMI_LWC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"

            "addi       %[h],       %[h],       -0x01           \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              RESTRICT_ASM_LOW32
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
              [A]"f"(A.f),                  [B]"f"(B.f),
              [C]"f"(C.f),                  [D]"f"(D.f),
              [tmp0]"r"(0x06)
            : "memory"
        );
    } else if (E.i) {
        /* one of x, y non-zero: 2-tap filter plus pavgb with dst */
        const int step = C.i ? stride : 1;
        __asm__ volatile (
            "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
            "pshufh     %[A],       %[A],       %[ftmp0]        \n\t"
            "pshufh     %[E],       %[E],       %[ftmp0]        \n\t"
            "mtc1       %[tmp0],    %[ftmp5]                    \n\t"

            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[addr0],   %[src],     %[step]         \n\t"
            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x01           \n\t"
            "punpcklbh  %[ftmp3],   %[ftmp1],   %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp4],   %[ftmp2],   %[ftmp0]        \n\t"
            "pmullh     %[ftmp3],   %[ftmp3],   %[A]            \n\t"
            "pmullh     %[ftmp4],   %[ftmp4],   %[E]            \n\t"
            "paddh      %[ftmp1],   %[ftmp3],   %[ftmp4]        \n\t"
            "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_32]     \n\t"
            "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp5]        \n\t"
            "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
            MMI_LWC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]        \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              RESTRICT_ASM_LOW32
              [addr0]"=&r"(addr[0]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
              [ff_pw_32]"f"(ff_pw_32.f),    [tmp0]"r"(0x06),
              [A]"f"(A.f),                  [E]"f"(E.f)
            : "memory"
        );
    } else {
        /* x==0, y==0: average src straight into dst, 2 rows per iteration
         * (h must be even in this branch) */
        __asm__ volatile (
            "1:                                                 \n\t"
            MMI_ULWC1(%[ftmp0], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            MMI_ULWC1(%[ftmp1], %[src], 0x00)
            PTR_ADDU   "%[src],     %[src],     %[stride]       \n\t"
            "addi       %[h],       %[h],       -0x02           \n\t"
            MMI_LWC1(%[ftmp2], %[dst], 0x00)
            "pavgb      %[ftmp0],   %[ftmp0],   %[ftmp2]        \n\t"
            MMI_SWC1(%[ftmp0], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            MMI_LWC1(%[ftmp3], %[dst], 0x00)
            "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp3]        \n\t"
            MMI_SWC1(%[ftmp1], %[dst], 0x00)
            PTR_ADDU   "%[dst],     %[dst],     %[stride]       \n\t"
            "bnez       %[h],       1b                          \n\t"
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [dst]"+&r"(dst),              [src]"+&r"(src),
              RESTRICT_ASM_LOW32
              [h]"+&r"(h)
            : [stride]"r"((mips_reg)stride)
            : "memory"
        );
    }
}