/*
 * Loongson SIMD optimized qpeldsp
 *
 * Copyright (c) 2016 Loongson Technology Corporation Limited
 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "hpeldsp_mips.h"
#include "libavcodec/bit_depth_template.c"
#include "libavutil/mips/mmiutils.h"
#include "constants.h"

/*
 * Copy a 4-pixel-wide column of h rows from pixels to block.
 * Two rows are moved per loop iteration via 32-bit unaligned
 * loads (MMI_ULWC1) and 32-bit stores (MMI_SWC1), so h is
 * assumed to be a positive multiple of 2 (the counter is
 * decremented by 2 and tested with bnez).
 * ftmp2/ftmp3 are declared but unused by this asm body.
 */
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    double ftmp[4];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        "1: \n\t"
        /* load two source rows, advancing pixels by line_size each time */
        MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        PTR_ADDI "%[h], %[h], -0x02 \n\t"

        /* store the two rows to the destination */
        MMI_SWC1(%[ftmp0], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SWC1(%[ftmp1], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_LOW32
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

void
/*
 * (continuation: the "void" of this signature is on the preceding line)
 * Copy an 8-pixel-wide column of h rows, four rows per loop iteration,
 * using 64-bit unaligned loads (MMI_ULDC1) and 64-bit stores (MMI_SDC1).
 * h is assumed to be a positive multiple of 4.
 */
ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    double ftmp[4];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        "1: \n\t"
        /* load four consecutive source rows */
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"

        /* store the four rows */
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp1], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[block], 0x00)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_ALL64
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

/*
 * Copy a 16-pixel-wide column of h rows, four rows per iteration.
 * Each row is moved as two 64-bit halves (offsets 0x00 and 0x08).
 * h is assumed to be a positive multiple of 4.
 */
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    double ftmp[8];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"

        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDC1(%[ftmp2], %[block], 0x08)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp1], %[block], 0x00)
        MMI_SDC1(%[ftmp3], %[block], 0x08)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[block], 0x00)
        MMI_SDC1(%[ftmp6], %[block], 0x08)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"
        MMI_SDC1(%[ftmp5], %[block], 0x00)
        MMI_SDC1(%[ftmp7], %[block], 0x08)
        PTR_ADDU "%[block], %[block], %[line_size] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          RESTRICT_ASM_ALL64
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

/*
 * Average a 4-pixel-wide column of h rows into block:
 * block[] = pavgb(block[], pixels[]) (per-byte rounded average),
 * two rows per iteration. h is assumed to be a positive multiple of 2.
 * (This function's closing constraint list continues on the next line.)
 */
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    double ftmp[4];
    mips_reg addr[2];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        "1: \n\t"
        /* addr0/addr1 point at the second row of source/destination */
        PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
        MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
        MMI_ULWC1(%[ftmp2], %[block], 0x00)
        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)

        PTR_ADDI "%[h], %[h], -0x02 \n\t"

        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SWC1(%[ftmp0], %[block], 0x00)
        MMI_SWC1(%[ftmp1], %[addr1], 0x00)
        PTR_ADDU "%[pixels], %[addr0], %[line_size] \n\t"
        PTR_ADDU "%[block], %[addr1], %[line_size] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_LOW32
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          /* tail of ff_avg_pixels4_8_mmi's constraint list (begins above) */
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

/*
 * Average an 8-pixel-wide column of h rows into block:
 * block[] = pavgb(block[], pixels[]), two rows per unrolled step,
 * two steps per loop iteration. h is assumed to be a positive
 * multiple of 4. addr2 caches 2*line_size for the row advance.
 */
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    double ftmp[4];
    mips_reg addr[3];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp2], %[block], 0x00)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
        PTR_ADDU "%[block], %[block], %[addr2] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp2], %[block], 0x00)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
        PTR_ADDU "%[block], %[block], %[addr2] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]),
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

/*
 * Average a 16-pixel-wide column of h rows into block, two rows per
 * unrolled step (signature continues on the next line).
 */
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t
    /* remainder of ff_avg_pixels16_8_mmi's signature (begins above) */
    line_size, int h)
{
    double ftmp[8];
    mips_reg addr[1];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        "1: \n\t"
        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        /* load two 16-byte source rows as 4x 64-bit halves */
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        /* load the matching two destination rows and average */
        MMI_ULDC1(%[ftmp2], %[block], 0x00)
        MMI_ULDC1(%[ftmp6], %[block], 0x08)
        PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDC1(%[ftmp4], %[block], 0x08)
        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
        MMI_SDC1(%[ftmp5], %[addr0], 0x08)
        PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp2], %[block], 0x00)
        MMI_ULDC1(%[ftmp6], %[block], 0x08)
        PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        MMI_SDC1(%[ftmp4], %[block], 0x08)
        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
        MMI_SDC1(%[ftmp5], %[addr0], 0x08)
        PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]),
          [block]"+&r"(block), [pixels]"+&r"(pixels),
          [h]"+&r"(h)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}

/*
 * dst[] = pavgb(src1[], src2[]) for a 4-pixel-wide column of h rows,
 * two rows per iteration; each of dst/src1/src2 advances by its own
 * stride. h is assumed to be a positive multiple of 2.
 */
inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[4];
    mips_reg addr[5];
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        "1: \n\t"
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
        PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"

        PTR_ADDI "%[h], %[h], -0x02 \n\t"

        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SWC1(%[ftmp0], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
        MMI_SWC1(%[ftmp1], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/*
 * dst[] = pavgb(src1[], src2[]) for an 8-pixel-wide column of h rows,
 * two rows per unrolled step, two steps per iteration; h is assumed to
 * be a positive multiple of 4 (body continues on the next line).
 */
inline void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[4];
    /* body of ff_put_pixels8_l2_8_mmi (declaration begins above) */
    mips_reg addr[5];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        /* addr2/addr3/addr4 cache the doubled strides */
        PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
        PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
        PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"

        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          /* tail of ff_put_pixels8_l2_8_mmi's input list (begins above) */
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/*
 * dst[] = pavgb(src1[], src2[]) for a 16-pixel-wide column of h rows.
 * Same structure as the 8-wide version, but each row is handled as two
 * 64-bit halves (offsets 0x00/0x08). h is assumed to be a positive
 * multiple of 4.
 */
inline void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[8];
    mips_reg addr[5];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        /* addr2/addr3/addr4 cache the doubled strides */
        PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
        PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
        PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"

        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp4], %[src1], 0x08)
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp6], %[src2], 0x08)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        MMI_SDC1(%[ftmp4], %[dst], 0x08)
        MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp4], %[src1], 0x08)
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp6], %[src2], 0x08)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        MMI_SDC1(%[ftmp4], %[dst], 0x08)
        MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/*
 * dst[] = pavgb(dst[], pavgb(src1[], src2[])) for a 4-pixel-wide column
 * of h rows, two rows per iteration; i.e. average the two sources, then
 * average the result into the existing destination. h is assumed to be
 * a positive multiple of 2 (loop body continues on the next line).
 */
inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[6];
    mips_reg addr[6];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        "1: \n\t"
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
        PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        PTR_ADDU "%[addr2], %[dst], %[dst_stride] \n\t"
        MMI_ULWC1(%[ftmp4], %[dst], 0x00)
        MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
        PTR_ADDI "%[h], %[h], -0x02 \n\t"
        /* tail of ff_avg_pixels4_l2_8_mmi's loop (begins above):
         * fold the src average into the existing destination rows */
        "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        MMI_SWC1(%[ftmp0], %[dst], 0x00)
        MMI_SWC1(%[ftmp1], %[addr2], 0x00)
        PTR_ADDU "%[dst], %[addr2], %[dst_stride] \n\t"

        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          RESTRICT_ASM_LOW32
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/*
 * dst[] = pavgb(dst[], pavgb(src1[], src2[])) for an 8-pixel-wide
 * column of h rows, two rows per unrolled step, two steps per loop
 * iteration. h is assumed to be a positive multiple of 4.
 * addr2/addr3/addr4 cache the doubled strides.
 */
inline void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[6];
    mips_reg addr[6];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
        PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
        PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"

        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
        MMI_ULDC1(%[ftmp4], %[dst], 0x00)
        MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU
        /* continuation of ff_avg_pixels8_l2_8_mmi (PTR_ADDU opcode is above) */
        "%[dst], %[dst], %[addr4] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
        MMI_ULDC1(%[ftmp4], %[dst], 0x00)
        MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
        "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/* 16-wide avg-of-two-sources: two 8-wide calls on the left/right halves. */
inline void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    ff_avg_pixels8_l2_8_mmi(dst, src1, src2, dst_stride, src_stride1,
        src_stride2, h);
    ff_avg_pixels8_l2_8_mmi(dst + 8, src1 + 8, src2 + 8, dst_stride,
        src_stride1, src_stride2, h);
}

/* Half-pel x-interpolation: average each pixel with its right neighbour. */
void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* body of ff_put_pixels4_x2_8_mmi: x-interp = l2-average of
     * pixels and pixels+1 with identical strides */
    ff_put_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
        line_size, h);
}

/* Half-pel x-interpolation, 8 wide. */
void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
        line_size, h);
}

/* Half-pel x-interpolation, 16 wide. */
void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_pixels16_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
        line_size, h);
}

/* Half-pel x-interpolation averaged into the destination, 4 wide. */
void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
        line_size, h);
}

/* Half-pel x-interpolation averaged into the destination, 8 wide. */
void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
        line_size, h);
}

/* 16-wide variant: two 8-wide calls on the left/right halves. */
void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels8_x2_8_mmi(block, pixels, line_size, h);
    ff_avg_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
}

/*
 * No-rounding variant of the 8-wide l2 average: pcmpeqb fills ftmp4
 * with all-ones, and both inputs and the result are XORed with it
 * (complemented) around pavgb, which converts pavgb's round-up
 * average into a round-down ("no rnd") average.
 * h is assumed to be a positive multiple of 4.
 */
inline void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    double ftmp[5];
    mips_reg addr[5];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
        PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
        PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
        PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"

        "1: \n\t"
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        /* continuation of ff_put_no_rnd_pixels8_l2_8_mmi's loop */
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        /* complement, average, complement back: round-down average */
        "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        /* second unrolled copy of the two-row step above */
        MMI_ULDC1(%[ftmp0], %[src1], 0x00)
        PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
        MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
        MMI_ULDC1(%[ftmp2], %[src2], 0x00)
        PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
        MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
        PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
        MMI_SDC1(%[ftmp0], %[dst], 0x00)
        MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
        PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
        PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"

        PTR_ADDI "%[h], %[h], -0x04 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
          [addr4]"=&r"(addr[4]),
          [dst]"+&r"(dst), [src1]"+&r"(src1),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [dst_stride]"r"((mips_reg)dst_stride),
          [src_stride1]"r"((mips_reg)src_stride1),
          /* tail of ff_put_no_rnd_pixels8_l2_8_mmi's input list */
          [src_stride2]"r"((mips_reg)src_stride2)
        : "memory"
    );
}

/* No-rounding half-pel x-interpolation, 8 wide. */
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size,
        line_size, line_size, h);
}

/* No-rounding half-pel x-interpolation, 16 wide: two 8-wide halves. */
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_no_rnd_pixels8_x2_8_mmi(block, pixels, line_size, h);
    ff_put_no_rnd_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
}

/* Half-pel y-interpolation: average each row with the row below. */
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
        line_size, line_size, h);
}

/* Half-pel y-interpolation, 8 wide. */
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
        line_size, line_size, h);
}

/* Half-pel y-interpolation, 16 wide. */
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_pixels16_l2_8_mmi(block, pixels, pixels + line_size, line_size,
        line_size, line_size, h);
}

/* Half-pel y-interpolation averaged into the destination, 4 wide. */
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
        line_size, line_size, h);
}

/* Half-pel y-interpolation averaged into the destination, 8 wide. */
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
        line_size, line_size, h);
}

/* 16-wide variant: two 8-wide calls on the left/right halves. */
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_avg_pixels8_y2_8_mmi(block, pixels, line_size, h);
    ff_avg_pixels8_y2_8_mmi(block + 8, pixels + 8, line_size, h);
}

/* No-rounding half-pel y-interpolation, 8 wide (signature continues). */
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    /* remainder of ff_put_no_rnd_pixels8_y2_8_mmi's signature */
    ptrdiff_t line_size, int h)
{
    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + line_size,
        line_size, line_size, line_size, h);
}

/* No-rounding half-pel y-interpolation, 16 wide: two 8-wide halves. */
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    ff_put_no_rnd_pixels8_y2_8_mmi(block, pixels, line_size, h);
    ff_put_no_rnd_pixels8_y2_8_mmi(block + 8 , pixels + 8, line_size, h);
}

/*
 * Half-pel xy-interpolation (2x2 box filter with rounding), scalar,
 * 4 wide: splits each 32-bit row into low 2 bits (l*) and high 6 bits
 * (h*) per byte so four bytes can be averaged in parallel without
 * carries between lanes; 0x02020202 provides the +2 rounding term
 * before the >>2. h is assumed even (loop steps by 2).
 */
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    int i;
    const uint32_t a = AV_RN32(pixels);
    const uint32_t b = AV_RN32(pixels + 1);
    uint32_t l0 = (a & 0x03030303UL) +
                  (b & 0x03030303UL) +
                  0x02020202UL;
    uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
                  ((b & 0xFCFCFCFCUL) >> 2);
    uint32_t l1, h1;

    pixels += line_size;
    for (i = 0; i < h; i += 2) {
        uint32_t a = AV_RN32(pixels);
        uint32_t b = AV_RN32(pixels + 1);
        l1 = (a & 0x03030303UL) +
             (b & 0x03030303UL);
        h1 = ((a & 0xFCFCFCFCUL) >> 2) +
             ((b & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block += line_size;
        /* second row of the pair: recompute l0/h0 (with rounding term) */
        a = AV_RN32(pixels);
        b = AV_RN32(pixels + 1);
        l0 = (a & 0x03030303UL) +
             (b & 0x03030303UL) +
             0x02020202UL;
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +
             ((b & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block += line_size;
    }
}

/*
 * Half-pel xy-interpolation, 8 wide. The MMI path (#if 1) widens each
 * byte to 16 bits (punpck[lh]bh against zeroed ftmp7), keeps the
 * horizontal pair-sum of the previous row in ftmp4/ftmp5 (resp.
 * ftmp0/ftmp1), adds the current row's pair-sum plus the rounding
 * constant in ftmp6, shifts right by 2 and repacks with packushb.
 * ftmp6 is built from all-ones via psrlh/psllh. h is assumed even.
 * The #else branch is the scalar reference version of the same filter.
 */
void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
#if 1
    double ftmp[10];
    mips_reg addr[2];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "dli %[addr0], 0x0f \n\t"
        "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
        "dmtc1 %[addr0], %[ftmp8] \n\t"
        "dli %[addr0], 0x01 \n\t"
        "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
        "dmtc1 %[addr0], %[ftmp8] \n\t"
        "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"

        "dli %[addr0], 0x02 \n\t"
        "dmtc1 %[addr0], %[ftmp9] \n\t"
        /* prime ftmp4/ftmp5 with the pair-sum of row 0 */
        MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
        MMI_ULDC1(%[ftmp4], %[pixels], 0x01)
        "mov.d %[ftmp1], %[ftmp0] \n\t"
        "mov.d %[ftmp5], %[ftmp4] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
        "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
        "xor %[addr0], %[addr0], %[addr0] \n\t"
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        ".p2align 3 \n\t"

        "1: \n\t"
        /* addr0 is the running row offset applied to both pixels/block */
        PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
        MMI_ULDC1(%[ftmp2], %[addr1], 0x01)
        "mov.d %[ftmp1], %[ftmp0] \n\t"
        "mov.d %[ftmp3], %[ftmp2] \n\t"
        "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
        "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
        "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        MMI_SDXC1(%[ftmp4], %[block], %[addr0], 0x00)
        PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
        /* second row of the pair: roles of ftmp0/1 and ftmp4/5 swap */
        PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
        MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
        MMI_ULDC1(%[ftmp4], %[addr1], 0x01)
        "mov.d %[ftmp3], %[ftmp2] \n\t"
        "mov.d %[ftmp5], %[ftmp4] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
        /* continuation of ff_put_pixels8_xy2_8_mmi's loop */
        "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
        "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
        "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
        "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
        "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
        "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
        "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        MMI_SDXC1(%[ftmp0], %[block], %[addr0], 0x00)
        PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
        /* NOTE(review): PTR_ADDU with an immediate here, unlike the
         * PTR_ADDI used by the other loops — confirm the assembler
         * macro accepts this form */
        PTR_ADDU "%[h], %[h], -0x02 \n\t"
        "bnez %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
          [h]"+&r"(h), [pixels]"+&r"(pixels)
        : [block]"r"(block), [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
#else
    /* FIXME HIGH BIT DEPTH */
    /* scalar reference: 2x2 box filter, two 4-byte columns (j loop) */
    int j;

    for (j = 0; j < 2; j++) {
        int i;
        const uint32_t a = AV_RN32(pixels);
        const uint32_t b = AV_RN32(pixels + 1);
        uint32_t l0 = (a & 0x03030303UL) +
                      (b & 0x03030303UL) +
                      0x02020202UL;
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
                      ((b & 0xFCFCFCFCUL) >> 2);
        uint32_t l1, h1;

        pixels += line_size;
        for (i = 0; i < h; i += 2) {
            uint32_t a = AV_RN32(pixels);
            uint32_t b = AV_RN32(pixels + 1);
            l1 = (a & 0x03030303UL) +
                 (b & 0x03030303UL);
            h1 = ((a & 0xFCFCFCFCUL) >> 2) +
                 ((b & 0xFCFCFCFCUL) >> 2);
            *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
            pixels += line_size;
            block += line_size;
            a = AV_RN32(pixels);
            b = AV_RN32(pixels + 1);
            l0 = (a & 0x03030303UL) +
                 (b &
0x03030303UL) + 960 0x02020202UL; 961 h0 = ((a & 0xFCFCFCFCUL) >> 2) + 962 ((b & 0xFCFCFCFCUL) >> 2); 963 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); 964 pixels += line_size; 965 block += line_size; 966 } 967 pixels += 4 - line_size * (h + 1); 968 block += 4 - line_size * h; 969 } 970#endif 971} 972 973void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 974 ptrdiff_t line_size, int h) 975{ 976 ff_put_pixels8_xy2_8_mmi(block, pixels, line_size, h); 977 ff_put_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h); 978} 979 980void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 981 ptrdiff_t line_size, int h) 982{ 983 /* FIXME HIGH BIT DEPTH */ 984 int i; 985 const uint32_t a = AV_RN32(pixels); 986 const uint32_t b = AV_RN32(pixels + 1); 987 uint32_t l0 = (a & 0x03030303UL) + 988 (b & 0x03030303UL) + 989 0x02020202UL; 990 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + 991 ((b & 0xFCFCFCFCUL) >> 2); 992 uint32_t l1, h1; 993 994 pixels += line_size; 995 for (i = 0; i < h; i += 2) { 996 uint32_t a = AV_RN32(pixels); 997 uint32_t b = AV_RN32(pixels + 1); 998 l1 = (a & 0x03030303UL) + 999 (b & 0x03030303UL); 1000 h1 = ((a & 0xFCFCFCFCUL) >> 2) + 1001 ((b & 0xFCFCFCFCUL) >> 2); 1002 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); 1003 pixels += line_size; 1004 block += line_size; 1005 a = AV_RN32(pixels); 1006 b = AV_RN32(pixels + 1); 1007 l0 = (a & 0x03030303UL) + 1008 (b & 0x03030303UL) + 1009 0x02020202UL; 1010 h0 = ((a & 0xFCFCFCFCUL) >> 2) + 1011 ((b & 0xFCFCFCFCUL) >> 2); 1012 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); 1013 pixels += line_size; 1014 block += line_size; 1015 } 1016} 1017 1018void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 1019 ptrdiff_t line_size, int h) 1020{ 1021 /* FIXME HIGH BIT DEPTH */ 1022 int j; 1023 1024 for (j = 0; j < 2; j++) { 1025 int i; 1026 const 
uint32_t a = AV_RN32(pixels); 1027 const uint32_t b = AV_RN32(pixels + 1); 1028 uint32_t l0 = (a & 0x03030303UL) + 1029 (b & 0x03030303UL) + 1030 0x02020202UL; 1031 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + 1032 ((b & 0xFCFCFCFCUL) >> 2); 1033 uint32_t l1, h1; 1034 1035 pixels += line_size; 1036 for (i = 0; i < h; i += 2) { 1037 uint32_t a = AV_RN32(pixels); 1038 uint32_t b = AV_RN32(pixels + 1); 1039 l1 = (a & 0x03030303UL) + 1040 (b & 0x03030303UL); 1041 h1 = ((a & 0xFCFCFCFCUL) >> 2) + 1042 ((b & 0xFCFCFCFCUL) >> 2); 1043 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); 1044 pixels += line_size; 1045 block += line_size; 1046 a = AV_RN32(pixels); 1047 b = AV_RN32(pixels + 1); 1048 l0 = (a & 0x03030303UL) + 1049 (b & 0x03030303UL) + 1050 0x02020202UL; 1051 h0 = ((a & 0xFCFCFCFCUL) >> 2) + 1052 ((b & 0xFCFCFCFCUL) >> 2); 1053 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); 1054 pixels += line_size; 1055 block += line_size; 1056 } 1057 pixels += 4 - line_size * (h + 1); 1058 block += 4 - line_size * h; 1059 } 1060} 1061 1062void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 1063 ptrdiff_t line_size, int h) 1064{ 1065 ff_avg_pixels8_xy2_8_mmi(block, pixels, line_size, h); 1066 ff_avg_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h); 1067} 1068 1069void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 1070 ptrdiff_t line_size, int h) 1071{ 1072 /* FIXME HIGH BIT DEPTH */ 1073 int j; 1074 1075 for (j = 0; j < 2; j++) { 1076 int i; 1077 const uint32_t a = AV_RN32(pixels); 1078 const uint32_t b = AV_RN32(pixels + 1); 1079 uint32_t l0 = (a & 0x03030303UL) + 1080 (b & 0x03030303UL) + 1081 0x01010101UL; 1082 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + 1083 ((b & 0xFCFCFCFCUL) >> 2); 1084 uint32_t l1, h1; 1085 1086 pixels += line_size; 1087 for (i = 0; i < h; i += 2) { 1088 uint32_t a = AV_RN32(pixels); 1089 uint32_t b = 
AV_RN32(pixels + 1); 1090 l1 = (a & 0x03030303UL) + 1091 (b & 0x03030303UL); 1092 h1 = ((a & 0xFCFCFCFCUL) >> 2) + 1093 ((b & 0xFCFCFCFCUL) >> 2); 1094 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); 1095 pixels += line_size; 1096 block += line_size; 1097 a = AV_RN32(pixels); 1098 b = AV_RN32(pixels + 1); 1099 l0 = (a & 0x03030303UL) + 1100 (b & 0x03030303UL) + 1101 0x01010101UL; 1102 h0 = ((a & 0xFCFCFCFCUL) >> 2) + 1103 ((b & 0xFCFCFCFCUL) >> 2); 1104 *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); 1105 pixels += line_size; 1106 block += line_size; 1107 } 1108 pixels += 4 - line_size * (h + 1); 1109 block += 4 - line_size * h; 1110 } 1111} 1112 1113void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, 1114 ptrdiff_t line_size, int h) 1115{ 1116 ff_put_no_rnd_pixels8_xy2_8_mmi(block, pixels, line_size, h); 1117 ff_put_no_rnd_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h); 1118} 1119