1/* 2 * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavcodec/vp9dsp.h" 22#include "libavutil/mips/mmiutils.h" 23#include "vp9dsp_mips.h" 24 25#define GET_DATA_H_MMI \ 26 "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \ 27 "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \ 28 "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 29 "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \ 30 "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 31 "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \ 32 "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \ 33 "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ 34 "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ 35 "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ 36 "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \ 37 "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \ 38 "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \ 39 "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ 40 "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \ 41 "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ 42 "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \ 43 "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \ 44 "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ 45 "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \ 46 "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ 47 "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t" 48 49#define GET_DATA_V_MMI \ 50 "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \ 51 "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \ 52 "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ 53 "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ 54 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 55 "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ 56 "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ 57 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 58 "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ 59 "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ 60 "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ 61 "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \ 62 "pmaddhw %[srch], %[srch], %[filter10] \n\t" \ 63 "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ 64 "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ 65 "paddw %[srch], %[srch], %[ftmp12] \n\t" \ 66 "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ 67 "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ 68 "paddw %[srch], %[srch], %[ftmp12] \n\t" \ 69 "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ 70 "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ 71 "paddw %[srch], %[srch], %[ftmp12] \n\t" 72 73static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, 74 uint8_t *dst, int32_t dst_stride, 75 const uint16_t *filter_x, int32_t w, 76 int32_t h) 77{ 78 double ftmp[15]; 79 uint32_t tmp[2]; 80 DECLARE_VAR_ALL64; 81 src -= 3; 82 src_stride -= w; 83 dst_stride -= w; 84 __asm__ volatile ( 85 "move %[tmp1], %[width] \n\t" 86 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 87 MMI_ULDC1(%[filter1], %[filter], 0x00) 88 MMI_ULDC1(%[filter2], %[filter], 0x08) 89 "li %[tmp0], 0x07 \n\t" 90 "dmtc1 %[tmp0], %[ftmp13] \n\t" 91 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 92 "1: \n\t" 93 /* Get 8 data per row */ 94 MMI_ULDC1(%[ftmp5], %[src], 0x00) 95 MMI_ULDC1(%[ftmp7], %[src], 0x01) 96 MMI_ULDC1(%[ftmp9], %[src], 0x02) 97 MMI_ULDC1(%[ftmp11], %[src], 0x03) 98 "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" 99 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 100 "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" 101 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 102 "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" 103 "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 104 "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" 105 "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 106 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 107 /* Get raw data */ 108 GET_DATA_H_MMI 109 ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5], 110 %[ftmp6], %[tmp0]) 111 ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5], 112 %[ftmp6], %[tmp0]) 113 "packsswh %[srcl], %[srcl], %[srch] \n\t" 114 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 115 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 116 PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 117 PTR_ADDIU "%[src], %[src], 0x04 \n\t" 118 /* Loop count */ 119 "bnez %[width], 1b \n\t" 120 "move %[width], %[tmp1] \n\t" 121 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 122 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 123 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 124 "bnez %[height], 1b \n\t" 125 : RESTRICT_ASM_ALL64 126 [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), 127 [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), 128 [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), 129 [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), 130 [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), 131 [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), 132 [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), 133 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 134 [src]"+&r"(src), [width]"+&r"(w), 135 [dst]"+&r"(dst), [height]"+&r"(h), 136 [ftmp13]"=&f"(ftmp[14]) 137 : [filter]"r"(filter_x), 138 [src_stride]"r"((mips_reg)src_stride), 139 [dst_stride]"r"((mips_reg)dst_stride) 140 : "memory" 141 ); 142} 143 144static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, 145 uint8_t *dst, int32_t dst_stride, 146 const int16_t *filter_y, int32_t w, 147 int32_t h) 148{ 149 double ftmp[17]; 150 uint32_t tmp[1]; 151 ptrdiff_t addr = src_stride; 152 DECLARE_VAR_ALL64; 153 src_stride -= w; 154 dst_stride -= w; 155 156 __asm__ volatile ( 157 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 158 MMI_ULDC1(%[ftmp4], %[filter], 0x00) 159 MMI_ULDC1(%[ftmp5], %[filter], 0x08) 160 "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" 161 "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" 162 "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" 163 "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" 164 "li %[tmp0], 0x07 \n\t" 165 "dmtc1 %[tmp0], %[ftmp13] \n\t" 166 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 167 "1: \n\t" 168 /* Get 8 data per column */ 169 MMI_ULDC1(%[ftmp4], %[src], 0x0) 170 PTR_ADDU "%[tmp0], %[src], %[addr] \n\t" 171 MMI_ULDC1(%[ftmp5], %[tmp0], 0x0) 172 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 173 MMI_ULDC1(%[ftmp6], %[tmp0], 0x0) 174 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 175 MMI_ULDC1(%[ftmp7], %[tmp0], 0x0) 176 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 177 MMI_ULDC1(%[ftmp8], %[tmp0], 0x0) 178 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 179 MMI_ULDC1(%[ftmp9], %[tmp0], 0x0) 180 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 181 MMI_ULDC1(%[ftmp10], %[tmp0], 0x0) 182 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 183 MMI_ULDC1(%[ftmp11], %[tmp0], 0x0) 184 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 185 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 186 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 187 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 188 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 189 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 190 "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 191 "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 192 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 193 /* Get raw data */ 194 GET_DATA_V_MMI 195 ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5], 196 %[ftmp6], %[tmp0]) 197 ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5], 198 %[ftmp6], %[tmp0]) 199 "packsswh %[srcl], %[srcl], %[srch] \n\t" 200 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 201 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 202 PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 203 PTR_ADDIU "%[src], %[src], 0x04 \n\t" 204 /* Loop count */ 205 "bnez %[width], 1b \n\t" 206 PTR_SUBU "%[width], %[addr], %[src_stride] \n\t" 207 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 208 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 209 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 210 "bnez %[height], 1b \n\t" 211 : RESTRICT_ASM_ALL64 212 [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), 213 [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), 214 [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), 215 [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), 216 [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), 217 [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), 218 [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), 219 [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), 220 [src]"+&r"(src), [dst]"+&r"(dst), 221 [width]"+&r"(w), [height]"+&r"(h), 222 [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16]) 223 : [filter]"r"(filter_y), 224 [src_stride]"r"((mips_reg)src_stride), 225 [dst_stride]"r"((mips_reg)dst_stride), 226 [addr]"r"((mips_reg)addr) 227 : "memory" 228 ); 229} 230 231static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, 232 uint8_t *dst, int32_t dst_stride, 233 const uint16_t *filter_x, int32_t w, 234 int32_t h) 235{ 236 double ftmp[15]; 237 uint32_t tmp[2]; 238 DECLARE_VAR_ALL64; 239 src -= 3; 240 src_stride -= w; 241 dst_stride -= w; 242 243 __asm__ volatile ( 244 "move %[tmp1], %[width] \n\t" 245 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 246 MMI_ULDC1(%[filter1], %[filter], 0x00) 247 MMI_ULDC1(%[filter2], %[filter], 0x08) 248 "li %[tmp0], 0x07 \n\t" 249 "dmtc1 %[tmp0], %[ftmp13] \n\t" 250 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 251 "1: \n\t" 252 /* Get 8 data per row */ 253 MMI_ULDC1(%[ftmp5], %[src], 0x00) 254 MMI_ULDC1(%[ftmp7], %[src], 0x01) 255 MMI_ULDC1(%[ftmp9], %[src], 0x02) 256 MMI_ULDC1(%[ftmp11], %[src], 0x03) 257 "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" 258 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 259 "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" 260 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 261 "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" 262 "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 263 "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" 264 "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 265 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 266 /* Get raw data */ 267 GET_DATA_H_MMI 268 ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5], 269 %[ftmp6], %[tmp0]) 270 ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5], 271 %[ftmp6], %[tmp0]) 272 "packsswh %[srcl], %[srcl], %[srch] \n\t" 273 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 274 "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 275 MMI_ULDC1(%[ftmp4], %[dst], 0x0) 276 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 277 "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" 278 "li %[tmp0], 0x10001 \n\t" 279 "dmtc1 %[tmp0], %[ftmp5] \n\t" 280 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 281 "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 282 "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 283 "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 284 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 285 PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 286 PTR_ADDIU "%[src], %[src], 0x04 \n\t" 287 /* Loop count */ 288 "bnez %[width], 1b \n\t" 289 "move %[width], %[tmp1] \n\t" 290 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 291 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 292 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 293 "bnez %[height], 1b \n\t" 294 : RESTRICT_ASM_ALL64 295 [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), 296 [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), 297 [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), 298 [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), 299 [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), 300 [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), 301 [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), 302 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 303 [src]"+&r"(src), [width]"+&r"(w), 304 [dst]"+&r"(dst), [height]"+&r"(h), 305 [ftmp13]"=&f"(ftmp[14]) 306 : [filter]"r"(filter_x), 307 [src_stride]"r"((mips_reg)src_stride), 308 [dst_stride]"r"((mips_reg)dst_stride) 309 : "memory" 310 ); 311} 312 313static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, 314 uint8_t *dst, int32_t dst_stride, 315 const int16_t *filter_y, int32_t w, 316 int32_t h) 317{ 318 double ftmp[17]; 319 uint32_t tmp[1]; 320 ptrdiff_t addr = src_stride; 321 DECLARE_VAR_ALL64; 322 src_stride -= w; 323 dst_stride -= w; 324 325 __asm__ volatile ( 326 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 327 MMI_ULDC1(%[ftmp4], %[filter], 0x00) 328 MMI_ULDC1(%[ftmp5], %[filter], 0x08) 329 "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" 330 "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" 331 "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" 332 "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" 333 "li %[tmp0], 0x07 \n\t" 334 "dmtc1 %[tmp0], %[ftmp13] \n\t" 335 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" 336 "1: \n\t" 337 /* Get 8 data per column */ 338 MMI_ULDC1(%[ftmp4], %[src], 0x0) 339 PTR_ADDU "%[tmp0], %[src], %[addr] \n\t" 340 MMI_ULDC1(%[ftmp5], %[tmp0], 0x0) 341 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 342 MMI_ULDC1(%[ftmp6], %[tmp0], 0x0) 343 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 344 MMI_ULDC1(%[ftmp7], %[tmp0], 0x0) 345 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 346 MMI_ULDC1(%[ftmp8], %[tmp0], 0x0) 347 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 348 MMI_ULDC1(%[ftmp9], %[tmp0], 0x0) 349 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 350 MMI_ULDC1(%[ftmp10], %[tmp0], 0x0) 351 PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t" 352 MMI_ULDC1(%[ftmp11], %[tmp0], 0x0) 353 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 354 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 355 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 356 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 357 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 358 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 359 "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 360 "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 361 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 362 /* Get raw data */ 363 GET_DATA_V_MMI 364 ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5], 365 %[ftmp6], %[tmp0]) 366 ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5], 367 %[ftmp6], %[tmp0]) 368 "packsswh %[srcl], %[srcl], %[srch] \n\t" 369 "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" 370 "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 371 MMI_ULDC1(%[ftmp4], %[dst], 0x00) 372 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 373 "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" 374 "li %[tmp0], 0x10001 \n\t" 375 "dmtc1 %[tmp0], %[ftmp5] \n\t" 376 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" 377 "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 378 "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" 379 "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 380 "swc1 %[ftmp12], 0x00(%[dst]) \n\t" 381 PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 382 PTR_ADDIU "%[src], %[src], 0x04 \n\t" 383 /* Loop count */ 384 "bnez %[width], 1b \n\t" 385 PTR_SUBU "%[width], %[addr], %[src_stride] \n\t" 386 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 387 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 388 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 389 "bnez %[height], 1b \n\t" 390 : RESTRICT_ASM_ALL64 391 [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), 392 [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), 393 [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), 394 [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), 395 [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), 396 [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), 397 [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), 398 [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), 399 [src]"+&r"(src), [dst]"+&r"(dst), 400 [width]"+&r"(w), [height]"+&r"(h), 401 [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16]) 402 : [filter]"r"(filter_y), 403 [src_stride]"r"((mips_reg)src_stride), 404 [dst_stride]"r"((mips_reg)dst_stride), 405 [addr]"r"((mips_reg)addr) 406 : "memory" 407 ); 408} 409 410static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, 411 uint8_t *dst, int32_t dst_stride, 412 int32_t w, int32_t h) 413{ 414 double ftmp[4]; 415 uint32_t tmp[2]; 416 DECLARE_VAR_ALL64; 417 src_stride -= w; 418 dst_stride -= w; 419 420 __asm__ volatile ( 421 "move %[tmp1], %[width] \n\t" 422 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 423 "li %[tmp0], 0x10001 \n\t" 424 "dmtc1 %[tmp0], %[ftmp3] \n\t" 425 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" 426 "1: \n\t" 427 MMI_ULDC1(%[ftmp1], %[src], 0x00) 428 MMI_ULDC1(%[ftmp2], %[dst], 0x00) 429 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 430 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 431 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 432 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 433 "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 434 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 435 "swc1 %[ftmp1], 0x00(%[dst]) \n\t" 436 PTR_ADDIU "%[width], %[width], -0x04 \n\t" 437 PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 438 PTR_ADDIU "%[src], %[src], 0x04 \n\t" 439 "bnez %[width], 1b \n\t" 440 "move %[width], %[tmp1] \n\t" 441 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t" 442 PTR_ADDU "%[src], %[src], %[src_stride] \n\t" 443 PTR_ADDIU "%[height], %[height], -0x01 \n\t" 444 "bnez %[height], 1b \n\t" 445 : RESTRICT_ASM_ALL64 446 [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 447 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 448 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 449 [src]"+&r"(src), [dst]"+&r"(dst), 450 [width]"+&r"(w), [height]"+&r"(h) 451 : [src_stride]"r"((mips_reg)src_stride), 452 [dst_stride]"r"((mips_reg)dst_stride) 453 : "memory" 454 ); 455} 456 457static const int16_t vp9_subpel_filters_mmi[3][15][8] = { 458 [FILTER_8TAP_REGULAR] = { 459 {0, 1, -5, 126, 8, -3, 1, 0}, 460 {-1, 3, -10, 122, 18, -6, 2, 0}, 461 {-1, 4, -13, 118, 27, -9, 3, -1}, 462 {-1, 4, -16, 112, 37, -11, 4, -1}, 463 {-1, 5, -18, 105, 48, -14, 4, -1}, 464 {-1, 5, -19, 97, 58, -16, 5, -1}, 465 {-1, 6, -19, 88, 68, -18, 5, -1}, 466 {-1, 6, -19, 78, 78, -19, 6, -1}, 467 {-1, 5, -18, 68, 88, -19, 6, -1}, 468 {-1, 5, -16, 58, 97, -19, 5, -1}, 469 {-1, 4, -14, 48, 105, -18, 5, -1}, 470 {-1, 4, -11, 37, 112, -16, 4, -1}, 471 {-1, 3, -9, 27, 118, -13, 4, -1}, 472 {0, 2, -6, 18, 122, -10, 3, -1}, 473 {0, 1, -3, 8, 126, -5, 1, 0}, 474 }, [FILTER_8TAP_SHARP] = { 475 {-1, 3, -7, 127, 8, -3, 1, 0}, 476 {-2, 5, -13, 125, 17, -6, 3, -1}, 477 {-3, 7, -17, 121, 27, -10, 5, -2}, 478 {-4, 9, -20, 115, 37, -13, 6, -2}, 479 {-4, 10, -23, 108, 48, -16, 8, -3}, 480 {-4, 10, -24, 100, 59, -19, 9, -3}, 481 {-4, 11, -24, 90, 70, -21, 10, -4}, 482 {-4, 11, -23, 80, 80, -23, 11, -4}, 483 {-4, 10, -21, 70, 90, -24, 11, -4}, 484 {-3, 9, -19, 59, 100, -24, 10, -4}, 485 {-3, 8, -16, 48, 108, -23, 10, -4}, 486 {-2, 6, -13, 37, 115, -20, 9, -4}, 487 {-2, 5, -10, 27, 121, -17, 7, -3}, 488 {-1, 3, -6, 17, 125, -13, 5, -2}, 489 {0, 1, -3, 8, 127, -7, 3, -1}, 490 }, [FILTER_8TAP_SMOOTH] = { 491 {-3, -1, 32, 64, 38, 1, -3, 0}, 492 {-2, -2, 29, 63, 41, 2, -3, 0}, 493 {-2, -2, 26, 63, 43, 4, -4, 0}, 494 {-2, -3, 24, 62, 46, 5, -4, 0}, 495 {-2, -3, 21, 60, 49, 7, -4, 0}, 496 {-1, -4, 18, 59, 51, 9, -4, 0}, 497 {-1, -4, 16, 57, 53, 12, -4, -1}, 498 {-1, -4, 14, 55, 55, 14, -4, -1}, 499 {-1, -4, 12, 53, 57, 16, -4, -1}, 500 {0, -4, 9, 51, 59, 18, -4, -1}, 501 {0, -4, 7, 49, 60, 21, -3, -2}, 502 {0, -4, 5, 46, 62, 24, -3, -2}, 503 {0, -4, 4, 43, 63, 26, -2, -2}, 504 {0, -3, 2, 41, 63, 29, -2, -2}, 505 {0, -3, 1, 38, 64, 32, -1, -3}, 506 } 507}; 508 509#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX) \ 510void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \ 511 const uint8_t *src, \ 512 ptrdiff_t srcstride, \ 513 int h, int mx, int my) \ 514{ \ 515 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 516 \ 517 convolve_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 518} \ 519 \ 520void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \ 521 const uint8_t *src, \ 522 ptrdiff_t srcstride, \ 523 int h, int mx, int my) \ 524{ \ 525 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 526 \ 527 src -= (3 * srcstride); \ 528 convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 529} \ 530 \ 531void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \ 532 const uint8_t *src, \ 533 ptrdiff_t srcstride, \ 534 int h, int mx, int my) \ 535{ \ 536 const uint16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 537 const uint16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 538 \ 539 int tmp_h = h + 7; \ 540 uint8_t temp[64 * 71]; \ 541 src -= (3 * srcstride); \ 542 convolve_horiz_mmi(src, srcstride, temp, 64, hfilter, SIZE, tmp_h); \ 543 convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h); \ 544} \ 545 \ 546void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \ 547 const uint8_t *src, \ 548 ptrdiff_t srcstride, \ 549 int h, int mx, int my) \ 550{ \ 551 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 552 \ 553 convolve_avg_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 554} \ 555 \ 556void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \ 557 const uint8_t *src, \ 558 ptrdiff_t srcstride, \ 559 int h, int mx, int my) \ 560{ \ 561 const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 562 \ 563 src -= (3 * srcstride); \ 564 convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \ 565} \ 566 \ 567void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \ 568 const uint8_t *src, \ 569 ptrdiff_t srcstride, \ 570 int h, int mx, int my) \ 571{ \ 572 const uint16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \ 573 const uint16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \ 574 \ 575 uint8_t temp1[64 * 64]; \ 576 uint8_t temp2[64 * 71]; \ 577 int tmp_h = h + 7; \ 578 src -= (3 * srcstride); \ 579 convolve_horiz_mmi(src, srcstride, temp2, 64, hfilter, SIZE, tmp_h); \ 580 convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h); \ 581 convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h); \ 582} 583 584VP9_8TAP_MIPS_MMI_FUNC(64, regular, FILTER_8TAP_REGULAR); 585VP9_8TAP_MIPS_MMI_FUNC(32, regular, FILTER_8TAP_REGULAR); 586VP9_8TAP_MIPS_MMI_FUNC(16, regular, FILTER_8TAP_REGULAR); 587VP9_8TAP_MIPS_MMI_FUNC(8, regular, FILTER_8TAP_REGULAR); 588VP9_8TAP_MIPS_MMI_FUNC(4, regular, FILTER_8TAP_REGULAR); 589 590VP9_8TAP_MIPS_MMI_FUNC(64, sharp, FILTER_8TAP_SHARP); 591VP9_8TAP_MIPS_MMI_FUNC(32, sharp, FILTER_8TAP_SHARP); 592VP9_8TAP_MIPS_MMI_FUNC(16, sharp, FILTER_8TAP_SHARP); 593VP9_8TAP_MIPS_MMI_FUNC(8, sharp, FILTER_8TAP_SHARP); 594VP9_8TAP_MIPS_MMI_FUNC(4, sharp, FILTER_8TAP_SHARP); 595 596VP9_8TAP_MIPS_MMI_FUNC(64, smooth, FILTER_8TAP_SMOOTH); 597VP9_8TAP_MIPS_MMI_FUNC(32, smooth, FILTER_8TAP_SMOOTH); 598VP9_8TAP_MIPS_MMI_FUNC(16, smooth, FILTER_8TAP_SMOOTH); 599VP9_8TAP_MIPS_MMI_FUNC(8, smooth, FILTER_8TAP_SMOOTH); 600VP9_8TAP_MIPS_MMI_FUNC(4, smooth, FILTER_8TAP_SMOOTH); 601 602#undef VP9_8TAP_MIPS_MMI_FUNC 603