1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Loongson SIMD optimized h264chroma 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2015 Loongson Technology Corporation Limited 5cabdff1aSopenharmony_ci * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6cabdff1aSopenharmony_ci * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn> 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * This file is part of FFmpeg. 9cabdff1aSopenharmony_ci * 10cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 11cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 12cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 13cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 14cabdff1aSopenharmony_ci * 15cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 16cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 17cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18cabdff1aSopenharmony_ci * Lesser General Public License for more details. 19cabdff1aSopenharmony_ci * 20cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 21cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 22cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23cabdff1aSopenharmony_ci */ 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci#include "h264chroma_mips.h" 26cabdff1aSopenharmony_ci#include "constants.h" 27cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_civoid ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 30cabdff1aSopenharmony_ci int h, int x, int y) 31cabdff1aSopenharmony_ci{ 32cabdff1aSopenharmony_ci double ftmp[12]; 33cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D, E; 34cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci A.i = 64; 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_ci if (!(x || y)) { 39cabdff1aSopenharmony_ci /* x=0, y=0, A.i=64 */ 40cabdff1aSopenharmony_ci __asm__ volatile ( 41cabdff1aSopenharmony_ci "1: \n\t" 42cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp0], %[src], 0x00) 43cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 44cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 45cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 46cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x00) 47cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 48cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) 49cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 50cabdff1aSopenharmony_ci 51cabdff1aSopenharmony_ci "addi %[h], %[h], -0x04 \n\t" 52cabdff1aSopenharmony_ci 53cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dst], 0x00) 54cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 55cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 56cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 57cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp2], %[dst], 0x00) 58cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 59cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp3], %[dst], 0x00) 60cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 61cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 62cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 63cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 64cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 65cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 66cabdff1aSopenharmony_ci [h]"+&r"(h) 67cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride) 68cabdff1aSopenharmony_ci : "memory" 69cabdff1aSopenharmony_ci ); 70cabdff1aSopenharmony_ci } else if (x && y) { 71cabdff1aSopenharmony_ci /* x!=0, y!=0 */ 72cabdff1aSopenharmony_ci D.i = x * y; 73cabdff1aSopenharmony_ci B.i = (x << 3) - D.i; 74cabdff1aSopenharmony_ci C.i = (y << 3) - D.i; 75cabdff1aSopenharmony_ci A.i = 64 - D.i - B.i - C.i; 76cabdff1aSopenharmony_ci 77cabdff1aSopenharmony_ci __asm__ volatile ( 78cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 79cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 80cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 81cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp9] \n\t" 82cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 83cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_ci "1: \n\t" 86cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 87cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 88cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 89cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) 90cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) 91cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 92cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp10], %[src], 0x00) 93cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp11], %[src], 0x01) 94cabdff1aSopenharmony_ci "addi %[h], %[h], -0x02 \n\t" 95cabdff1aSopenharmony_ci 96cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 97cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 98cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 99cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 100cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" 101cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[B] \n\t" 102cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" 103cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[A] \n\t" 104cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[B] \n\t" 105cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" 106cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" 107cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t" 108cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" 109cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 110cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[C] \n\t" 111cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[D] \n\t" 112cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 113cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[C] \n\t" 114cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" 115cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 116cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 117cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 118cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 119cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" 120cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 121cabdff1aSopenharmony_ci "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 122cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" 125cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t" 126cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" 127cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 128cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" 129cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[B] \n\t" 130cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" 131cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[A] \n\t" 132cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[B] \n\t" 133cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" 134cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp10], %[ftmp0] \n\t" 135cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t" 136cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp11], %[ftmp0] \n\t" 137cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp11], %[ftmp0] \n\t" 138cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[C] \n\t" 139cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[D] \n\t" 140cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" 141cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[C] \n\t" 142cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" 143cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 144cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 145cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 146cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ff_pw_32] \n\t" 147cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ff_pw_32] \n\t" 148cabdff1aSopenharmony_ci "psrlh %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 149cabdff1aSopenharmony_ci "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 150cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 151cabdff1aSopenharmony_ci 152cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 153cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 154cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp3], %[dst], 0x00) 155cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 156cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 157cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 158cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 159cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 160cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 161cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 162cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 163cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 164cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 165cabdff1aSopenharmony_ci [h]"+&r"(h) 166cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f), 167cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 168cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 169cabdff1aSopenharmony_ci [tmp0]"r"(0x06) 170cabdff1aSopenharmony_ci : "memory" 171cabdff1aSopenharmony_ci ); 172cabdff1aSopenharmony_ci } else if (x) { 173cabdff1aSopenharmony_ci /* x!=0, y==0 */ 174cabdff1aSopenharmony_ci E.i = x << 3; 175cabdff1aSopenharmony_ci A.i = 64 - E.i; 176cabdff1aSopenharmony_ci 177cabdff1aSopenharmony_ci __asm__ volatile ( 178cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 179cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 180cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 181cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci "1: \n\t" 184cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 185cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 186cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 187cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 188cabdff1aSopenharmony_ci 189cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 190cabdff1aSopenharmony_ci "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 191cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 192cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 193cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 194cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[E] \n\t" 195cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp5] \n\t" 196cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[A] \n\t" 197cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[E] \n\t" 198cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 199cabdff1aSopenharmony_ci 200cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 201cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" 202cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 203cabdff1aSopenharmony_ci "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 204cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 205cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 206cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 207cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 208cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 209cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 210cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 211cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 212cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 213cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 214cabdff1aSopenharmony_ci [h]"+&r"(h) 215cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 216cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06), 217cabdff1aSopenharmony_ci [A]"f"(A.f), [E]"f"(E.f) 218cabdff1aSopenharmony_ci : "memory" 219cabdff1aSopenharmony_ci ); 220cabdff1aSopenharmony_ci } else { 221cabdff1aSopenharmony_ci /* x==0, y!=0 */ 222cabdff1aSopenharmony_ci E.i = y << 3; 223cabdff1aSopenharmony_ci A.i = 64 - E.i; 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci __asm__ volatile ( 226cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 227cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 228cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 229cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 230cabdff1aSopenharmony_ci 231cabdff1aSopenharmony_ci "1: \n\t" 232cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 233cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 234cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x00) 235cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 236cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp8], %[src], 0x00) 237cabdff1aSopenharmony_ci "addi %[h], %[h], -0x02 \n\t" 238cabdff1aSopenharmony_ci 239cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 240cabdff1aSopenharmony_ci "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 241cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 242cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 243cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 244cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[E] \n\t" 245cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 246cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[A] \n\t" 247cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[E] \n\t" 248cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 249cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ff_pw_32] \n\t" 250cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ff_pw_32] \n\t" 251cabdff1aSopenharmony_ci "psrlh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 252cabdff1aSopenharmony_ci "psrlh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 253cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp3], %[ftmp4] \n\t" 254cabdff1aSopenharmony_ci 255cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp2], %[ftmp0] \n\t" 256cabdff1aSopenharmony_ci "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 257cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp8], %[ftmp0] \n\t" 258cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp8], %[ftmp0] \n\t" 259cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 260cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[E] \n\t" 261cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 262cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[A] \n\t" 263cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[E] \n\t" 264cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" 265cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ff_pw_32] \n\t" 266cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ff_pw_32] \n\t" 267cabdff1aSopenharmony_ci "psrlh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 268cabdff1aSopenharmony_ci "psrlh %[ftmp4], %[ftmp4], %[ftmp7] \n\t" 269cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t" 270cabdff1aSopenharmony_ci 271cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 272cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 273cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp2], %[dst], 0x00) 274cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 275cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 276cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 277cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 278cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 279cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 280cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 281cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 282cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 283cabdff1aSopenharmony_ci [h]"+&r"(h) 284cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 285cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [A]"f"(A.f), 286cabdff1aSopenharmony_ci [E]"f"(E.f), [tmp0]"r"(0x06) 287cabdff1aSopenharmony_ci : "memory" 288cabdff1aSopenharmony_ci ); 289cabdff1aSopenharmony_ci } 290cabdff1aSopenharmony_ci} 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_civoid ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 293cabdff1aSopenharmony_ci int h, int x, int y) 294cabdff1aSopenharmony_ci{ 295cabdff1aSopenharmony_ci double ftmp[10]; 296cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D, E; 297cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_ci A.i = 64; 300cabdff1aSopenharmony_ci 301cabdff1aSopenharmony_ci if(!(x || y)){ 302cabdff1aSopenharmony_ci /* x=0, y=0, A.i=64 */ 303cabdff1aSopenharmony_ci __asm__ volatile ( 304cabdff1aSopenharmony_ci "1: \n\t" 305cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp0], %[src], 0x00) 306cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 307cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 308cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 309cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dst], 0x00) 310cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 311cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[dst], 0x00) 312cabdff1aSopenharmony_ci PTR_SUBU "%[dst], %[dst], %[stride] \n\t" 313cabdff1aSopenharmony_ci "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 314cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 315cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp0], %[dst], 0x00) 316cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 317cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 318cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 319cabdff1aSopenharmony_ci "addi %[h], %[h], -0x02 \n\t" 320cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 321cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 322cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 323cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 324cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 325cabdff1aSopenharmony_ci [h]"+&r"(h) 326cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride) 327cabdff1aSopenharmony_ci : "memory" 328cabdff1aSopenharmony_ci ); 329cabdff1aSopenharmony_ci } else if (x && y) { 330cabdff1aSopenharmony_ci /* x!=0, y!=0 */ 331cabdff1aSopenharmony_ci D.i = x * y; 332cabdff1aSopenharmony_ci B.i = (x << 3) - D.i; 333cabdff1aSopenharmony_ci C.i = (y << 3) - D.i; 334cabdff1aSopenharmony_ci A.i = 64 - D.i - B.i - C.i; 335cabdff1aSopenharmony_ci __asm__ volatile ( 336cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 337cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 338cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 339cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp9] \n\t" 340cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 341cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 342cabdff1aSopenharmony_ci 343cabdff1aSopenharmony_ci "1: \n\t" 344cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 345cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 346cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 347cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) 348cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) 349cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 350cabdff1aSopenharmony_ci 351cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 352cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" 353cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t" 354cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t" 355cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" 356cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[B] \n\t" 357cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" 358cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[A] \n\t" 359cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[B] \n\t" 360cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" 361cabdff1aSopenharmony_ci 362cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" 363cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t" 364cabdff1aSopenharmony_ci "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t" 365cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 366cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[C] \n\t" 367cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[D] \n\t" 368cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" 369cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[C] \n\t" 370cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" 371cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" 372cabdff1aSopenharmony_ci 373cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 374cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 375cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 376cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" 377cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 378cabdff1aSopenharmony_ci "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 379cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 380cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dst], 0x00) 381cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 382cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 383cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 384cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 385cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 386cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 387cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 388cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 389cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 390cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 391cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 392cabdff1aSopenharmony_ci [h]"+&r"(h) 393cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f), 394cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 395cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 396cabdff1aSopenharmony_ci [tmp0]"r"(0x06) 397cabdff1aSopenharmony_ci : "memory" 398cabdff1aSopenharmony_ci ); 399cabdff1aSopenharmony_ci } else if (x) { 400cabdff1aSopenharmony_ci /* x!=0, y==0 */ 401cabdff1aSopenharmony_ci E.i = x << 3; 402cabdff1aSopenharmony_ci A.i = 64 - E.i; 403cabdff1aSopenharmony_ci __asm__ volatile ( 404cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 405cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 406cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 407cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 408cabdff1aSopenharmony_ci 409cabdff1aSopenharmony_ci "1: \n\t" 410cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 411cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 412cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 413cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 414cabdff1aSopenharmony_ci 415cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 416cabdff1aSopenharmony_ci "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 417cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 418cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 419cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 420cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[E] \n\t" 421cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp5] \n\t" 422cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[A] \n\t" 423cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[E] \n\t" 424cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 425cabdff1aSopenharmony_ci 426cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 427cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" 428cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 429cabdff1aSopenharmony_ci "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 430cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 431cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dst], 0x00) 432cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 433cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 434cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 435cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 436cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 437cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 438cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 439cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 440cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 441cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 442cabdff1aSopenharmony_ci [h]"+&r"(h) 443cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 444cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06), 445cabdff1aSopenharmony_ci [A]"f"(A.f), [E]"f"(E.f) 446cabdff1aSopenharmony_ci : "memory" 447cabdff1aSopenharmony_ci ); 448cabdff1aSopenharmony_ci } else { 449cabdff1aSopenharmony_ci /* x==0, y!=0 */ 450cabdff1aSopenharmony_ci E.i = y << 3; 451cabdff1aSopenharmony_ci A.i = 64 - E.i; 452cabdff1aSopenharmony_ci __asm__ volatile ( 453cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 454cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 455cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 456cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 457cabdff1aSopenharmony_ci 458cabdff1aSopenharmony_ci "1: \n\t" 459cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 460cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 461cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x00) 462cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 463cabdff1aSopenharmony_ci 464cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 465cabdff1aSopenharmony_ci "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 466cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 467cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 468cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 469cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[E] \n\t" 470cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp5] \n\t" 471cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[A] \n\t" 472cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[E] \n\t" 473cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 474cabdff1aSopenharmony_ci 475cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 476cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" 477cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 478cabdff1aSopenharmony_ci "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" 479cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 480cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dst], 0x00) 481cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 482cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 483cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 484cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 485cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 486cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 487cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 488cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 489cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 490cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 491cabdff1aSopenharmony_ci [h]"+&r"(h) 492cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 493cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06), 494cabdff1aSopenharmony_ci [A]"f"(A.f), [E]"f"(E.f) 495cabdff1aSopenharmony_ci : "memory" 496cabdff1aSopenharmony_ci ); 497cabdff1aSopenharmony_ci } 498cabdff1aSopenharmony_ci} 499cabdff1aSopenharmony_ci 500cabdff1aSopenharmony_civoid ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 501cabdff1aSopenharmony_ci int h, int x, int y) 502cabdff1aSopenharmony_ci{ 503cabdff1aSopenharmony_ci double ftmp[8]; 504cabdff1aSopenharmony_ci mips_reg addr[1]; 505cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D, E; 506cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 507cabdff1aSopenharmony_ci A.i = (8 - x) * (8 - y); 508cabdff1aSopenharmony_ci B.i = x * (8 - y); 509cabdff1aSopenharmony_ci C.i = (8 - x) * y; 510cabdff1aSopenharmony_ci D.i = x * y; 511cabdff1aSopenharmony_ci E.i = B.i + C.i; 512cabdff1aSopenharmony_ci 513cabdff1aSopenharmony_ci if (D.i) { 514cabdff1aSopenharmony_ci __asm__ volatile ( 515cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 516cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 517cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 518cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 519cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 520cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 521cabdff1aSopenharmony_ci 522cabdff1aSopenharmony_ci "1: \n\t" 523cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 524cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[src], 0x01) 525cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 526cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp3], %[src], 0x00) 527cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp4], %[src], 0x01) 528cabdff1aSopenharmony_ci 529cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 530cabdff1aSopenharmony_ci "punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 531cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" 532cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[B] \n\t" 533cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" 534cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" 535cabdff1aSopenharmony_ci "punpcklbh %[ftmp6], %[ftmp4], %[ftmp0] \n\t" 536cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[C] \n\t" 537cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[D] \n\t" 538cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" 539cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 540cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 541cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 542cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 543cabdff1aSopenharmony_ci 544cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 545cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 546cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 547cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 548cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 549cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 550cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 551cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 552cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 553cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 554cabdff1aSopenharmony_ci [h]"+&r"(h) 555cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f), 556cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 557cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 558cabdff1aSopenharmony_ci [tmp0]"r"(0x06) 559cabdff1aSopenharmony_ci : "memory" 560cabdff1aSopenharmony_ci ); 561cabdff1aSopenharmony_ci } else if (E.i) { 562cabdff1aSopenharmony_ci const int step = C.i ? stride : 1; 563cabdff1aSopenharmony_ci __asm__ volatile ( 564cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 565cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 566cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 567cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp5] \n\t" 568cabdff1aSopenharmony_ci 569cabdff1aSopenharmony_ci "1: \n\t" 570cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 571cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[src], %[step] \n\t" 572cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[addr0], 0x00) 573cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 574cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 575cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 576cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 577cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 578cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[E] \n\t" 579cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp4] \n\t" 580cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 581cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 582cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 583cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 584cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 585cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 586cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 587cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 588cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 589cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 590cabdff1aSopenharmony_ci [addr0]"=&r"(addr[0]), 591cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 592cabdff1aSopenharmony_ci [h]"+&r"(h) 593cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), 594cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06), 595cabdff1aSopenharmony_ci [A]"f"(A.f), [E]"f"(E.f) 596cabdff1aSopenharmony_ci : "memory" 597cabdff1aSopenharmony_ci ); 598cabdff1aSopenharmony_ci } else { 599cabdff1aSopenharmony_ci __asm__ volatile ( 600cabdff1aSopenharmony_ci "1: \n\t" 601cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp0], %[src], 0x00) 602cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 603cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 604cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 605cabdff1aSopenharmony_ci "addi %[h], %[h], -0x02 \n\t" 606cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp0], %[dst], 0x00) 607cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 608cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 609cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 610cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 611cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 612cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 613cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 614cabdff1aSopenharmony_ci [h]"+&r"(h) 615cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride) 616cabdff1aSopenharmony_ci : "memory" 617cabdff1aSopenharmony_ci ); 618cabdff1aSopenharmony_ci } 619cabdff1aSopenharmony_ci} 620cabdff1aSopenharmony_ci 621cabdff1aSopenharmony_civoid ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 622cabdff1aSopenharmony_ci int h, int x, int y) 623cabdff1aSopenharmony_ci{ 624cabdff1aSopenharmony_ci double ftmp[8]; 625cabdff1aSopenharmony_ci mips_reg addr[1]; 626cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D, E; 627cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 628cabdff1aSopenharmony_ci A.i = (8 - x) *(8 - y); 629cabdff1aSopenharmony_ci B.i = x * (8 - y); 630cabdff1aSopenharmony_ci C.i = (8 - x) * y; 631cabdff1aSopenharmony_ci D.i = x * y; 632cabdff1aSopenharmony_ci E.i = B.i + C.i; 633cabdff1aSopenharmony_ci 634cabdff1aSopenharmony_ci if (D.i) { 635cabdff1aSopenharmony_ci __asm__ volatile ( 636cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 637cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 638cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 639cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp7] \n\t" 640cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 641cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 642cabdff1aSopenharmony_ci 643cabdff1aSopenharmony_ci "1: \n\t" 644cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 645cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[src], 0x01) 646cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 647cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp3], %[src], 0x00) 648cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp4], %[src], 0x01) 649cabdff1aSopenharmony_ci 650cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 651cabdff1aSopenharmony_ci "punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 652cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" 653cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[B] \n\t" 654cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" 655cabdff1aSopenharmony_ci "punpcklbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" 656cabdff1aSopenharmony_ci "punpcklbh %[ftmp6], %[ftmp4], %[ftmp0] \n\t" 657cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[C] \n\t" 658cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[D] \n\t" 659cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" 660cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 661cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 662cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" 663cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 664cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[dst], 0x00) 665cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 666cabdff1aSopenharmony_ci 667cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 668cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 669cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 670cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 671cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 672cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 673cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 674cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 675cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 676cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 677cabdff1aSopenharmony_ci [h]"+&r"(h) 678cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f), 679cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 680cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 681cabdff1aSopenharmony_ci [tmp0]"r"(0x06) 682cabdff1aSopenharmony_ci : "memory" 683cabdff1aSopenharmony_ci ); 684cabdff1aSopenharmony_ci } else if (E.i) { 685cabdff1aSopenharmony_ci const int step = C.i ? stride : 1; 686cabdff1aSopenharmony_ci __asm__ volatile ( 687cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 688cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 689cabdff1aSopenharmony_ci "pshufh %[E], %[E], %[ftmp0] \n\t" 690cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp5] \n\t" 691cabdff1aSopenharmony_ci 692cabdff1aSopenharmony_ci "1: \n\t" 693cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 694cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[src], %[step] \n\t" 695cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[addr0], 0x00) 696cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 697cabdff1aSopenharmony_ci "addi %[h], %[h], -0x01 \n\t" 698cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 699cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" 700cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[A] \n\t" 701cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[E] \n\t" 702cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp3], %[ftmp4] \n\t" 703cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 704cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 705cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 706cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[dst], 0x00) 707cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 708cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 709cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 710cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 711cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 712cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 713cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 714cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 715cabdff1aSopenharmony_ci [addr0]"=&r"(addr[0]), 716cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 717cabdff1aSopenharmony_ci [h]"+&r"(h) 718cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), 719cabdff1aSopenharmony_ci [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06), 720cabdff1aSopenharmony_ci [A]"f"(A.f), [E]"f"(E.f) 721cabdff1aSopenharmony_ci : "memory" 722cabdff1aSopenharmony_ci ); 723cabdff1aSopenharmony_ci } else { 724cabdff1aSopenharmony_ci __asm__ volatile ( 725cabdff1aSopenharmony_ci "1: \n\t" 726cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp0], %[src], 0x00) 727cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 728cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 729cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 730cabdff1aSopenharmony_ci "addi %[h], %[h], -0x02 \n\t" 731cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[dst], 0x00) 732cabdff1aSopenharmony_ci "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 733cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp0], %[dst], 0x00) 734cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 735cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp3], %[dst], 0x00) 736cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 737cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 738cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 739cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 740cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 741cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 742cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src]"+&r"(src), 743cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 744cabdff1aSopenharmony_ci [h]"+&r"(h) 745cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride) 746cabdff1aSopenharmony_ci : "memory" 747cabdff1aSopenharmony_ci ); 748cabdff1aSopenharmony_ci } 749cabdff1aSopenharmony_ci} 750