1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * VC-1 and WMV3 - DSP functions Loongson MMI-optimized 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 
17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 24cabdff1aSopenharmony_ci#include "libavutil/avassert.h" 25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h" 28cabdff1aSopenharmony_ci#include "constants.h" 29cabdff1aSopenharmony_ci#include "vc1dsp_mips.h" 30cabdff1aSopenharmony_ci#include "hpeldsp_mips.h" 31cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci#define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0) \ 34cabdff1aSopenharmony_ci "li %[tmp0], "#r1" \n\t" \ 35cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp13] \n\t" \ 36cabdff1aSopenharmony_ci "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 37cabdff1aSopenharmony_ci "li %[tmp0], "#r2" \n\t" \ 38cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp14] \n\t" \ 39cabdff1aSopenharmony_ci "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 40cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \ 41cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \ 42cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 43cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \ 44cabdff1aSopenharmony_ci "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \ 45cabdff1aSopenharmony_ci "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ 46cabdff1aSopenharmony_ci \ 47cabdff1aSopenharmony_ci "li %[tmp0], "#r3" \n\t" \ 48cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp13] \n\t" \ 49cabdff1aSopenharmony_ci "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 
50cabdff1aSopenharmony_ci "li %[tmp0], "#r4" \n\t" \ 51cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp14] \n\t" \ 52cabdff1aSopenharmony_ci "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 53cabdff1aSopenharmony_ci "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \ 54cabdff1aSopenharmony_ci "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \ 55cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 56cabdff1aSopenharmony_ci "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \ 57cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \ 58cabdff1aSopenharmony_ci "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \ 59cabdff1aSopenharmony_ci \ 60cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \ 61cabdff1aSopenharmony_ci "paddw %[ftmp2], %[ftmp2], "#c0" \n\t" \ 62cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \ 63cabdff1aSopenharmony_ci "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \ 64cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \ 65cabdff1aSopenharmony_ci "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \ 66cabdff1aSopenharmony_ci "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \ 67cabdff1aSopenharmony_ci "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 68cabdff1aSopenharmony_ci "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \ 69cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 70cabdff1aSopenharmony_ci "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \ 71cabdff1aSopenharmony_ci "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \ 72cabdff1aSopenharmony_ci "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \ 73cabdff1aSopenharmony_ci "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \ 74cabdff1aSopenharmony_ci "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \ 75cabdff1aSopenharmony_ci "punpcklhw "#o2", %[ftmp2], %[ftmp4] \n\t" 76cabdff1aSopenharmony_ci 77cabdff1aSopenharmony_ci#define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1) \ 78cabdff1aSopenharmony_ci "li %[tmp0], "#r1" \n\t" \ 79cabdff1aSopenharmony_ci "mtc1 
%[tmp0], %[ftmp13] \n\t" \ 80cabdff1aSopenharmony_ci "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 81cabdff1aSopenharmony_ci "li %[tmp0], "#r2" \n\t" \ 82cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp14] \n\t" \ 83cabdff1aSopenharmony_ci "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 84cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \ 85cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \ 86cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 87cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \ 88cabdff1aSopenharmony_ci "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \ 89cabdff1aSopenharmony_ci "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ 90cabdff1aSopenharmony_ci \ 91cabdff1aSopenharmony_ci "li %[tmp0], "#r3" \n\t" \ 92cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp13] \n\t" \ 93cabdff1aSopenharmony_ci "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 94cabdff1aSopenharmony_ci "li %[tmp0], "#r4" \n\t" \ 95cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp14] \n\t" \ 96cabdff1aSopenharmony_ci "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 97cabdff1aSopenharmony_ci "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \ 98cabdff1aSopenharmony_ci "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \ 99cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 100cabdff1aSopenharmony_ci "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \ 101cabdff1aSopenharmony_ci "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \ 102cabdff1aSopenharmony_ci "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \ 103cabdff1aSopenharmony_ci \ 104cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \ 105cabdff1aSopenharmony_ci "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \ 106cabdff1aSopenharmony_ci "paddw %[ftmp14], %[ftmp14], "#c1" \n\t" \ 107cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \ 108cabdff1aSopenharmony_ci "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \ 109cabdff1aSopenharmony_ci "paddw %[ftmp3], 
%[ftmp3], "#c1" \n\t" \ 110cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], "#c0" \n\t" \ 111cabdff1aSopenharmony_ci "paddw %[ftmp14], %[ftmp14], "#c0" \n\t" \ 112cabdff1aSopenharmony_ci "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \ 113cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], "#c0" \n\t" \ 114cabdff1aSopenharmony_ci "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \ 115cabdff1aSopenharmony_ci "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 116cabdff1aSopenharmony_ci "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \ 117cabdff1aSopenharmony_ci "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 118cabdff1aSopenharmony_ci "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \ 119cabdff1aSopenharmony_ci "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \ 120cabdff1aSopenharmony_ci "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \ 121cabdff1aSopenharmony_ci "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \ 122cabdff1aSopenharmony_ci "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \ 123cabdff1aSopenharmony_ci "punpcklhw "#o2", %[ftmp2], %[ftmp4] \n\t" 124cabdff1aSopenharmony_ci 125cabdff1aSopenharmony_ci/* Do inverse transform on 8x8 block */ 126cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 127cabdff1aSopenharmony_ci{ 128cabdff1aSopenharmony_ci int dc = block[0]; 129cabdff1aSopenharmony_ci double ftmp[9]; 130cabdff1aSopenharmony_ci mips_reg addr[1]; 131cabdff1aSopenharmony_ci int count; 132cabdff1aSopenharmony_ci union mmi_intfloat64 dc_u; 133cabdff1aSopenharmony_ci 134cabdff1aSopenharmony_ci dc = (3 * dc + 1) >> 1; 135cabdff1aSopenharmony_ci dc = (3 * dc + 16) >> 5; 136cabdff1aSopenharmony_ci dc_u.i = dc; 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci __asm__ volatile( 139cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 140cabdff1aSopenharmony_ci "pshufh %[dc], %[dc], %[ftmp0] \n\t" 141cabdff1aSopenharmony_ci "li %[count], 0x02 \n\t" 142cabdff1aSopenharmony_ci 143cabdff1aSopenharmony_ci "1: \n\t" 
144cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[dest], 0x00) 145cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t" 146cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[addr0], 0x00) 147cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 148cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[addr0], 0x00) 149cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 150cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[addr0], 0x00) 151cabdff1aSopenharmony_ci 152cabdff1aSopenharmony_ci "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 153cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 154cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 155cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 156cabdff1aSopenharmony_ci "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 157cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 158cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 159cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 162cabdff1aSopenharmony_ci "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 163cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 164cabdff1aSopenharmony_ci "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 165cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t" 166cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t" 167cabdff1aSopenharmony_ci "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t" 168cabdff1aSopenharmony_ci "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t" 169cabdff1aSopenharmony_ci 170cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 171cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 172cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 173cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 174cabdff1aSopenharmony_ci 
175cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dest], 0x00) 176cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t" 177cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp2], %[addr0], 0x00) 178cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 179cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp3], %[addr0], 0x00) 180cabdff1aSopenharmony_ci PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 181cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp4], %[addr0], 0x00) 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci "addiu %[count], %[count], -0x01 \n\t" 184cabdff1aSopenharmony_ci PTR_ADDU "%[dest], %[addr0], %[linesize] \n\t" 185cabdff1aSopenharmony_ci "bnez %[count], 1b \n\t" 186cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 187cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 188cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 189cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 190cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), 191cabdff1aSopenharmony_ci [addr0]"=&r"(addr[0]), 192cabdff1aSopenharmony_ci [count]"=&r"(count), [dest]"+&r"(dest) 193cabdff1aSopenharmony_ci : [linesize]"r"((mips_reg)linesize), 194cabdff1aSopenharmony_ci [dc]"f"(dc_u.f) 195cabdff1aSopenharmony_ci : "memory" 196cabdff1aSopenharmony_ci ); 197cabdff1aSopenharmony_ci} 198cabdff1aSopenharmony_ci 199cabdff1aSopenharmony_ci#if _MIPS_SIM != _ABIO32 200cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_mmi(int16_t block[64]) 201cabdff1aSopenharmony_ci{ 202cabdff1aSopenharmony_ci DECLARE_ALIGNED(16, int16_t, temp[64]); 203cabdff1aSopenharmony_ci double ftmp[23]; 204cabdff1aSopenharmony_ci uint64_t tmp[1]; 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci __asm__ volatile ( 207cabdff1aSopenharmony_ci /* 1st loop: start */ 208cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 209cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci // 1st part 
212cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[block], 0x00) 213cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp11], %[block], 0x10) 214cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[block], 0x20) 215cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp12], %[block], 0x30) 216cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[block], 0x40) 217cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp13], %[block], 0x50) 218cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[block], 0x60) 219cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp14], %[block], 0x70) 220cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 221cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 222cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 223cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 226cabdff1aSopenharmony_ci "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 227cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 228cabdff1aSopenharmony_ci "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 229cabdff1aSopenharmony_ci 230cabdff1aSopenharmony_ci /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */ 231cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c, 232cabdff1aSopenharmony_ci 0x000f0010, 0x00040009, %[ff_pw_4]) 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_ci /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */ 235cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4, 236cabdff1aSopenharmony_ci 0xfffc000f, 0xfff7fff0, %[ff_pw_4]) 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */ 239cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4, 240cabdff1aSopenharmony_ci 0xfff00009, 0x000f0004, %[ff_pw_4]) 241cabdff1aSopenharmony_ci 
242cabdff1aSopenharmony_ci /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */ 243cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c, 244cabdff1aSopenharmony_ci 0xfff70004, 0xfff0000f, %[ff_pw_4]) 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18], 247cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 248cabdff1aSopenharmony_ci 249cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22], 250cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 251cabdff1aSopenharmony_ci 252cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp15], %[temp], 0x00) 253cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp19], %[temp], 0x08) 254cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp16], %[temp], 0x10) 255cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp20], %[temp], 0x18) 256cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp17], %[temp], 0x20) 257cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp21], %[temp], 0x28) 258cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp18], %[temp], 0x30) 259cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp22], %[temp], 0x38) 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_ci // 2nd part 262cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[block], 0x08) 263cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp11], %[block], 0x18) 264cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[block], 0x28) 265cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp12], %[block], 0x38) 266cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[block], 0x48) 267cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp13], %[block], 0x58) 268cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[block], 0x68) 269cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp14], %[block], 0x78) 270cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 271cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 272cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 273cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 
274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_ci "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 276cabdff1aSopenharmony_ci "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 277cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 278cabdff1aSopenharmony_ci "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 279cabdff1aSopenharmony_ci 280cabdff1aSopenharmony_ci /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */ 281cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c, 282cabdff1aSopenharmony_ci 0x000f0010, 0x00040009, %[ff_pw_4]) 283cabdff1aSopenharmony_ci 284cabdff1aSopenharmony_ci /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */ 285cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4, 286cabdff1aSopenharmony_ci 0xfffc000f, 0xfff7fff0, %[ff_pw_4]) 287cabdff1aSopenharmony_ci 288cabdff1aSopenharmony_ci /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */ 289cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4, 290cabdff1aSopenharmony_ci 0xfff00009, 0x000f0004, %[ff_pw_4]) 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_ci /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */ 293cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE1(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c, 294cabdff1aSopenharmony_ci 0xfff70004, 0xfff0000f, %[ff_pw_4]) 295cabdff1aSopenharmony_ci 296cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18], 297cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22], 300cabdff1aSopenharmony_ci %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4]) 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp19], %[temp], 0x48) 303cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp20], %[temp], 0x58) 304cabdff1aSopenharmony_ci 
MMI_SDC1(%[ftmp21], %[temp], 0x68) 305cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp22], %[temp], 0x78) 306cabdff1aSopenharmony_ci /* 1st loop: end */ 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_ci /* 2nd loop: start */ 309cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 310cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 311cabdff1aSopenharmony_ci 312cabdff1aSopenharmony_ci // 1st part 313cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[temp], 0x00) 314cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp11], %[temp], 0x10) 315cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[temp], 0x20) 316cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp12], %[temp], 0x30) 317cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 318cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 319cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp15], %[ftmp17] \n\t" 320cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp15], %[ftmp17] \n\t" 321cabdff1aSopenharmony_ci 322cabdff1aSopenharmony_ci "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 323cabdff1aSopenharmony_ci "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 324cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp16], %[ftmp18] \n\t" 325cabdff1aSopenharmony_ci "punpckhhw %[ftmp12], %[ftmp16], %[ftmp18] \n\t" 326cabdff1aSopenharmony_ci 327cabdff1aSopenharmony_ci /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */ 328cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c, 329cabdff1aSopenharmony_ci 0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1]) 330cabdff1aSopenharmony_ci 331cabdff1aSopenharmony_ci /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */ 332cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4, 333cabdff1aSopenharmony_ci 0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1]) 334cabdff1aSopenharmony_ci 335cabdff1aSopenharmony_ci /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */ 
336cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4, 337cabdff1aSopenharmony_ci 0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1]) 338cabdff1aSopenharmony_ci 339cabdff1aSopenharmony_ci /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */ 340cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c, 341cabdff1aSopenharmony_ci 0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1]) 342cabdff1aSopenharmony_ci 343cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp15], %[block], 0x00) 344cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp16], %[block], 0x10) 345cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp17], %[block], 0x20) 346cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp18], %[block], 0x30) 347cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp19], %[block], 0x40) 348cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp20], %[block], 0x50) 349cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp21], %[block], 0x60) 350cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp22], %[block], 0x70) 351cabdff1aSopenharmony_ci 352cabdff1aSopenharmony_ci // 2nd part 353cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[temp], 0x08) 354cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp11], %[temp], 0x18) 355cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[temp], 0x28) 356cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp12], %[temp], 0x38) 357cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[temp], 0x48) 358cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp13], %[temp], 0x58) 359cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[temp], 0x68) 360cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp14], %[temp], 0x78) 361cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 362cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 363cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 364cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 365cabdff1aSopenharmony_ci 366cabdff1aSopenharmony_ci "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 367cabdff1aSopenharmony_ci "punpckhhw %[ftmp10], 
%[ftmp11], %[ftmp12] \n\t" 368cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 369cabdff1aSopenharmony_ci "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 370cabdff1aSopenharmony_ci 371cabdff1aSopenharmony_ci /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */ 372cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c, 373cabdff1aSopenharmony_ci 0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1]) 374cabdff1aSopenharmony_ci 375cabdff1aSopenharmony_ci /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */ 376cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4, 377cabdff1aSopenharmony_ci 0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1]) 378cabdff1aSopenharmony_ci 379cabdff1aSopenharmony_ci /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */ 380cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4, 381cabdff1aSopenharmony_ci 0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1]) 382cabdff1aSopenharmony_ci 383cabdff1aSopenharmony_ci /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */ 384cabdff1aSopenharmony_ci VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c, 385cabdff1aSopenharmony_ci 0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1]) 386cabdff1aSopenharmony_ci 387cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp15], %[block], 0x08) 388cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp16], %[block], 0x18) 389cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp17], %[block], 0x28) 390cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp18], %[block], 0x38) 391cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp19], %[block], 0x48) 392cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp20], %[block], 0x58) 393cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp21], %[block], 0x68) 394cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp22], %[block], 0x78) 395cabdff1aSopenharmony_ci /* 2nd loop: end */ 396cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), 
[ftmp1]"=&f"(ftmp[1]), 397cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 398cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 399cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 400cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 401cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 402cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 403cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), 404cabdff1aSopenharmony_ci [ftmp16]"=&f"(ftmp[16]), [ftmp17]"=&f"(ftmp[17]), 405cabdff1aSopenharmony_ci [ftmp18]"=&f"(ftmp[18]), [ftmp19]"=&f"(ftmp[19]), 406cabdff1aSopenharmony_ci [ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]), 407cabdff1aSopenharmony_ci [ftmp22]"=&f"(ftmp[22]), 408cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]) 409cabdff1aSopenharmony_ci : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f), 410cabdff1aSopenharmony_ci [ff_pw_4]"f"(ff_pw_32_4.f), [block]"r"(block), 411cabdff1aSopenharmony_ci [temp]"r"(temp) 412cabdff1aSopenharmony_ci : "memory" 413cabdff1aSopenharmony_ci ); 414cabdff1aSopenharmony_ci} 415cabdff1aSopenharmony_ci#endif 416cabdff1aSopenharmony_ci 417cabdff1aSopenharmony_ci/* Do inverse transform on 8x4 part of block */ 418cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 419cabdff1aSopenharmony_ci{ 420cabdff1aSopenharmony_ci int dc = block[0]; 421cabdff1aSopenharmony_ci double ftmp[9]; 422cabdff1aSopenharmony_ci union mmi_intfloat64 dc_u; 423cabdff1aSopenharmony_ci 424cabdff1aSopenharmony_ci dc = ( 3 * dc + 1) >> 1; 425cabdff1aSopenharmony_ci dc = (17 * dc + 64) >> 7; 426cabdff1aSopenharmony_ci dc_u.i = dc; 427cabdff1aSopenharmony_ci 428cabdff1aSopenharmony_ci __asm__ volatile( 429cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 430cabdff1aSopenharmony_ci "pshufh %[dc], %[dc], %[ftmp0] \n\t" 431cabdff1aSopenharmony_ci 
432cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[dest0], 0x00) 433cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dest1], 0x00) 434cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[dest2], 0x00) 435cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[dest3], 0x00) 436cabdff1aSopenharmony_ci 437cabdff1aSopenharmony_ci "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 438cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 439cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 440cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 441cabdff1aSopenharmony_ci "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 442cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 443cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 444cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 445cabdff1aSopenharmony_ci 446cabdff1aSopenharmony_ci "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 447cabdff1aSopenharmony_ci "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 448cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 449cabdff1aSopenharmony_ci "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 450cabdff1aSopenharmony_ci "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t" 451cabdff1aSopenharmony_ci "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t" 452cabdff1aSopenharmony_ci "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t" 453cabdff1aSopenharmony_ci "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t" 454cabdff1aSopenharmony_ci 455cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 456cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 457cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 458cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 459cabdff1aSopenharmony_ci 460cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dest0], 0x00) 461cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp2], %[dest1], 0x00) 462cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp3], %[dest2], 0x00) 463cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp4], 
%[dest3], 0x00) 464cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 465cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 466cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 467cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 468cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]) 469cabdff1aSopenharmony_ci : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize), 470cabdff1aSopenharmony_ci [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize), 471cabdff1aSopenharmony_ci [dc]"f"(dc_u.f) 472cabdff1aSopenharmony_ci : "memory" 473cabdff1aSopenharmony_ci ); 474cabdff1aSopenharmony_ci} 475cabdff1aSopenharmony_ci 476cabdff1aSopenharmony_ci#if _MIPS_SIM != _ABIO32 477cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 478cabdff1aSopenharmony_ci{ 479cabdff1aSopenharmony_ci int16_t *src = block; 480cabdff1aSopenharmony_ci int16_t *dst = block; 481cabdff1aSopenharmony_ci double ftmp[16]; 482cabdff1aSopenharmony_ci uint32_t tmp[1]; 483cabdff1aSopenharmony_ci int16_t count = 4; 484cabdff1aSopenharmony_ci int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4, 485cabdff1aSopenharmony_ci 12, 15, 6, -4, -12, -16, -16, -9, 486cabdff1aSopenharmony_ci 12, 9, -6, -16, -12, 4, 16, 15, 487cabdff1aSopenharmony_ci 12, 4, -16, -9, 12, 15, -6, -16, 488cabdff1aSopenharmony_ci 12, -4, -16, 9, 12, -15, -6, 16, 489cabdff1aSopenharmony_ci 12, -9, -6, 16, -12, -4, 16, -15, 490cabdff1aSopenharmony_ci 12, -15, 6, 4, -12, 16, -16, 9, 491cabdff1aSopenharmony_ci 12, -16, 16, -15, 12, -9, 6, -4}; 492cabdff1aSopenharmony_ci 493cabdff1aSopenharmony_ci // 1st loop 494cabdff1aSopenharmony_ci __asm__ volatile ( 495cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 496cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 497cabdff1aSopenharmony_ci 498cabdff1aSopenharmony_ci "1: \n\t" 499cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[src], 0x00) 500cabdff1aSopenharmony_ci 
MMI_LDC1(%[ftmp2], %[src], 0x08) 501cabdff1aSopenharmony_ci 502cabdff1aSopenharmony_ci /* ftmp11: dst1,dst0 */ 503cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[coeff], 0x00) 504cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[coeff], 0x08) 505cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp5], %[coeff], 0x10) 506cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp6], %[coeff], 0x18) 507cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 508cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 509cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 510cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 511cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 512cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 513cabdff1aSopenharmony_ci "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 514cabdff1aSopenharmony_ci "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 515cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp7], %[ftmp8] \n\t" 516cabdff1aSopenharmony_ci "paddw %[ftmp11], %[ftmp11], %[ff_pw_4] \n\t" 517cabdff1aSopenharmony_ci 518cabdff1aSopenharmony_ci /* ftmp12: dst3,dst2 */ 519cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[coeff], 0x20) 520cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[coeff], 0x28) 521cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp5], %[coeff], 0x30) 522cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp6], %[coeff], 0x38) 523cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 524cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 525cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 526cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 527cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 528cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 529cabdff1aSopenharmony_ci "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 530cabdff1aSopenharmony_ci "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 531cabdff1aSopenharmony_ci 
"paddw %[ftmp12], %[ftmp7], %[ftmp8] \n\t" 532cabdff1aSopenharmony_ci "paddw %[ftmp12], %[ftmp12], %[ff_pw_4] \n\t" 533cabdff1aSopenharmony_ci 534cabdff1aSopenharmony_ci /* ftmp13: dst5,dst4 */ 535cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[coeff], 0x40) 536cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[coeff], 0x48) 537cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp5], %[coeff], 0x50) 538cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp6], %[coeff], 0x58) 539cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 540cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 541cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 542cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 543cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 544cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 545cabdff1aSopenharmony_ci "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 546cabdff1aSopenharmony_ci "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 547cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp7], %[ftmp8] \n\t" 548cabdff1aSopenharmony_ci "paddw %[ftmp13], %[ftmp13], %[ff_pw_4] \n\t" 549cabdff1aSopenharmony_ci 550cabdff1aSopenharmony_ci /* ftmp14: dst7,dst6 */ 551cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[coeff], 0x60) 552cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[coeff], 0x68) 553cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp5], %[coeff], 0x70) 554cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp6], %[coeff], 0x78) 555cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 556cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 557cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 558cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 559cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 560cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 561cabdff1aSopenharmony_ci "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 
562cabdff1aSopenharmony_ci "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 563cabdff1aSopenharmony_ci "paddw %[ftmp14], %[ftmp7], %[ftmp8] \n\t" 564cabdff1aSopenharmony_ci "paddw %[ftmp14], %[ftmp14], %[ff_pw_4] \n\t" 565cabdff1aSopenharmony_ci 566cabdff1aSopenharmony_ci /* ftmp9: dst3,dst2,dst1,dst0 ftmp10: dst7,dst6,dst5,dst4 */ 567cabdff1aSopenharmony_ci "psraw %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 568cabdff1aSopenharmony_ci "psraw %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 569cabdff1aSopenharmony_ci "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" 570cabdff1aSopenharmony_ci "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" 571cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp11], %[ftmp12] \n\t" 572cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp11], %[ftmp12] \n\t" 573cabdff1aSopenharmony_ci "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 574cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp13], %[ftmp14] \n\t" 575cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp13], %[ftmp14] \n\t" 576cabdff1aSopenharmony_ci "punpcklhw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 577cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp9], %[dst], 0x00) 578cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp10], %[dst], 0x08) 579cabdff1aSopenharmony_ci 580cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x10 \n\t" 581cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x10 \n\t" 582cabdff1aSopenharmony_ci "addiu %[count], %[count], -0x01 \n\t" 583cabdff1aSopenharmony_ci "bnez %[count], 1b \n\t" 584cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 585cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 586cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 587cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 588cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 589cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 590cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 
591cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]), 592cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count) 593cabdff1aSopenharmony_ci : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff) 594cabdff1aSopenharmony_ci : "memory" 595cabdff1aSopenharmony_ci ); 596cabdff1aSopenharmony_ci 597cabdff1aSopenharmony_ci src = block; 598cabdff1aSopenharmony_ci 599cabdff1aSopenharmony_ci // 2nd loop 600cabdff1aSopenharmony_ci __asm__ volatile ( 601cabdff1aSopenharmony_ci "li %[tmp0], 0x44 \n\t" 602cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp15] \n\t" 603cabdff1aSopenharmony_ci 604cabdff1aSopenharmony_ci // 1st part 605cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 606cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 607cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[src], 0x00) 608cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[src], 0x10) 609cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[src], 0x20) 610cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[src], 0x30) 611cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 612cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 613cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 614cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 615cabdff1aSopenharmony_ci 616cabdff1aSopenharmony_ci /* ftmp11: dst03,dst02,dst01,dst00 */ 617cabdff1aSopenharmony_ci "li %[tmp0], 0x00160011 \n\t" 618cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 619cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 620cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 621cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 622cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 623cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 624cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 625cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 626cabdff1aSopenharmony_ci "pmaddhw 
%[ftmp1], %[ftmp6], %[ftmp3] \n\t" 627cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 628cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 629cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 630cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 631cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 632cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 633cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 634cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 635cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 636cabdff1aSopenharmony_ci 637cabdff1aSopenharmony_ci /* ftmp12: dst13,dst12,dst11,dst10 */ 638cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 639cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 640cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 641cabdff1aSopenharmony_ci "li %[tmp0], 0xffeaffef \n\t" 642cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 643cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 644cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 645cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 646cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 647cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 648cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 649cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 650cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 651cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 652cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 653cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 654cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 655cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] 
\n\t" 656cabdff1aSopenharmony_ci "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 657cabdff1aSopenharmony_ci 658cabdff1aSopenharmony_ci /* ftmp13: dst23,dst22,dst21,dst20 */ 659cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 660cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 661cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 662cabdff1aSopenharmony_ci "li %[tmp0], 0x0016ffef \n\t" 663cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 664cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 665cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 666cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 667cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 668cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 669cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 670cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 671cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 672cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 673cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 674cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 675cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 676cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 677cabdff1aSopenharmony_ci "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 678cabdff1aSopenharmony_ci 679cabdff1aSopenharmony_ci /* ftmp14: dst33,dst32,dst31,dst30 */ 680cabdff1aSopenharmony_ci "li %[tmp0], 0xffea0011 \n\t" 681cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 682cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 683cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 684cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 685cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 686cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] 
\n\t" 687cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 688cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 689cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 690cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 691cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 692cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 693cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 694cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 695cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 696cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 697cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 698cabdff1aSopenharmony_ci "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 699cabdff1aSopenharmony_ci 700cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp1], %[dest], 0x00) 701cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 702cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[tmp0], 0x00) 703cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 704cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp3], %[tmp0], 0x00) 705cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 706cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp4], %[tmp0], 0x00) 707cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 708cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 709cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 710cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 711cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 712cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 713cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 714cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t" 715cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 
716cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 717cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 718cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 719cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 720cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dest], 0x00) 721cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 722cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp2], %[tmp0], 0x00) 723cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 724cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp3], %[tmp0], 0x00) 725cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 726cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp4], %[tmp0], 0x00) 727cabdff1aSopenharmony_ci 728cabdff1aSopenharmony_ci // 2nd part 729cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 730cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 731cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[src], 0x08) 732cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[src], 0x18) 733cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[src], 0x28) 734cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[src], 0x38) 735cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 736cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 737cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 738cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 739cabdff1aSopenharmony_ci 740cabdff1aSopenharmony_ci /* ftmp11: dst03,dst02,dst01,dst00 */ 741cabdff1aSopenharmony_ci "li %[tmp0], 0x00160011 \n\t" 742cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 743cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 744cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 745cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 746cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 747cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 
748cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 749cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 750cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 751cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 752cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 753cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 754cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 755cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 756cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 757cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 758cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 759cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 760cabdff1aSopenharmony_ci 761cabdff1aSopenharmony_ci /* ftmp12: dst13,dst12,dst11,dst10 */ 762cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 763cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 764cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 765cabdff1aSopenharmony_ci "li %[tmp0], 0xffeaffef \n\t" 766cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 767cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 768cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 769cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 770cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 771cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 772cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 773cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 774cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 775cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 776cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 777cabdff1aSopenharmony_ci "psraw 
%[ftmp10], %[ftmp10], %[ftmp0] \n\t" 778cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 779cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 780cabdff1aSopenharmony_ci "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 781cabdff1aSopenharmony_ci 782cabdff1aSopenharmony_ci /* ftmp13: dst23,dst22,dst21,dst20 */ 783cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 784cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 785cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 786cabdff1aSopenharmony_ci "li %[tmp0], 0x0016ffef \n\t" 787cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 788cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 789cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 790cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 791cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 792cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 793cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 794cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 795cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 796cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 797cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 798cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 799cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 800cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 801cabdff1aSopenharmony_ci "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 802cabdff1aSopenharmony_ci 803cabdff1aSopenharmony_ci /* ftmp14: dst33,dst32,dst31,dst30 */ 804cabdff1aSopenharmony_ci "li %[tmp0], 0xffea0011 \n\t" 805cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 806cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 807cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 
808cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 809cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 810cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 811cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 812cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 813cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 814cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 815cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 816cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 817cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 818cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 819cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 820cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 821cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 822cabdff1aSopenharmony_ci "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 823cabdff1aSopenharmony_ci 824cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp1], %[dest], 0x04) 825cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 826cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[tmp0], 0x04) 827cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 828cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp3], %[tmp0], 0x04) 829cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 830cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp4], %[tmp0], 0x04) 831cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 832cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 833cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 834cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 835cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 836cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 
837cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 838cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t" 839cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 840cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 841cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 842cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 843cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 844cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dest], 0x04) 845cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 846cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp2], %[tmp0], 0x04) 847cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 848cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp3], %[tmp0], 0x04) 849cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 850cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp4], %[tmp0], 0x04) 851cabdff1aSopenharmony_ci 852cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 853cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 854cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 855cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 856cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 857cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 858cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 859cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), 860cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]) 861cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_32_64.f), 862cabdff1aSopenharmony_ci [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize) 863cabdff1aSopenharmony_ci :"memory" 864cabdff1aSopenharmony_ci ); 865cabdff1aSopenharmony_ci} 866cabdff1aSopenharmony_ci#endif 867cabdff1aSopenharmony_ci 868cabdff1aSopenharmony_ci/* Do inverse transform on 4x8 parts of 
block */
/*
 * DC-only variant: block[0] is the only coefficient read.  It is scaled in
 * two steps, (17 * dc + 4) >> 3 and then (12 * dc + 64) >> 7, and the result
 * is added to every pixel of the 4-wide, 8-high area at dest, with the sums
 * packed back to bytes using unsigned saturation.
 *
 * dest     : top-left pixel of the 4x8 destination area
 * linesize : byte distance between two destination rows
 * block    : coefficient block
 */
void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
{
    int dc = block[0];
    double ftmp[9];
    union mmi_intfloat64 dc_u;
    /* NOTE(review): DECLARE_VAR_LOW32 / RESTRICT_ASM_LOW32 come from
     * mmiutils.h; presumably they supply a scratch register on targets whose
     * 32-bit load/store macros need one - confirm against that header. */
    DECLARE_VAR_LOW32;

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;
    dc_u.i = dc;

    __asm__ volatile(
        /* ftmp0 = 0: used as pshufh selector (broadcast halfword 0) and as
         * the zero operand of the unpack/pack instructions below */
        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
        "pshufh     %[dc],      %[dc],      %[ftmp0]        \n\t"

        /* 4 pixels from each of the 8 rows */
        MMI_LWC1(%[ftmp1], %[dest0], 0x00)
        MMI_LWC1(%[ftmp2], %[dest1], 0x00)
        MMI_LWC1(%[ftmp3], %[dest2], 0x00)
        MMI_LWC1(%[ftmp4], %[dest3], 0x00)
        MMI_LWC1(%[ftmp5], %[dest4], 0x00)
        MMI_LWC1(%[ftmp6], %[dest5], 0x00)
        MMI_LWC1(%[ftmp7], %[dest6], 0x00)
        MMI_LWC1(%[ftmp8], %[dest7], 0x00)

        /* widen bytes to halfwords */
        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp7],   %[ftmp7],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp8],   %[ftmp8],   %[ftmp0]        \n\t"

        /* add the broadcast DC value */
        "paddsh     %[ftmp1],   %[ftmp1],   %[dc]           \n\t"
        "paddsh     %[ftmp2],   %[ftmp2],   %[dc]           \n\t"
        "paddsh     %[ftmp3],   %[ftmp3],   %[dc]           \n\t"
        "paddsh     %[ftmp4],   %[ftmp4],   %[dc]           \n\t"
        "paddsh     %[ftmp5],   %[ftmp5],   %[dc]           \n\t"
        "paddsh     %[ftmp6],   %[ftmp6],   %[dc]           \n\t"
        "paddsh     %[ftmp7],   %[ftmp7],   %[dc]           \n\t"
        "paddsh     %[ftmp8],   %[ftmp8],   %[dc]           \n\t"

        /* pack back to bytes */
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]        \n\t"
        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]        \n\t"
        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]        \n\t"
        "packushb   %[ftmp5],   %[ftmp5],   %[ftmp0]        \n\t"
        "packushb   %[ftmp6],   %[ftmp6],   %[ftmp0]        \n\t"
        "packushb   %[ftmp7],   %[ftmp7],   %[ftmp0]        \n\t"
        "packushb   %[ftmp8],   %[ftmp8],   %[ftmp0]        \n\t"

        MMI_SWC1(%[ftmp1], %[dest0], 0x00)
        MMI_SWC1(%[ftmp2], %[dest1], 0x00)
        MMI_SWC1(%[ftmp3], %[dest2], 0x00)
        MMI_SWC1(%[ftmp4], %[dest3], 0x00)
        MMI_SWC1(%[ftmp5], %[dest4], 0x00)
        MMI_SWC1(%[ftmp6], %[dest5], 0x00)
        MMI_SWC1(%[ftmp7], %[dest6], 0x00)
        MMI_SWC1(%[ftmp8], %[dest7], 0x00)
        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          RESTRICT_ASM_LOW32
          [ftmp8]"=&f"(ftmp[8]),
          /* pshufh overwrites %[dc], so it has to be a read-write operand;
           * an input-only operand must not be modified by the asm. */
          [dc]"+f"(dc_u.f)
        : [dest0]"r"(dest+0*linesize),  [dest1]"r"(dest+1*linesize),
          [dest2]"r"(dest+2*linesize),  [dest3]"r"(dest+3*linesize),
          [dest4]"r"(dest+4*linesize),  [dest5]"r"(dest+5*linesize),
          [dest6]"r"(dest+6*linesize),  [dest7]"r"(dest+7*linesize)
        : "memory"
    );
}

#if _MIPS_SIM != _ABIO32
/*
 * Inverse transform of a 4x8 block (8 rows of 4 coefficients), added to dest.
 *
 * dest     : top-left pixel of the 4x8 destination area
 * linesize : byte distance between two destination rows
 * block    : coefficients with a row stride of 8 int16_t; the first pass
 *            overwrites the first 4 entries of each of the 8 rows in place
 *
 * Pass 1 runs the 4-point transform on each row: (sum + 4) >> 3.
 * Pass 2 runs the 8-point transform down the 4 columns through
 * VC1_INV_TRANCS_8_TYPE2, adds the result to the destination pixels and
 * stores them with unsigned saturation.
 */
void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
{
    int16_t *src = block;
    int16_t *dst = block;
    double ftmp[23];
    uint64_t count = 8, tmp[1];
    /* 4-point transform matrix, one output sample per row.  It is read with
     * MMI_LDC1 (64-bit loads), hence the explicit 8-byte alignment; it is
     * never written, so it can live in read-only storage. */
    DECLARE_ALIGNED(8, static const int16_t, coeff[16]) = {
        17,  22,  17,  10,
        17,  10, -17, -22,
        17, -10, -17,  22,
        17, -22,  17, -10
    };

    // 1st loop: horizontal 4-point transform, one row per iteration
    __asm__ volatile (

        /* ftmp0 = shift count 3 */
        "li         %[tmp0],    0x03                        \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                    \n\t"

        /* The four matrix rows stay in ftmp2..ftmp5 for the whole loop */
        MMI_LDC1(%[ftmp2], %[coeff], 0x00)
        MMI_LDC1(%[ftmp3], %[coeff], 0x08)
        MMI_LDC1(%[ftmp4], %[coeff], 0x10)
        MMI_LDC1(%[ftmp5], %[coeff], 0x18)
        "1:                                                 \n\t"
        /* ftmp8: dst3,dst2,dst1,dst0 */
        MMI_LDC1(%[ftmp1], %[src], 0x00)
        "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]        \n\t"
        "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]        \n\t"
        "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]        \n\t"
        "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]        \n\t"
        "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]        \n\t"
        "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]        \n\t"
        "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]        \n\t"
        "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]        \n\t"
        "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]       \n\t"
        "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]        \n\t"
        "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]      \n\t"
        "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]      \n\t"
        "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]        \n\t"
        "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]        \n\t"
        /* keep the low 16 bits of every 32-bit result */
        "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]        \n\t"
        "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]        \n\t"
        "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]        \n\t"
        MMI_SDC1(%[ftmp8], %[dst], 0x00)

        /* Next row: stride is 8 coefficients = 0x10 bytes */
        PTR_ADDIU  "%[src],     %[src],     0x10            \n\t"
        PTR_ADDIU  "%[dst],     %[dst],     0x10            \n\t"
        "addiu      %[count],   %[count],   -0x01           \n\t"
        "bnez       %[count],   1b                          \n\t"
        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
          [tmp0]"=&r"(tmp[0]),          [count]"+&r"(count),
          [src]"+&r"(src),              [dst]"+&r"(dst)
        : [ff_pw_4]"f"(ff_pw_32_4.f),   [coeff]"r"(coeff)
        : "memory"
    );

    /* The first pass advanced src; rewind to the start of the block. */
    src = block;

    // 2nd loop: vertical 8-point transform on the 4 columns
    __asm__ volatile (
        /* ftmp0 = 7; NOTE(review): presumably the shift count consumed by
         * VC1_INV_TRANCS_8_TYPE2 - confirm against the macro definition. */
        "li         %[tmp0],    0x07                        \n\t"
        "mtc1       %[tmp0],    %[ftmp0]                    \n\t"

        /* Even rows 0,2,4,6 interleaved into ftmp5..ftmp8 */
        MMI_LDC1(%[ftmp1], %[src], 0x00)
        MMI_LDC1(%[ftmp2], %[src], 0x20)
        MMI_LDC1(%[ftmp3], %[src], 0x40)
        MMI_LDC1(%[ftmp4], %[src], 0x60)
        "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]        \n\t"
        "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]        \n\t"
        "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]        \n\t"
        "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]        \n\t"

        /* Odd rows 1,3,5,7 interleaved into ftmp9..ftmp12 */
        MMI_LDC1(%[ftmp1], %[src], 0x10)
        MMI_LDC1(%[ftmp2], %[src], 0x30)
        MMI_LDC1(%[ftmp3], %[src], 0x50)
        MMI_LDC1(%[ftmp4], %[src], 0x70)
        "punpcklhw  %[ftmp9],   %[ftmp1],   %[ftmp2]        \n\t"
        "punpckhhw  %[ftmp10],  %[ftmp1],   %[ftmp2]        \n\t"
        "punpcklhw  %[ftmp11],  %[ftmp3],   %[ftmp4]        \n\t"
        "punpckhhw  %[ftmp12],  %[ftmp3],   %[ftmp4]        \n\t"

        /* ftmp15:dst03,dst02,dst01,dst00 ftmp22:dst73,dst72,dst71,dst70 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp15], %[ftmp22], 0x0010000c, 0x0006000c,
                               0x000f0010, 0x00040009, %[ff_pw_64], %[ff_pw_1])

        /* ftmp16:dst13,dst12,dst11,dst10 ftmp21:dst63,dst62,dst61,dst60 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp16], %[ftmp21], 0x0006000c, 0xfff0fff4,
                               0xfffc000f, 0xfff7fff0, %[ff_pw_64], %[ff_pw_1])

        /* ftmp17:dst23,dst22,dst21,dst20 ftmp20:dst53,dst52,dst51,dst50 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp17], %[ftmp20], 0xfffa000c, 0x0010fff4,
                               0xfff00009, 0x000f0004, %[ff_pw_64], %[ff_pw_1])

        /* ftmp18:dst33,dst32,dst31,dst30 ftmp19:dst43,dst42,dst41,dst40 */
        VC1_INV_TRANCS_8_TYPE2(%[ftmp18], %[ftmp19], 0xfff0000c, 0xfffa000c,
                               0xfff70004, 0xfff0000f, %[ff_pw_64], %[ff_pw_1])

        /* Load 4 pixels from each of the 8 rows (tmp0 walks the rows) */
        MMI_LWC1(%[ftmp1], %[dest], 0x00)
        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
        /* widen bytes to halfwords */
        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp7],   %[ftmp7],   %[ftmp0]        \n\t"
        "punpcklbh  %[ftmp8],   %[ftmp8],   %[ftmp0]        \n\t"

        /* add the residual rows */
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp15]       \n\t"
        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp16]       \n\t"
        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp17]       \n\t"
        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp18]       \n\t"
        "paddh      %[ftmp5],   %[ftmp5],   %[ftmp19]       \n\t"
        "paddh      %[ftmp6],   %[ftmp6],   %[ftmp20]       \n\t"
        "paddh      %[ftmp7],   %[ftmp7],   %[ftmp21]       \n\t"
        "paddh      %[ftmp8],   %[ftmp8],   %[ftmp22]       \n\t"

        /* pack back to bytes */
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]        \n\t"
        "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]        \n\t"
        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]        \n\t"
        "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]        \n\t"
        "packushb   %[ftmp5],   %[ftmp5],   %[ftmp0]        \n\t"
        "packushb   %[ftmp6],   %[ftmp6],   %[ftmp0]        \n\t"
        "packushb   %[ftmp7],   %[ftmp7],   %[ftmp0]        \n\t"
        "packushb   %[ftmp8],   %[ftmp8],   %[ftmp0]        \n\t"

        MMI_SWC1(%[ftmp1], %[dest], 0x00)
        PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
        PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]     \n\t"
        MMI_SWC1(%[ftmp8], %[tmp0], 0x00)

        : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
          [ftmp12]"=&f"(ftmp[12]),      [ftmp13]"=&f"(ftmp[13]),
          [ftmp14]"=&f"(ftmp[14]),      [ftmp15]"=&f"(ftmp[15]),
          [ftmp16]"=&f"(ftmp[16]),      [ftmp17]"=&f"(ftmp[17]),
          [ftmp18]"=&f"(ftmp[18]),      [ftmp19]"=&f"(ftmp[19]),
          [ftmp20]"=&f"(ftmp[20]),      [ftmp21]"=&f"(ftmp[21]),
          [ftmp22]"=&f"(ftmp[22]),
          [tmp0]"=&r"(tmp[0])
        : [ff_pw_1]"f"(ff_pw_32_1.f),   [ff_pw_64]"f"(ff_pw_32_64.f),
          [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
        : "memory"
    );
}
#endif

/* Do inverse transform on 4x4 part of block */
1124cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 1125cabdff1aSopenharmony_ci{ 1126cabdff1aSopenharmony_ci int dc = block[0]; 1127cabdff1aSopenharmony_ci double ftmp[5]; 1128cabdff1aSopenharmony_ci union mmi_intfloat64 dc_u; 1129cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 1130cabdff1aSopenharmony_ci 1131cabdff1aSopenharmony_ci dc = (17 * dc + 4) >> 3; 1132cabdff1aSopenharmony_ci dc = (17 * dc + 64) >> 7; 1133cabdff1aSopenharmony_ci dc_u.i = dc; 1134cabdff1aSopenharmony_ci 1135cabdff1aSopenharmony_ci __asm__ volatile( 1136cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1137cabdff1aSopenharmony_ci "pshufh %[dc], %[dc], %[ftmp0] \n\t" 1138cabdff1aSopenharmony_ci 1139cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp1], %[dest0], 0x00) 1140cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[dest1], 0x00) 1141cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp3], %[dest2], 0x00) 1142cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp4], %[dest3], 0x00) 1143cabdff1aSopenharmony_ci 1144cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1145cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1146cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1147cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1148cabdff1aSopenharmony_ci 1149cabdff1aSopenharmony_ci "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 1150cabdff1aSopenharmony_ci "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 1151cabdff1aSopenharmony_ci "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 1152cabdff1aSopenharmony_ci "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 1153cabdff1aSopenharmony_ci 1154cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1155cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1156cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1157cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1158cabdff1aSopenharmony_ci 1159cabdff1aSopenharmony_ci 
MMI_SWC1(%[ftmp1], %[dest0], 0x00) 1160cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp2], %[dest1], 0x00) 1161cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp3], %[dest2], 0x00) 1162cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp4], %[dest3], 0x00) 1163cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1164cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1165cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 1166cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]) 1167cabdff1aSopenharmony_ci : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize), 1168cabdff1aSopenharmony_ci [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize), 1169cabdff1aSopenharmony_ci [dc]"f"(dc_u.f) 1170cabdff1aSopenharmony_ci : "memory" 1171cabdff1aSopenharmony_ci ); 1172cabdff1aSopenharmony_ci} 1173cabdff1aSopenharmony_ci 1174cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block) 1175cabdff1aSopenharmony_ci{ 1176cabdff1aSopenharmony_ci int16_t *src = block; 1177cabdff1aSopenharmony_ci int16_t *dst = block; 1178cabdff1aSopenharmony_ci double ftmp[16]; 1179cabdff1aSopenharmony_ci uint32_t count = 4, tmp[1]; 1180cabdff1aSopenharmony_ci int16_t coeff[16] = {17, 22, 17, 10, 1181cabdff1aSopenharmony_ci 17, 10,-17,-22, 1182cabdff1aSopenharmony_ci 17,-10,-17, 22, 1183cabdff1aSopenharmony_ci 17,-22, 17,-10}; 1184cabdff1aSopenharmony_ci // 1st loop 1185cabdff1aSopenharmony_ci __asm__ volatile ( 1186cabdff1aSopenharmony_ci 1187cabdff1aSopenharmony_ci "li %[tmp0], 0x03 \n\t" 1188cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 1189cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[coeff], 0x00) 1190cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[coeff], 0x08) 1191cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[coeff], 0x10) 1192cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp5], %[coeff], 0x18) 1193cabdff1aSopenharmony_ci "1: \n\t" 1194cabdff1aSopenharmony_ci /* ftmp8: dst3,dst2,dst1,dst0 */ 1195cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[src], 0x00) 
1196cabdff1aSopenharmony_ci "pmaddhw %[ftmp6], %[ftmp2], %[ftmp1] \n\t" 1197cabdff1aSopenharmony_ci "pmaddhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t" 1198cabdff1aSopenharmony_ci "pmaddhw %[ftmp8], %[ftmp4], %[ftmp1] \n\t" 1199cabdff1aSopenharmony_ci "pmaddhw %[ftmp9], %[ftmp5], %[ftmp1] \n\t" 1200cabdff1aSopenharmony_ci "punpcklwd %[ftmp10], %[ftmp6], %[ftmp7] \n\t" 1201cabdff1aSopenharmony_ci "punpckhwd %[ftmp11], %[ftmp6], %[ftmp7] \n\t" 1202cabdff1aSopenharmony_ci "punpcklwd %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 1203cabdff1aSopenharmony_ci "punpckhwd %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 1204cabdff1aSopenharmony_ci "paddw %[ftmp8], %[ftmp10], %[ftmp11] \n\t" 1205cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp6], %[ftmp7] \n\t" 1206cabdff1aSopenharmony_ci "paddw %[ftmp8], %[ftmp8], %[ff_pw_4] \n\t" 1207cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_4] \n\t" 1208cabdff1aSopenharmony_ci "psraw %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1209cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1210cabdff1aSopenharmony_ci "punpcklhw %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 1211cabdff1aSopenharmony_ci "punpckhhw %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 1212cabdff1aSopenharmony_ci "punpcklhw %[ftmp8], %[ftmp6], %[ftmp7] \n\t" 1213cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp8], %[dst], 0x00) 1214cabdff1aSopenharmony_ci 1215cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x10 \n\t" 1216cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x10 \n\t" 1217cabdff1aSopenharmony_ci "addiu %[count], %[count], -0x01 \n\t" 1218cabdff1aSopenharmony_ci "bnez %[count], 1b \n\t" 1219cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1220cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1221cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1222cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1223cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1224cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), 
[ftmp11]"=&f"(ftmp[11]), 1225cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), [count]"+&r"(count), 1226cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst) 1227cabdff1aSopenharmony_ci : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff) 1228cabdff1aSopenharmony_ci : "memory" 1229cabdff1aSopenharmony_ci ); 1230cabdff1aSopenharmony_ci 1231cabdff1aSopenharmony_ci src = block; 1232cabdff1aSopenharmony_ci 1233cabdff1aSopenharmony_ci // 2nd loop 1234cabdff1aSopenharmony_ci __asm__ volatile ( 1235cabdff1aSopenharmony_ci "li %[tmp0], 0x07 \n\t" 1236cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp0] \n\t" 1237cabdff1aSopenharmony_ci "li %[tmp0], 0x44 \n\t" 1238cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp15] \n\t" 1239cabdff1aSopenharmony_ci 1240cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp1], %[src], 0x00) 1241cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[src], 0x10) 1242cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp3], %[src], 0x20) 1243cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp4], %[src], 0x30) 1244cabdff1aSopenharmony_ci "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1245cabdff1aSopenharmony_ci "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1246cabdff1aSopenharmony_ci "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1247cabdff1aSopenharmony_ci "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 1248cabdff1aSopenharmony_ci 1249cabdff1aSopenharmony_ci /* ftmp11: dst03,dst02,dst01,dst00 */ 1250cabdff1aSopenharmony_ci "li %[tmp0], 0x00160011 \n\t" 1251cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 1252cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1253cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 1254cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1255cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1256cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1257cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1258cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1259cabdff1aSopenharmony_ci "pmaddhw 
%[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1260cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1261cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1262cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1263cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1264cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1265cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1266cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1267cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1268cabdff1aSopenharmony_ci "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 1269cabdff1aSopenharmony_ci 1270cabdff1aSopenharmony_ci /* ftmp12: dst13,dst12,dst11,dst10 */ 1271cabdff1aSopenharmony_ci "li %[tmp0], 0x000a0011 \n\t" 1272cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 1273cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1274cabdff1aSopenharmony_ci "li %[tmp0], 0xffeaffef \n\t" 1275cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1276cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1277cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1278cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1279cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1280cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1281cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1282cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1283cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1284cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1285cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1286cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1287cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1288cabdff1aSopenharmony_ci "punpckhhw 
%[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1289cabdff1aSopenharmony_ci "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 1290cabdff1aSopenharmony_ci 1291cabdff1aSopenharmony_ci /* ftmp13: dst23,dst22,dst21,dst20 */ 1292cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 1293cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 1294cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1295cabdff1aSopenharmony_ci "li %[tmp0], 0x0016ffef \n\t" 1296cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1297cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1298cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1299cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1300cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1301cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1302cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1303cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1304cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1305cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1306cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1307cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1308cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1309cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1310cabdff1aSopenharmony_ci "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 1311cabdff1aSopenharmony_ci 1312cabdff1aSopenharmony_ci /* ftmp14: dst33,dst32,dst31,dst30 */ 1313cabdff1aSopenharmony_ci "li %[tmp0], 0xffea0011 \n\t" 1314cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp3] \n\t" 1315cabdff1aSopenharmony_ci "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1316cabdff1aSopenharmony_ci "li %[tmp0], 0xfff60011 \n\t" 1317cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp4] \n\t" 1318cabdff1aSopenharmony_ci "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 
1319cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1320cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1321cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1322cabdff1aSopenharmony_ci "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1323cabdff1aSopenharmony_ci "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1324cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1325cabdff1aSopenharmony_ci "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1326cabdff1aSopenharmony_ci "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1327cabdff1aSopenharmony_ci "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1328cabdff1aSopenharmony_ci "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1329cabdff1aSopenharmony_ci "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1330cabdff1aSopenharmony_ci "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1331cabdff1aSopenharmony_ci "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 1332cabdff1aSopenharmony_ci 1333cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp1], %[dest], 0x00) 1334cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1335cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[tmp0], 0x00) 1336cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1337cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp3], %[tmp0], 0x00) 1338cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1339cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp4], %[tmp0], 0x00) 1340cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1341cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1342cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1343cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1344cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1345cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 1346cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 1347cabdff1aSopenharmony_ci "paddh %[ftmp3], 
%[ftmp3], %[ftmp13] \n\t" 1348cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 1349cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1350cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1351cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1352cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1353cabdff1aSopenharmony_ci 1354cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dest], 0x00) 1355cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1356cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp2], %[tmp0], 0x00) 1357cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1358cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp3], %[tmp0], 0x00) 1359cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1360cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp4], %[tmp0], 0x00) 1361cabdff1aSopenharmony_ci 1362cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 1363cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 1364cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 1365cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 1366cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 1367cabdff1aSopenharmony_ci [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 1368cabdff1aSopenharmony_ci [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), 1369cabdff1aSopenharmony_ci [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), 1370cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]) 1371cabdff1aSopenharmony_ci : [ff_pw_64]"f"(ff_pw_32_64.f), 1372cabdff1aSopenharmony_ci [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize) 1373cabdff1aSopenharmony_ci :"memory" 1374cabdff1aSopenharmony_ci ); 1375cabdff1aSopenharmony_ci} 1376cabdff1aSopenharmony_ci 1377cabdff1aSopenharmony_ci/* Apply overlap transform to horizontal edge */ 1378cabdff1aSopenharmony_civoid ff_vc1_h_overlap_mmi(uint8_t *src, ptrdiff_t stride) 
1379cabdff1aSopenharmony_ci{ 1380cabdff1aSopenharmony_ci int i; 1381cabdff1aSopenharmony_ci int a, b, c, d; 1382cabdff1aSopenharmony_ci int d1, d2; 1383cabdff1aSopenharmony_ci int rnd = 1; 1384cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 1385cabdff1aSopenharmony_ci a = src[-2]; 1386cabdff1aSopenharmony_ci b = src[-1]; 1387cabdff1aSopenharmony_ci c = src[0]; 1388cabdff1aSopenharmony_ci d = src[1]; 1389cabdff1aSopenharmony_ci d1 = (a - d + 3 + rnd) >> 3; 1390cabdff1aSopenharmony_ci d2 = (a - d + b - c + 4 - rnd) >> 3; 1391cabdff1aSopenharmony_ci 1392cabdff1aSopenharmony_ci src[-2] = a - d1; 1393cabdff1aSopenharmony_ci src[-1] = av_clip_uint8(b - d2); 1394cabdff1aSopenharmony_ci src[0] = av_clip_uint8(c + d2); 1395cabdff1aSopenharmony_ci src[1] = d + d1; 1396cabdff1aSopenharmony_ci src += stride; 1397cabdff1aSopenharmony_ci rnd = !rnd; 1398cabdff1aSopenharmony_ci } 1399cabdff1aSopenharmony_ci} 1400cabdff1aSopenharmony_ci 1401cabdff1aSopenharmony_civoid ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags) 1402cabdff1aSopenharmony_ci{ 1403cabdff1aSopenharmony_ci int i; 1404cabdff1aSopenharmony_ci int a, b, c, d; 1405cabdff1aSopenharmony_ci int d1, d2; 1406cabdff1aSopenharmony_ci int rnd1 = flags & 2 ? 
3 : 4; 1407cabdff1aSopenharmony_ci int rnd2 = 7 - rnd1; 1408cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 1409cabdff1aSopenharmony_ci a = left[6]; 1410cabdff1aSopenharmony_ci b = left[7]; 1411cabdff1aSopenharmony_ci c = right[0]; 1412cabdff1aSopenharmony_ci d = right[1]; 1413cabdff1aSopenharmony_ci d1 = a - d; 1414cabdff1aSopenharmony_ci d2 = a - d + b - c; 1415cabdff1aSopenharmony_ci 1416cabdff1aSopenharmony_ci left[6] = ((a << 3) - d1 + rnd1) >> 3; 1417cabdff1aSopenharmony_ci left[7] = ((b << 3) - d2 + rnd2) >> 3; 1418cabdff1aSopenharmony_ci right[0] = ((c << 3) + d2 + rnd1) >> 3; 1419cabdff1aSopenharmony_ci right[1] = ((d << 3) + d1 + rnd2) >> 3; 1420cabdff1aSopenharmony_ci 1421cabdff1aSopenharmony_ci right += right_stride; 1422cabdff1aSopenharmony_ci left += left_stride; 1423cabdff1aSopenharmony_ci if (flags & 1) { 1424cabdff1aSopenharmony_ci rnd2 = 7 - rnd2; 1425cabdff1aSopenharmony_ci rnd1 = 7 - rnd1; 1426cabdff1aSopenharmony_ci } 1427cabdff1aSopenharmony_ci } 1428cabdff1aSopenharmony_ci} 1429cabdff1aSopenharmony_ci 1430cabdff1aSopenharmony_ci/* Apply overlap transform to vertical edge */ 1431cabdff1aSopenharmony_civoid ff_vc1_v_overlap_mmi(uint8_t *src, ptrdiff_t stride) 1432cabdff1aSopenharmony_ci{ 1433cabdff1aSopenharmony_ci int i; 1434cabdff1aSopenharmony_ci int a, b, c, d; 1435cabdff1aSopenharmony_ci int d1, d2; 1436cabdff1aSopenharmony_ci int rnd = 1; 1437cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 1438cabdff1aSopenharmony_ci a = src[-2 * stride]; 1439cabdff1aSopenharmony_ci b = src[-stride]; 1440cabdff1aSopenharmony_ci c = src[0]; 1441cabdff1aSopenharmony_ci d = src[stride]; 1442cabdff1aSopenharmony_ci d1 = (a - d + 3 + rnd) >> 3; 1443cabdff1aSopenharmony_ci d2 = (a - d + b - c + 4 - rnd) >> 3; 1444cabdff1aSopenharmony_ci 1445cabdff1aSopenharmony_ci src[-2 * stride] = a - d1; 1446cabdff1aSopenharmony_ci src[-stride] = av_clip_uint8(b - d2); 1447cabdff1aSopenharmony_ci src[0] = av_clip_uint8(c + d2); 1448cabdff1aSopenharmony_ci src[stride] = d 
+ d1; 1449cabdff1aSopenharmony_ci src++; 1450cabdff1aSopenharmony_ci rnd = !rnd; 1451cabdff1aSopenharmony_ci } 1452cabdff1aSopenharmony_ci} 1453cabdff1aSopenharmony_ci 1454cabdff1aSopenharmony_civoid ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom) 1455cabdff1aSopenharmony_ci{ 1456cabdff1aSopenharmony_ci int i; 1457cabdff1aSopenharmony_ci int a, b, c, d; 1458cabdff1aSopenharmony_ci int d1, d2; 1459cabdff1aSopenharmony_ci int rnd1 = 4, rnd2 = 3; 1460cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 1461cabdff1aSopenharmony_ci a = top[48]; 1462cabdff1aSopenharmony_ci b = top[56]; 1463cabdff1aSopenharmony_ci c = bottom[0]; 1464cabdff1aSopenharmony_ci d = bottom[8]; 1465cabdff1aSopenharmony_ci d1 = a - d; 1466cabdff1aSopenharmony_ci d2 = a - d + b - c; 1467cabdff1aSopenharmony_ci 1468cabdff1aSopenharmony_ci top[48] = ((a << 3) - d1 + rnd1) >> 3; 1469cabdff1aSopenharmony_ci top[56] = ((b << 3) - d2 + rnd2) >> 3; 1470cabdff1aSopenharmony_ci bottom[0] = ((c << 3) + d2 + rnd1) >> 3; 1471cabdff1aSopenharmony_ci bottom[8] = ((d << 3) + d1 + rnd2) >> 3; 1472cabdff1aSopenharmony_ci 1473cabdff1aSopenharmony_ci bottom++; 1474cabdff1aSopenharmony_ci top++; 1475cabdff1aSopenharmony_ci rnd2 = 7 - rnd2; 1476cabdff1aSopenharmony_ci rnd1 = 7 - rnd1; 1477cabdff1aSopenharmony_ci } 1478cabdff1aSopenharmony_ci} 1479cabdff1aSopenharmony_ci 1480cabdff1aSopenharmony_ci/** 1481cabdff1aSopenharmony_ci * VC-1 in-loop deblocking filter for one line 1482cabdff1aSopenharmony_ci * @param src source block type 1483cabdff1aSopenharmony_ci * @param stride block stride 1484cabdff1aSopenharmony_ci * @param pq block quantizer 1485cabdff1aSopenharmony_ci * @return whether other 3 pairs should be filtered or not 1486cabdff1aSopenharmony_ci * @see 8.6 1487cabdff1aSopenharmony_ci */ 1488cabdff1aSopenharmony_cistatic av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq) 1489cabdff1aSopenharmony_ci{ 1490cabdff1aSopenharmony_ci int a0 = (2 * (src[-2 * stride] - src[1 * stride]) - 
1491cabdff1aSopenharmony_ci 5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3; 1492cabdff1aSopenharmony_ci int a0_sign = a0 >> 31; /* Store sign */ 1493cabdff1aSopenharmony_ci 1494cabdff1aSopenharmony_ci a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ 1495cabdff1aSopenharmony_ci if (a0 < pq) { 1496cabdff1aSopenharmony_ci int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) - 1497cabdff1aSopenharmony_ci 5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3); 1498cabdff1aSopenharmony_ci int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) - 1499cabdff1aSopenharmony_ci 5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3); 1500cabdff1aSopenharmony_ci if (a1 < a0 || a2 < a0) { 1501cabdff1aSopenharmony_ci int clip = src[-1 * stride] - src[0 * stride]; 1502cabdff1aSopenharmony_ci int clip_sign = clip >> 31; 1503cabdff1aSopenharmony_ci 1504cabdff1aSopenharmony_ci clip = ((clip ^ clip_sign) - clip_sign) >> 1; 1505cabdff1aSopenharmony_ci if (clip) { 1506cabdff1aSopenharmony_ci int a3 = FFMIN(a1, a2); 1507cabdff1aSopenharmony_ci int d = 5 * (a3 - a0); 1508cabdff1aSopenharmony_ci int d_sign = (d >> 31); 1509cabdff1aSopenharmony_ci 1510cabdff1aSopenharmony_ci d = ((d ^ d_sign) - d_sign) >> 3; 1511cabdff1aSopenharmony_ci d_sign ^= a0_sign; 1512cabdff1aSopenharmony_ci 1513cabdff1aSopenharmony_ci if (d_sign ^ clip_sign) 1514cabdff1aSopenharmony_ci d = 0; 1515cabdff1aSopenharmony_ci else { 1516cabdff1aSopenharmony_ci d = FFMIN(d, clip); 1517cabdff1aSopenharmony_ci d = (d ^ d_sign) - d_sign; /* Restore sign */ 1518cabdff1aSopenharmony_ci src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d); 1519cabdff1aSopenharmony_ci src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d); 1520cabdff1aSopenharmony_ci } 1521cabdff1aSopenharmony_ci return 1; 1522cabdff1aSopenharmony_ci } 1523cabdff1aSopenharmony_ci } 1524cabdff1aSopenharmony_ci } 1525cabdff1aSopenharmony_ci return 0; 1526cabdff1aSopenharmony_ci} 1527cabdff1aSopenharmony_ci 1528cabdff1aSopenharmony_ci/** 
1529cabdff1aSopenharmony_ci * VC-1 in-loop deblocking filter 1530cabdff1aSopenharmony_ci * @param src source block type 1531cabdff1aSopenharmony_ci * @param step distance between horizontally adjacent elements 1532cabdff1aSopenharmony_ci * @param stride distance between vertically adjacent elements 1533cabdff1aSopenharmony_ci * @param len edge length to filter (4 or 8 pixels) 1534cabdff1aSopenharmony_ci * @param pq block quantizer 1535cabdff1aSopenharmony_ci * @see 8.6 1536cabdff1aSopenharmony_ci */ 1537cabdff1aSopenharmony_cistatic inline void vc1_loop_filter(uint8_t *src, int step, int stride, 1538cabdff1aSopenharmony_ci int len, int pq) 1539cabdff1aSopenharmony_ci{ 1540cabdff1aSopenharmony_ci int i; 1541cabdff1aSopenharmony_ci int filt3; 1542cabdff1aSopenharmony_ci 1543cabdff1aSopenharmony_ci for (i = 0; i < len; i += 4) { 1544cabdff1aSopenharmony_ci filt3 = vc1_filter_line(src + 2 * step, stride, pq); 1545cabdff1aSopenharmony_ci if (filt3) { 1546cabdff1aSopenharmony_ci vc1_filter_line(src + 0 * step, stride, pq); 1547cabdff1aSopenharmony_ci vc1_filter_line(src + 1 * step, stride, pq); 1548cabdff1aSopenharmony_ci vc1_filter_line(src + 3 * step, stride, pq); 1549cabdff1aSopenharmony_ci } 1550cabdff1aSopenharmony_ci src += step * 4; 1551cabdff1aSopenharmony_ci } 1552cabdff1aSopenharmony_ci} 1553cabdff1aSopenharmony_ci 1554cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1555cabdff1aSopenharmony_ci{ 1556cabdff1aSopenharmony_ci vc1_loop_filter(src, 1, stride, 4, pq); 1557cabdff1aSopenharmony_ci} 1558cabdff1aSopenharmony_ci 1559cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1560cabdff1aSopenharmony_ci{ 1561cabdff1aSopenharmony_ci vc1_loop_filter(src, stride, 1, 4, pq); 1562cabdff1aSopenharmony_ci} 1563cabdff1aSopenharmony_ci 1564cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1565cabdff1aSopenharmony_ci{ 1566cabdff1aSopenharmony_ci 
vc1_loop_filter(src, 1, stride, 8, pq); 1567cabdff1aSopenharmony_ci} 1568cabdff1aSopenharmony_ci 1569cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1570cabdff1aSopenharmony_ci{ 1571cabdff1aSopenharmony_ci vc1_loop_filter(src, stride, 1, 8, pq); 1572cabdff1aSopenharmony_ci} 1573cabdff1aSopenharmony_ci 1574cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1575cabdff1aSopenharmony_ci{ 1576cabdff1aSopenharmony_ci vc1_loop_filter(src, 1, stride, 16, pq); 1577cabdff1aSopenharmony_ci} 1578cabdff1aSopenharmony_ci 1579cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq) 1580cabdff1aSopenharmony_ci{ 1581cabdff1aSopenharmony_ci vc1_loop_filter(src, stride, 1, 16, pq); 1582cabdff1aSopenharmony_ci} 1583cabdff1aSopenharmony_ci 1584cabdff1aSopenharmony_civoid ff_put_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, 1585cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd) 1586cabdff1aSopenharmony_ci{ 1587cabdff1aSopenharmony_ci ff_put_pixels8_8_mmi(dst, src, stride, 8); 1588cabdff1aSopenharmony_ci} 1589cabdff1aSopenharmony_civoid ff_put_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, 1590cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd) 1591cabdff1aSopenharmony_ci{ 1592cabdff1aSopenharmony_ci ff_put_pixels16_8_mmi(dst, src, stride, 16); 1593cabdff1aSopenharmony_ci} 1594cabdff1aSopenharmony_civoid ff_avg_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, 1595cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd) 1596cabdff1aSopenharmony_ci{ 1597cabdff1aSopenharmony_ci ff_avg_pixels8_8_mmi(dst, src, stride, 8); 1598cabdff1aSopenharmony_ci} 1599cabdff1aSopenharmony_civoid ff_avg_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, 1600cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd) 1601cabdff1aSopenharmony_ci{ 1602cabdff1aSopenharmony_ci ff_avg_pixels16_8_mmi(dst, src, stride, 16); 1603cabdff1aSopenharmony_ci} 1604cabdff1aSopenharmony_ci 
1605cabdff1aSopenharmony_ci#define OP_PUT(S, D) 1606cabdff1aSopenharmony_ci#define OP_AVG(S, D) \ 1607cabdff1aSopenharmony_ci "ldc1 $f16, "#S" \n\t" \ 1608cabdff1aSopenharmony_ci "pavgb "#D", "#D", $f16 \n\t" 1609cabdff1aSopenharmony_ci 1610cabdff1aSopenharmony_ci/** Add rounder from $f14 to $f6 and pack result at destination */ 1611cabdff1aSopenharmony_ci#define NORMALIZE_MMI(SHIFT) \ 1612cabdff1aSopenharmony_ci "paddh $f6, $f6, $f14 \n\t" /* +bias-r */ \ 1613cabdff1aSopenharmony_ci "paddh $f8, $f8, $f14 \n\t" /* +bias-r */ \ 1614cabdff1aSopenharmony_ci "psrah $f6, $f6, "SHIFT" \n\t" \ 1615cabdff1aSopenharmony_ci "psrah $f8, $f8, "SHIFT" \n\t" 1616cabdff1aSopenharmony_ci 1617cabdff1aSopenharmony_ci#define TRANSFER_DO_PACK(OP) \ 1618cabdff1aSopenharmony_ci "packushb $f6, $f6, $f8 \n\t" \ 1619cabdff1aSopenharmony_ci OP((%[dst]), $f6) \ 1620cabdff1aSopenharmony_ci "sdc1 $f6, 0x00(%[dst]) \n\t" 1621cabdff1aSopenharmony_ci 1622cabdff1aSopenharmony_ci#define TRANSFER_DONT_PACK(OP) \ 1623cabdff1aSopenharmony_ci OP(0(%[dst]), $f6) \ 1624cabdff1aSopenharmony_ci OP(8(%[dst]), $f8) \ 1625cabdff1aSopenharmony_ci "sdc1 $f6, 0x00(%[dst]) \n\t" \ 1626cabdff1aSopenharmony_ci "sdc1 $f8, 0x08(%[dst]) \n\t" 1627cabdff1aSopenharmony_ci 1628cabdff1aSopenharmony_ci/** @see MSPEL_FILTER13_CORE for use as UNPACK macro */ 1629cabdff1aSopenharmony_ci#define DO_UNPACK(reg) \ 1630cabdff1aSopenharmony_ci "punpcklbh "reg", "reg", $f0 \n\t" 1631cabdff1aSopenharmony_ci#define DONT_UNPACK(reg) 1632cabdff1aSopenharmony_ci 1633cabdff1aSopenharmony_ci/** Compute the rounder 32-r or 8-r and unpacks it to $f14 */ 1634cabdff1aSopenharmony_ci#define LOAD_ROUNDER_MMI(ROUND) \ 1635cabdff1aSopenharmony_ci "lwc1 $f14, "ROUND" \n\t" \ 1636cabdff1aSopenharmony_ci "punpcklhw $f14, $f14, $f14 \n\t" \ 1637cabdff1aSopenharmony_ci "punpcklwd $f14, $f14, $f14 \n\t" 1638cabdff1aSopenharmony_ci 1639cabdff1aSopenharmony_ci 1640cabdff1aSopenharmony_ci#define SHIFT2_LINE(OFF, R0, R1, R2, R3) \ 
1641cabdff1aSopenharmony_ci "paddh "#R1", "#R1", "#R2" \n\t" \ 1642cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], %[stride1] \n\t" \ 1643cabdff1aSopenharmony_ci MMI_ULWC1(R0, $9, 0x00) \ 1644cabdff1aSopenharmony_ci "pmullh "#R1", "#R1", $f6 \n\t" \ 1645cabdff1aSopenharmony_ci "punpcklbh "#R0", "#R0", $f0 \n\t" \ 1646cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], %[stride] \n\t" \ 1647cabdff1aSopenharmony_ci MMI_ULWC1(R3, $9, 0x00) \ 1648cabdff1aSopenharmony_ci "psubh "#R1", "#R1", "#R0" \n\t" \ 1649cabdff1aSopenharmony_ci "punpcklbh "#R3", "#R3", $f0 \n\t" \ 1650cabdff1aSopenharmony_ci "paddh "#R1", "#R1", $f14 \n\t" \ 1651cabdff1aSopenharmony_ci "psubh "#R1", "#R1", "#R3" \n\t" \ 1652cabdff1aSopenharmony_ci "psrah "#R1", "#R1", %[shift] \n\t" \ 1653cabdff1aSopenharmony_ci MMI_SDC1(R1, %[dst], OFF) \ 1654cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 1655cabdff1aSopenharmony_ci 1656cabdff1aSopenharmony_ci/** Sacrificing $f12 makes it possible to pipeline loads from src */ 1657cabdff1aSopenharmony_cistatic void vc1_put_ver_16b_shift2_mmi(int16_t *dst, 1658cabdff1aSopenharmony_ci const uint8_t *src, mips_reg stride, 1659cabdff1aSopenharmony_ci int rnd, int64_t shift) 1660cabdff1aSopenharmony_ci{ 1661cabdff1aSopenharmony_ci union mmi_intfloat64 shift_u; 1662cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 1663cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; 1664cabdff1aSopenharmony_ci shift_u.i = shift; 1665cabdff1aSopenharmony_ci 1666cabdff1aSopenharmony_ci __asm__ volatile( 1667cabdff1aSopenharmony_ci "pxor $f0, $f0, $f0 \n\t" 1668cabdff1aSopenharmony_ci "li $8, 0x03 \n\t" 1669cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") 1670cabdff1aSopenharmony_ci "1: \n\t" 1671cabdff1aSopenharmony_ci MMI_ULWC1($f4, %[src], 0x00) 1672cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 1673cabdff1aSopenharmony_ci MMI_ULWC1($f6, %[src], 0x00) 1674cabdff1aSopenharmony_ci "punpcklbh $f4, $f4, $f0 \n\t" 1675cabdff1aSopenharmony_ci "punpcklbh $f6, $f6, $f0 \n\t" 
1676cabdff1aSopenharmony_ci SHIFT2_LINE( 0, $f2, $f4, $f6, $f8) 1677cabdff1aSopenharmony_ci SHIFT2_LINE( 24, $f4, $f6, $f8, $f2) 1678cabdff1aSopenharmony_ci SHIFT2_LINE( 48, $f6, $f8, $f2, $f4) 1679cabdff1aSopenharmony_ci SHIFT2_LINE( 72, $f8, $f2, $f4, $f6) 1680cabdff1aSopenharmony_ci SHIFT2_LINE( 96, $f2, $f4, $f6, $f8) 1681cabdff1aSopenharmony_ci SHIFT2_LINE(120, $f4, $f6, $f8, $f2) 1682cabdff1aSopenharmony_ci SHIFT2_LINE(144, $f6, $f8, $f2, $f4) 1683cabdff1aSopenharmony_ci SHIFT2_LINE(168, $f8, $f2, $f4, $f6) 1684cabdff1aSopenharmony_ci PTR_SUBU "%[src], %[src], %[stride2] \n\t" 1685cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x08 \n\t" 1686cabdff1aSopenharmony_ci "addiu $8, $8, -0x01 \n\t" 1687cabdff1aSopenharmony_ci "bnez $8, 1b \n\t" 1688cabdff1aSopenharmony_ci : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT 1689cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) 1690cabdff1aSopenharmony_ci : [stride]"r"(stride), [stride1]"r"(-2*stride), 1691cabdff1aSopenharmony_ci [shift]"f"(shift_u.f), [rnd]"m"(rnd), 1692cabdff1aSopenharmony_ci [stride2]"r"(9*stride-4) 1693cabdff1aSopenharmony_ci : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", 1694cabdff1aSopenharmony_ci "$f14", "$f16", "memory" 1695cabdff1aSopenharmony_ci ); 1696cabdff1aSopenharmony_ci} 1697cabdff1aSopenharmony_ci 1698cabdff1aSopenharmony_ci/** 1699cabdff1aSopenharmony_ci * Data is already unpacked, so some operations can directly be made from 1700cabdff1aSopenharmony_ci * memory. 
1701cabdff1aSopenharmony_ci */ 1702cabdff1aSopenharmony_ci#define VC1_HOR_16B_SHIFT2(OP, OPNAME) \ 1703cabdff1aSopenharmony_cistatic void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \ 1704cabdff1aSopenharmony_ci const int16_t *src, int rnd) \ 1705cabdff1aSopenharmony_ci{ \ 1706cabdff1aSopenharmony_ci int h = 8; \ 1707cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 1708cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; \ 1709cabdff1aSopenharmony_ci \ 1710cabdff1aSopenharmony_ci src -= 1; \ 1711cabdff1aSopenharmony_ci rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */ \ 1712cabdff1aSopenharmony_ci \ 1713cabdff1aSopenharmony_ci __asm__ volatile( \ 1714cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") \ 1715cabdff1aSopenharmony_ci "1: \n\t" \ 1716cabdff1aSopenharmony_ci MMI_ULDC1($f2, %[src], 0x00) \ 1717cabdff1aSopenharmony_ci MMI_ULDC1($f4, %[src], 0x08) \ 1718cabdff1aSopenharmony_ci MMI_ULDC1($f6, %[src], 0x02) \ 1719cabdff1aSopenharmony_ci MMI_ULDC1($f8, %[src], 0x0a) \ 1720cabdff1aSopenharmony_ci MMI_ULDC1($f0, %[src], 0x06) \ 1721cabdff1aSopenharmony_ci "paddh $f2, $f2, $f0 \n\t" \ 1722cabdff1aSopenharmony_ci MMI_ULDC1($f0, %[src], 0x0e) \ 1723cabdff1aSopenharmony_ci "paddh $f4, $f4, $f0 \n\t" \ 1724cabdff1aSopenharmony_ci MMI_ULDC1($f0, %[src], 0x04) \ 1725cabdff1aSopenharmony_ci "paddh $f6, $f6, $f0 \n\t" \ 1726cabdff1aSopenharmony_ci MMI_ULDC1($f0, %[src], 0x0b) \ 1727cabdff1aSopenharmony_ci "paddh $f8, $f8, $f0 \n\t" \ 1728cabdff1aSopenharmony_ci "pmullh $f6, $f6, %[ff_pw_9] \n\t" \ 1729cabdff1aSopenharmony_ci "pmullh $f8, $f8, %[ff_pw_9] \n\t" \ 1730cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" \ 1731cabdff1aSopenharmony_ci "psubh $f8, $f8, $f4 \n\t" \ 1732cabdff1aSopenharmony_ci "li $8, 0x07 \n\t" \ 1733cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 1734cabdff1aSopenharmony_ci NORMALIZE_MMI("$f16") \ 1735cabdff1aSopenharmony_ci /* Remove bias */ \ 1736cabdff1aSopenharmony_ci "paddh $f6, $f6, %[ff_pw_128] \n\t" \ 1737cabdff1aSopenharmony_ci "paddh 
$f8, $f8, %[ff_pw_128] \n\t" \ 1738cabdff1aSopenharmony_ci TRANSFER_DO_PACK(OP) \ 1739cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" \ 1740cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 0x18 \n\t" \ 1741cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1742cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" \ 1743cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \ 1744cabdff1aSopenharmony_ci [h]"+r"(h), \ 1745cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) \ 1746cabdff1aSopenharmony_ci : [stride]"r"(stride), [rnd]"m"(rnd), \ 1747cabdff1aSopenharmony_ci [ff_pw_9]"f"(ff_pw_9.f), [ff_pw_128]"f"(ff_pw_128.f) \ 1748cabdff1aSopenharmony_ci : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14", \ 1749cabdff1aSopenharmony_ci "$f16", "memory" \ 1750cabdff1aSopenharmony_ci ); \ 1751cabdff1aSopenharmony_ci} 1752cabdff1aSopenharmony_ci 1753cabdff1aSopenharmony_ciVC1_HOR_16B_SHIFT2(OP_PUT, put_) 1754cabdff1aSopenharmony_ciVC1_HOR_16B_SHIFT2(OP_AVG, avg_) 1755cabdff1aSopenharmony_ci 1756cabdff1aSopenharmony_ci/** 1757cabdff1aSopenharmony_ci * Purely vertical or horizontal 1/2 shift interpolation. 1758cabdff1aSopenharmony_ci * Sacrify $f12 for *9 factor. 
1759cabdff1aSopenharmony_ci */ 1760cabdff1aSopenharmony_ci#define VC1_SHIFT2(OP, OPNAME)\ 1761cabdff1aSopenharmony_cistatic void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \ 1762cabdff1aSopenharmony_ci mips_reg stride, int rnd, \ 1763cabdff1aSopenharmony_ci mips_reg offset) \ 1764cabdff1aSopenharmony_ci{ \ 1765cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 1766cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; \ 1767cabdff1aSopenharmony_ci \ 1768cabdff1aSopenharmony_ci rnd = 8 - rnd; \ 1769cabdff1aSopenharmony_ci \ 1770cabdff1aSopenharmony_ci __asm__ volatile( \ 1771cabdff1aSopenharmony_ci "pxor $f0, $f0, $f0 \n\t" \ 1772cabdff1aSopenharmony_ci "li $10, 0x08 \n\t" \ 1773cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") \ 1774cabdff1aSopenharmony_ci "1: \n\t" \ 1775cabdff1aSopenharmony_ci MMI_ULWC1($f6, %[src], 0x00) \ 1776cabdff1aSopenharmony_ci MMI_ULWC1($f8, %[src], 0x04) \ 1777cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], %[offset] \n\t" \ 1778cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x00) \ 1779cabdff1aSopenharmony_ci MMI_ULWC1($f4, $9, 0x04) \ 1780cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[offset] \n\t" \ 1781cabdff1aSopenharmony_ci "punpcklbh $f6, $f6, $f0 \n\t" \ 1782cabdff1aSopenharmony_ci "punpcklbh $f8, $f8, $f0 \n\t" \ 1783cabdff1aSopenharmony_ci "punpcklbh $f2, $f2, $f0 \n\t" \ 1784cabdff1aSopenharmony_ci "punpcklbh $f4, $f4, $f0 \n\t" \ 1785cabdff1aSopenharmony_ci "paddh $f6, $f6, $f2 \n\t" \ 1786cabdff1aSopenharmony_ci "paddh $f8, $f8, $f4 \n\t" \ 1787cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \ 1788cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x00) \ 1789cabdff1aSopenharmony_ci MMI_ULWC1($f4, $9, 0x04) \ 1790cabdff1aSopenharmony_ci "pmullh $f6, $f6, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \ 1791cabdff1aSopenharmony_ci "pmullh $f8, $f8, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \ 1792cabdff1aSopenharmony_ci "punpcklbh $f2, $f2, $f0 \n\t" \ 1793cabdff1aSopenharmony_ci "punpcklbh $f4, $f4, $f0 \n\t" \ 1794cabdff1aSopenharmony_ci "psubh 
$f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \ 1795cabdff1aSopenharmony_ci "psubh $f8, $f8, $f4 \n\t" /*-1,9,9,0*/ \ 1796cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], %[offset] \n\t" \ 1797cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x00) \ 1798cabdff1aSopenharmony_ci MMI_ULWC1($f4, $9, 0x04) \ 1799cabdff1aSopenharmony_ci "punpcklbh $f2, $f2, $f0 \n\t" \ 1800cabdff1aSopenharmony_ci "punpcklbh $f4, $f4, $f0 \n\t" \ 1801cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" /*-1,9,9,-1*/ \ 1802cabdff1aSopenharmony_ci "psubh $f8, $f8, $f4 \n\t" /*-1,9,9,-1*/ \ 1803cabdff1aSopenharmony_ci "li $8, 0x04 \n\t" \ 1804cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 1805cabdff1aSopenharmony_ci NORMALIZE_MMI("$f16") \ 1806cabdff1aSopenharmony_ci "packushb $f6, $f6, $f8 \n\t" \ 1807cabdff1aSopenharmony_ci OP((%[dst]), $f6) \ 1808cabdff1aSopenharmony_ci "sdc1 $f6, 0x00(%[dst]) \n\t" \ 1809cabdff1aSopenharmony_ci "addiu $10, $10, -0x01 \n\t" \ 1810cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride1] \n\t" \ 1811cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1812cabdff1aSopenharmony_ci "bnez $10, 1b \n\t" \ 1813cabdff1aSopenharmony_ci : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 1814cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) \ 1815cabdff1aSopenharmony_ci : [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \ 1816cabdff1aSopenharmony_ci [stride]"r"(stride), [rnd]"m"(rnd), \ 1817cabdff1aSopenharmony_ci [stride1]"r"(stride-offset), \ 1818cabdff1aSopenharmony_ci [ff_pw_9]"f"(ff_pw_9.f) \ 1819cabdff1aSopenharmony_ci : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \ 1820cabdff1aSopenharmony_ci "$f14", "$f16", "memory" \ 1821cabdff1aSopenharmony_ci ); \ 1822cabdff1aSopenharmony_ci} 1823cabdff1aSopenharmony_ci 1824cabdff1aSopenharmony_ciVC1_SHIFT2(OP_PUT, put_) 1825cabdff1aSopenharmony_ciVC1_SHIFT2(OP_AVG, avg_) 1826cabdff1aSopenharmony_ci 1827cabdff1aSopenharmony_ci/** 1828cabdff1aSopenharmony_ci * Core of the 1/4 and 3/4 shift bicubic interpolation. 
1829cabdff1aSopenharmony_ci * 1830cabdff1aSopenharmony_ci * @param UNPACK Macro unpacking arguments from 8 to 16bits (can be empty). 1831cabdff1aSopenharmony_ci * @param LOAD "MMI_ULWC1" or "MMI_ULDC1", if data read is already unpacked. 1832cabdff1aSopenharmony_ci * @param M "1" for MMI_ULWC1, "2" for MMI_ULDC1. 1833cabdff1aSopenharmony_ci * @param A1 Stride address of 1st tap (beware of unpacked/packed). 1834cabdff1aSopenharmony_ci * @param A2 Stride address of 2nd tap 1835cabdff1aSopenharmony_ci * @param A3 Stride address of 3rd tap 1836cabdff1aSopenharmony_ci * @param A4 Stride address of 4th tap 1837cabdff1aSopenharmony_ci */ 1838cabdff1aSopenharmony_ci#define MSPEL_FILTER13_CORE(UNPACK, LOAD, M, A1, A2, A3, A4) \ 1839cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A1" \n\t" \ 1840cabdff1aSopenharmony_ci LOAD($f2, $9, M*0) \ 1841cabdff1aSopenharmony_ci LOAD($f4, $9, M*4) \ 1842cabdff1aSopenharmony_ci UNPACK("$f2") \ 1843cabdff1aSopenharmony_ci UNPACK("$f4") \ 1844cabdff1aSopenharmony_ci "pmullh $f2, $f2, %[ff_pw_3] \n\t" \ 1845cabdff1aSopenharmony_ci "pmullh $f4, $f4, %[ff_pw_3] \n\t" \ 1846cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A2" \n\t" \ 1847cabdff1aSopenharmony_ci LOAD($f6, $9, M*0) \ 1848cabdff1aSopenharmony_ci LOAD($f8, $9, M*4) \ 1849cabdff1aSopenharmony_ci UNPACK("$f6") \ 1850cabdff1aSopenharmony_ci UNPACK("$f8") \ 1851cabdff1aSopenharmony_ci "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \ 1852cabdff1aSopenharmony_ci "pmullh $f8, $f8, %[ff_pw_18] \n\t" /* *18 */ \ 1853cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \ 1854cabdff1aSopenharmony_ci "psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \ 1855cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A4" \n\t" \ 1856cabdff1aSopenharmony_ci LOAD($f2, $9, M*0) \ 1857cabdff1aSopenharmony_ci LOAD($f4, $9, M*4) \ 1858cabdff1aSopenharmony_ci UNPACK("$f2") \ 1859cabdff1aSopenharmony_ci UNPACK("$f4") \ 1860cabdff1aSopenharmony_ci "li $8, 0x02 \n\t" \ 1861cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" 
\ 1862cabdff1aSopenharmony_ci "psllh $f2, $f2, $f16 \n\t" /* 4* */ \ 1863cabdff1aSopenharmony_ci "psllh $f4, $f4, $f16 \n\t" /* 4* */ \ 1864cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" /* -4,18,-3 */ \ 1865cabdff1aSopenharmony_ci "psubh $f8, $f8, $f4 \n\t" /* -4,18,-3 */ \ 1866cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A3" \n\t" \ 1867cabdff1aSopenharmony_ci LOAD($f2, $9, M*0) \ 1868cabdff1aSopenharmony_ci LOAD($f4, $9, M*4) \ 1869cabdff1aSopenharmony_ci UNPACK("$f2") \ 1870cabdff1aSopenharmony_ci UNPACK("$f4") \ 1871cabdff1aSopenharmony_ci "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \ 1872cabdff1aSopenharmony_ci "pmullh $f4, $f4, %[ff_pw_53] \n\t" /* *53 */ \ 1873cabdff1aSopenharmony_ci "paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \ 1874cabdff1aSopenharmony_ci "paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */ 1875cabdff1aSopenharmony_ci 1876cabdff1aSopenharmony_ci/** 1877cabdff1aSopenharmony_ci * Macro to build the vertical 16bits version of vc1_put_shift[13]. 1878cabdff1aSopenharmony_ci * Here, offset=src_stride. Parameters passed A1 to A4 must use 1879cabdff1aSopenharmony_ci * %3 (src_stride), %4 (2*src_stride) and %5 (3*src_stride). 
1880cabdff1aSopenharmony_ci * 1881cabdff1aSopenharmony_ci * @param NAME Either 1 or 3 1882cabdff1aSopenharmony_ci * @see MSPEL_FILTER13_CORE for information on A1->A4 1883cabdff1aSopenharmony_ci */ 1884cabdff1aSopenharmony_ci#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \ 1885cabdff1aSopenharmony_cistatic void \ 1886cabdff1aSopenharmony_civc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \ 1887cabdff1aSopenharmony_ci mips_reg src_stride, \ 1888cabdff1aSopenharmony_ci int rnd, int64_t shift) \ 1889cabdff1aSopenharmony_ci{ \ 1890cabdff1aSopenharmony_ci int h = 8; \ 1891cabdff1aSopenharmony_ci union mmi_intfloat64 shift_u; \ 1892cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 1893cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; \ 1894cabdff1aSopenharmony_ci shift_u.i = shift; \ 1895cabdff1aSopenharmony_ci \ 1896cabdff1aSopenharmony_ci src -= src_stride; \ 1897cabdff1aSopenharmony_ci \ 1898cabdff1aSopenharmony_ci __asm__ volatile( \ 1899cabdff1aSopenharmony_ci "pxor $f0, $f0, $f0 \n\t" \ 1900cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") \ 1901cabdff1aSopenharmony_ci ".p2align 3 \n\t" \ 1902cabdff1aSopenharmony_ci "1: \n\t" \ 1903cabdff1aSopenharmony_ci MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ 1904cabdff1aSopenharmony_ci NORMALIZE_MMI("%[shift]") \ 1905cabdff1aSopenharmony_ci TRANSFER_DONT_PACK(OP_PUT) \ 1906cabdff1aSopenharmony_ci /* Last 3 (in fact 4) bytes on the line */ \ 1907cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A1" \n\t" \ 1908cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x08) \ 1909cabdff1aSopenharmony_ci DO_UNPACK("$f2") \ 1910cabdff1aSopenharmony_ci "mov.d $f6, $f2 \n\t" \ 1911cabdff1aSopenharmony_ci "paddh $f2, $f2, $f2 \n\t" \ 1912cabdff1aSopenharmony_ci "paddh $f2, $f2, $f6 \n\t" /* 3* */ \ 1913cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A2" \n\t" \ 1914cabdff1aSopenharmony_ci MMI_ULWC1($f6, $9, 0x08) \ 1915cabdff1aSopenharmony_ci DO_UNPACK("$f6") \ 1916cabdff1aSopenharmony_ci "pmullh $f6, $f6, 
%[ff_pw_18] \n\t" /* *18 */ \ 1917cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \ 1918cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A3" \n\t" \ 1919cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x08) \ 1920cabdff1aSopenharmony_ci DO_UNPACK("$f2") \ 1921cabdff1aSopenharmony_ci "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \ 1922cabdff1aSopenharmony_ci "paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \ 1923cabdff1aSopenharmony_ci PTR_ADDU "$9, %[src], "#A4" \n\t" \ 1924cabdff1aSopenharmony_ci MMI_ULWC1($f2, $9, 0x08) \ 1925cabdff1aSopenharmony_ci DO_UNPACK("$f2") \ 1926cabdff1aSopenharmony_ci "li $8, 0x02 \n\t" \ 1927cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 1928cabdff1aSopenharmony_ci "psllh $f2, $f2, $f16 \n\t" /* 4* */ \ 1929cabdff1aSopenharmony_ci "psubh $f6, $f6, $f2 \n\t" \ 1930cabdff1aSopenharmony_ci "paddh $f6, $f6, $f14 \n\t" \ 1931cabdff1aSopenharmony_ci "li $8, 0x06 \n\t" \ 1932cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 1933cabdff1aSopenharmony_ci "psrah $f6, $f6, $f16 \n\t" \ 1934cabdff1aSopenharmony_ci "sdc1 $f6, 0x10(%[dst]) \n\t" \ 1935cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" \ 1936cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride_x1] \n\t" \ 1937cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x18 \n\t" \ 1938cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" \ 1939cabdff1aSopenharmony_ci : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 1940cabdff1aSopenharmony_ci [h]"+r"(h), \ 1941cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) \ 1942cabdff1aSopenharmony_ci : [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \ 1943cabdff1aSopenharmony_ci [stride_x3]"r"(3*src_stride), \ 1944cabdff1aSopenharmony_ci [rnd]"m"(rnd), [shift]"f"(shift_u.f), \ 1945cabdff1aSopenharmony_ci [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \ 1946cabdff1aSopenharmony_ci [ff_pw_3]"f"(ff_pw_3.f) \ 1947cabdff1aSopenharmony_ci : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \ 1948cabdff1aSopenharmony_ci "$f14", "$f16", "memory" \ 
1949cabdff1aSopenharmony_ci ); \ 1950cabdff1aSopenharmony_ci} 1951cabdff1aSopenharmony_ci 1952cabdff1aSopenharmony_ci/** 1953cabdff1aSopenharmony_ci * Macro to build the horizontal 16bits version of vc1_put_shift[13]. 1954cabdff1aSopenharmony_ci * Here, offset=16bits, so parameters passed A1 to A4 should be simple. 1955cabdff1aSopenharmony_ci * 1956cabdff1aSopenharmony_ci * @param NAME Either 1 or 3 1957cabdff1aSopenharmony_ci * @see MSPEL_FILTER13_CORE for information on A1->A4 1958cabdff1aSopenharmony_ci */ 1959cabdff1aSopenharmony_ci#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 1960cabdff1aSopenharmony_cistatic void \ 1961cabdff1aSopenharmony_ciOPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \ 1962cabdff1aSopenharmony_ci const int16_t *src, int rnd) \ 1963cabdff1aSopenharmony_ci{ \ 1964cabdff1aSopenharmony_ci int h = 8; \ 1965cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; \ 1966cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; \ 1967cabdff1aSopenharmony_ci \ 1968cabdff1aSopenharmony_ci src -= 1; \ 1969cabdff1aSopenharmony_ci rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \ 1970cabdff1aSopenharmony_ci \ 1971cabdff1aSopenharmony_ci __asm__ volatile( \ 1972cabdff1aSopenharmony_ci "pxor $f0, $f0, $f0 \n\t" \ 1973cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") \ 1974cabdff1aSopenharmony_ci ".p2align 3 \n\t" \ 1975cabdff1aSopenharmony_ci "1: \n\t" \ 1976cabdff1aSopenharmony_ci MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \ 1977cabdff1aSopenharmony_ci "li $8, 0x07 \n\t" \ 1978cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 1979cabdff1aSopenharmony_ci NORMALIZE_MMI("$f16") \ 1980cabdff1aSopenharmony_ci /* Remove bias */ \ 1981cabdff1aSopenharmony_ci "paddh $f6, $f6, %[ff_pw_128] \n\t" \ 1982cabdff1aSopenharmony_ci "paddh $f8, $f8, %[ff_pw_128] \n\t" \ 1983cabdff1aSopenharmony_ci TRANSFER_DO_PACK(OP) \ 1984cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" \ 1985cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], 0x18 
\n\t" \ 1986cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1987cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" \ 1988cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \ 1989cabdff1aSopenharmony_ci [h]"+r"(h), \ 1990cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) \ 1991cabdff1aSopenharmony_ci : [stride]"r"(stride), [rnd]"m"(rnd), \ 1992cabdff1aSopenharmony_ci [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \ 1993cabdff1aSopenharmony_ci [ff_pw_3]"f"(ff_pw_3.f), [ff_pw_128]"f"(ff_pw_128.f) \ 1994cabdff1aSopenharmony_ci : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \ 1995cabdff1aSopenharmony_ci "$f14", "$f16", "memory" \ 1996cabdff1aSopenharmony_ci ); \ 1997cabdff1aSopenharmony_ci} 1998cabdff1aSopenharmony_ci 1999cabdff1aSopenharmony_ci/** 2000cabdff1aSopenharmony_ci * Macro to build the 8bits, any direction, version of vc1_put_shift[13]. 2001cabdff1aSopenharmony_ci * Here, offset=src_stride. Parameters passed A1 to A4 must use 2002cabdff1aSopenharmony_ci * %3 (offset), %4 (2*offset) and %5 (3*offset). 
2003cabdff1aSopenharmony_ci * 2004cabdff1aSopenharmony_ci * @param NAME Either 1 or 3 2005cabdff1aSopenharmony_ci * @see MSPEL_FILTER13_CORE for information on A1->A4 2006cabdff1aSopenharmony_ci */ 2007cabdff1aSopenharmony_ci#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 2008cabdff1aSopenharmony_cistatic void \ 2009cabdff1aSopenharmony_ciOPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \ 2010cabdff1aSopenharmony_ci mips_reg stride, int rnd, mips_reg offset) \ 2011cabdff1aSopenharmony_ci{ \ 2012cabdff1aSopenharmony_ci int h = 8; \ 2013cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; \ 2014cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; \ 2015cabdff1aSopenharmony_ci \ 2016cabdff1aSopenharmony_ci src -= offset; \ 2017cabdff1aSopenharmony_ci rnd = 32-rnd; \ 2018cabdff1aSopenharmony_ci \ 2019cabdff1aSopenharmony_ci __asm__ volatile ( \ 2020cabdff1aSopenharmony_ci "pxor $f0, $f0, $f0 \n\t" \ 2021cabdff1aSopenharmony_ci LOAD_ROUNDER_MMI("%[rnd]") \ 2022cabdff1aSopenharmony_ci ".p2align 3 \n\t" \ 2023cabdff1aSopenharmony_ci "1: \n\t" \ 2024cabdff1aSopenharmony_ci MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ 2025cabdff1aSopenharmony_ci "li $8, 0x06 \n\t" \ 2026cabdff1aSopenharmony_ci "mtc1 $8, $f16 \n\t" \ 2027cabdff1aSopenharmony_ci NORMALIZE_MMI("$f16") \ 2028cabdff1aSopenharmony_ci TRANSFER_DO_PACK(OP) \ 2029cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" \ 2030cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" \ 2031cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 2032cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" \ 2033cabdff1aSopenharmony_ci : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 2034cabdff1aSopenharmony_ci [h]"+r"(h), \ 2035cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) \ 2036cabdff1aSopenharmony_ci : [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \ 2037cabdff1aSopenharmony_ci [offset_x3]"r"(3*offset), [stride]"r"(stride), \ 2038cabdff1aSopenharmony_ci [rnd]"m"(rnd), \ 
2039cabdff1aSopenharmony_ci [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \ 2040cabdff1aSopenharmony_ci [ff_pw_3]"f"(ff_pw_3.f) \ 2041cabdff1aSopenharmony_ci : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \ 2042cabdff1aSopenharmony_ci "$f14", "$f16", "memory" \ 2043cabdff1aSopenharmony_ci ); \ 2044cabdff1aSopenharmony_ci} 2045cabdff1aSopenharmony_ci 2046cabdff1aSopenharmony_ci 2047cabdff1aSopenharmony_ci/** 1/4 shift bicubic interpolation */ 2048cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift1, %[offset_x3], %[offset_x2], %[offset_x1], $0, OP_PUT, put_) 2049cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift1, %[offset_x3], %[offset_x2], %[offset_x1], $0, OP_AVG, avg_) 2050cabdff1aSopenharmony_ciMSPEL_FILTER13_VER_16B(shift1, %[stride_x3], %[stride_x2], %[stride_x1], $0) 2051cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift1, 6, 4, 2, 0, OP_PUT, put_) 2052cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift1, 6, 4, 2, 0, OP_AVG, avg_) 2053cabdff1aSopenharmony_ci 2054cabdff1aSopenharmony_ci/** 3/4 shift bicubic interpolation */ 2055cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_PUT, put_) 2056cabdff1aSopenharmony_ciMSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_AVG, avg_) 2057cabdff1aSopenharmony_ciMSPEL_FILTER13_VER_16B(shift3, $0, %[stride_x1], %[stride_x2], %[stride_x3]) 2058cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift3, 0, 2, 4, 6, OP_PUT, put_) 2059cabdff1aSopenharmony_ciMSPEL_FILTER13_HOR_16B(shift3, 0, 2, 4, 6, OP_AVG, avg_) 2060cabdff1aSopenharmony_ci 2061cabdff1aSopenharmony_citypedef void (*vc1_mspel_mc_filter_ver_16bits) 2062cabdff1aSopenharmony_ci (int16_t *dst, const uint8_t *src, mips_reg src_stride, int rnd, 2063cabdff1aSopenharmony_ci int64_t shift); 2064cabdff1aSopenharmony_citypedef void (*vc1_mspel_mc_filter_hor_16bits) 2065cabdff1aSopenharmony_ci (uint8_t *dst, mips_reg dst_stride, const int16_t *src, int rnd); 2066cabdff1aSopenharmony_citypedef void 
(*vc1_mspel_mc_filter_8bits) 2067cabdff1aSopenharmony_ci (uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd, 2068cabdff1aSopenharmony_ci mips_reg offset); 2069cabdff1aSopenharmony_ci 2070cabdff1aSopenharmony_ci/** 2071cabdff1aSopenharmony_ci * Interpolate fractional pel values by applying proper vertical then 2072cabdff1aSopenharmony_ci * horizontal filter. 2073cabdff1aSopenharmony_ci * 2074cabdff1aSopenharmony_ci * @param dst Destination buffer for interpolated pels. 2075cabdff1aSopenharmony_ci * @param src Source buffer. 2076cabdff1aSopenharmony_ci * @param stride Stride for both src and dst buffers. 2077cabdff1aSopenharmony_ci * @param hmode Horizontal filter (expressed in quarter pixels shift). 2078cabdff1aSopenharmony_ci * @param hmode Vertical filter. 2079cabdff1aSopenharmony_ci * @param rnd Rounding bias. 2080cabdff1aSopenharmony_ci */ 2081cabdff1aSopenharmony_ci#define VC1_MSPEL_MC(OP) \ 2082cabdff1aSopenharmony_cistatic void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ 2083cabdff1aSopenharmony_ci int hmode, int vmode, int rnd) \ 2084cabdff1aSopenharmony_ci{ \ 2085cabdff1aSopenharmony_ci static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ 2086cabdff1aSopenharmony_ci { NULL, vc1_put_ver_16b_shift1_mmi, \ 2087cabdff1aSopenharmony_ci vc1_put_ver_16b_shift2_mmi, \ 2088cabdff1aSopenharmony_ci vc1_put_ver_16b_shift3_mmi }; \ 2089cabdff1aSopenharmony_ci static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ 2090cabdff1aSopenharmony_ci { NULL, OP ## vc1_hor_16b_shift1_mmi, \ 2091cabdff1aSopenharmony_ci OP ## vc1_hor_16b_shift2_mmi, \ 2092cabdff1aSopenharmony_ci OP ## vc1_hor_16b_shift3_mmi }; \ 2093cabdff1aSopenharmony_ci static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = \ 2094cabdff1aSopenharmony_ci { NULL, OP ## vc1_shift1_mmi, \ 2095cabdff1aSopenharmony_ci OP ## vc1_shift2_mmi, \ 2096cabdff1aSopenharmony_ci OP ## vc1_shift3_mmi }; \ 2097cabdff1aSopenharmony_ci \ 
2098cabdff1aSopenharmony_ci if (vmode) { /* Vertical filter to apply */ \ 2099cabdff1aSopenharmony_ci if (hmode) { /* Horizontal filter to apply, output to tmp */ \ 2100cabdff1aSopenharmony_ci static const int shift_value[] = { 0, 5, 1, 5 }; \ 2101cabdff1aSopenharmony_ci int shift = (shift_value[hmode]+shift_value[vmode])>>1; \ 2102cabdff1aSopenharmony_ci int r; \ 2103cabdff1aSopenharmony_ci LOCAL_ALIGNED(16, int16_t, tmp, [12*8]); \ 2104cabdff1aSopenharmony_ci \ 2105cabdff1aSopenharmony_ci r = (1<<(shift-1)) + rnd-1; \ 2106cabdff1aSopenharmony_ci vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift); \ 2107cabdff1aSopenharmony_ci \ 2108cabdff1aSopenharmony_ci vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd); \ 2109cabdff1aSopenharmony_ci return; \ 2110cabdff1aSopenharmony_ci } \ 2111cabdff1aSopenharmony_ci else { /* No horizontal filter, output 8 lines to dst */ \ 2112cabdff1aSopenharmony_ci vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride); \ 2113cabdff1aSopenharmony_ci return; \ 2114cabdff1aSopenharmony_ci } \ 2115cabdff1aSopenharmony_ci } \ 2116cabdff1aSopenharmony_ci \ 2117cabdff1aSopenharmony_ci /* Horizontal mode with no vertical mode */ \ 2118cabdff1aSopenharmony_ci vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1); \ 2119cabdff1aSopenharmony_ci} \ 2120cabdff1aSopenharmony_cistatic void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ 2121cabdff1aSopenharmony_ci int stride, int hmode, int vmode, int rnd)\ 2122cabdff1aSopenharmony_ci{ \ 2123cabdff1aSopenharmony_ci OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 2124cabdff1aSopenharmony_ci OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 2125cabdff1aSopenharmony_ci dst += 8*stride; src += 8*stride; \ 2126cabdff1aSopenharmony_ci OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 2127cabdff1aSopenharmony_ci OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 2128cabdff1aSopenharmony_ci} 
2129cabdff1aSopenharmony_ci 2130cabdff1aSopenharmony_ciVC1_MSPEL_MC(put_) 2131cabdff1aSopenharmony_ciVC1_MSPEL_MC(avg_) 2132cabdff1aSopenharmony_ci 2133cabdff1aSopenharmony_ci/** Macro to ease bicubic filter interpolation functions declarations */ 2134cabdff1aSopenharmony_ci#define DECLARE_FUNCTION(a, b) \ 2135cabdff1aSopenharmony_civoid ff_put_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \ 2136cabdff1aSopenharmony_ci const uint8_t *src, \ 2137cabdff1aSopenharmony_ci ptrdiff_t stride, \ 2138cabdff1aSopenharmony_ci int rnd) \ 2139cabdff1aSopenharmony_ci{ \ 2140cabdff1aSopenharmony_ci put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 2141cabdff1aSopenharmony_ci} \ 2142cabdff1aSopenharmony_civoid ff_avg_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \ 2143cabdff1aSopenharmony_ci const uint8_t *src, \ 2144cabdff1aSopenharmony_ci ptrdiff_t stride, \ 2145cabdff1aSopenharmony_ci int rnd) \ 2146cabdff1aSopenharmony_ci{ \ 2147cabdff1aSopenharmony_ci avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 2148cabdff1aSopenharmony_ci} \ 2149cabdff1aSopenharmony_civoid ff_put_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst, \ 2150cabdff1aSopenharmony_ci const uint8_t *src, \ 2151cabdff1aSopenharmony_ci ptrdiff_t stride, \ 2152cabdff1aSopenharmony_ci int rnd) \ 2153cabdff1aSopenharmony_ci{ \ 2154cabdff1aSopenharmony_ci put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 2155cabdff1aSopenharmony_ci} \ 2156cabdff1aSopenharmony_civoid ff_avg_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst, \ 2157cabdff1aSopenharmony_ci const uint8_t *src, \ 2158cabdff1aSopenharmony_ci ptrdiff_t stride, \ 2159cabdff1aSopenharmony_ci int rnd) \ 2160cabdff1aSopenharmony_ci{ \ 2161cabdff1aSopenharmony_ci avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 2162cabdff1aSopenharmony_ci} 2163cabdff1aSopenharmony_ci 2164cabdff1aSopenharmony_ciDECLARE_FUNCTION(0, 1) 2165cabdff1aSopenharmony_ciDECLARE_FUNCTION(0, 2) 2166cabdff1aSopenharmony_ciDECLARE_FUNCTION(0, 3) 2167cabdff1aSopenharmony_ci 
2168cabdff1aSopenharmony_ciDECLARE_FUNCTION(1, 0) 2169cabdff1aSopenharmony_ciDECLARE_FUNCTION(1, 1) 2170cabdff1aSopenharmony_ciDECLARE_FUNCTION(1, 2) 2171cabdff1aSopenharmony_ciDECLARE_FUNCTION(1, 3) 2172cabdff1aSopenharmony_ci 2173cabdff1aSopenharmony_ciDECLARE_FUNCTION(2, 0) 2174cabdff1aSopenharmony_ciDECLARE_FUNCTION(2, 1) 2175cabdff1aSopenharmony_ciDECLARE_FUNCTION(2, 2) 2176cabdff1aSopenharmony_ciDECLARE_FUNCTION(2, 3) 2177cabdff1aSopenharmony_ci 2178cabdff1aSopenharmony_ciDECLARE_FUNCTION(3, 0) 2179cabdff1aSopenharmony_ciDECLARE_FUNCTION(3, 1) 2180cabdff1aSopenharmony_ciDECLARE_FUNCTION(3, 2) 2181cabdff1aSopenharmony_ciDECLARE_FUNCTION(3, 3) 2182cabdff1aSopenharmony_ci 2183cabdff1aSopenharmony_ci#define CHROMA_MC_8_MMI \ 2184cabdff1aSopenharmony_ci "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ 2185cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 2186cabdff1aSopenharmony_ci "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" \ 2187cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 2188cabdff1aSopenharmony_ci "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 2189cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 2190cabdff1aSopenharmony_ci "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 2191cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 2192cabdff1aSopenharmony_ci \ 2193cabdff1aSopenharmony_ci "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \ 2194cabdff1aSopenharmony_ci "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" \ 2195cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \ 2196cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[B] \n\t" \ 2197cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[C] \n\t" \ 2198cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[C] \n\t" \ 2199cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \ 2200cabdff1aSopenharmony_ci "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" \ 2201cabdff1aSopenharmony_ci \ 2202cabdff1aSopenharmony_ci "paddh 
%[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 2203cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 2204cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ 2205cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \ 2206cabdff1aSopenharmony_ci \ 2207cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 2208cabdff1aSopenharmony_ci "paddh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" \ 2209cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 2210cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[ff_pw_28] \n\t" \ 2211cabdff1aSopenharmony_ci \ 2212cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \ 2213cabdff1aSopenharmony_ci "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ 2214cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 2215cabdff1aSopenharmony_ci 2216cabdff1aSopenharmony_ci 2217cabdff1aSopenharmony_ci#define CHROMA_MC_4_MMI \ 2218cabdff1aSopenharmony_ci "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 2219cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 2220cabdff1aSopenharmony_ci "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 2221cabdff1aSopenharmony_ci "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 2222cabdff1aSopenharmony_ci \ 2223cabdff1aSopenharmony_ci "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \ 2224cabdff1aSopenharmony_ci "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \ 2225cabdff1aSopenharmony_ci "pmullh %[ftmp3], %[ftmp3], %[C] \n\t" \ 2226cabdff1aSopenharmony_ci "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \ 2227cabdff1aSopenharmony_ci \ 2228cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 2229cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 2230cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ 2231cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \ 2232cabdff1aSopenharmony_ci \ 2233cabdff1aSopenharmony_ci "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \ 2234cabdff1aSopenharmony_ci 
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 2235cabdff1aSopenharmony_ci 2236cabdff1aSopenharmony_ci 2237cabdff1aSopenharmony_civoid ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */, 2238cabdff1aSopenharmony_ci uint8_t *src /* align 1 */, 2239cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y) 2240cabdff1aSopenharmony_ci{ 2241cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D; 2242cabdff1aSopenharmony_ci double ftmp[10]; 2243cabdff1aSopenharmony_ci uint32_t tmp[1]; 2244cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2245cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; 2246cabdff1aSopenharmony_ci A.i = (8 - x) * (8 - y); 2247cabdff1aSopenharmony_ci B.i = (x) * (8 - y); 2248cabdff1aSopenharmony_ci C.i = (8 - x) * (y); 2249cabdff1aSopenharmony_ci D.i = (x) * (y); 2250cabdff1aSopenharmony_ci 2251cabdff1aSopenharmony_ci av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); 2252cabdff1aSopenharmony_ci 2253cabdff1aSopenharmony_ci __asm__ volatile( 2254cabdff1aSopenharmony_ci "li %[tmp0], 0x06 \n\t" 2255cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2256cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp9] \n\t" 2257cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 2258cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 2259cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 2260cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 2261cabdff1aSopenharmony_ci 2262cabdff1aSopenharmony_ci "1: \n\t" 2263cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 2264cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 2265cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 2266cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) 2267cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) 2268cabdff1aSopenharmony_ci 2269cabdff1aSopenharmony_ci CHROMA_MC_8_MMI 2270cabdff1aSopenharmony_ci 2271cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 2272cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 
2273cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2274cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2275cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2276cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2277cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2278cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2279cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 2280cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2281cabdff1aSopenharmony_ci RESTRICT_ASM_ADDRT 2282cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2283cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), 2284cabdff1aSopenharmony_ci [h]"+&r"(h) 2285cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 2286cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 2287cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 2288cabdff1aSopenharmony_ci [ff_pw_28]"f"(ff_pw_28.f) 2289cabdff1aSopenharmony_ci : "memory" 2290cabdff1aSopenharmony_ci ); 2291cabdff1aSopenharmony_ci} 2292cabdff1aSopenharmony_ci 2293cabdff1aSopenharmony_civoid ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */, 2294cabdff1aSopenharmony_ci uint8_t *src /* align 1 */, 2295cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y) 2296cabdff1aSopenharmony_ci{ 2297cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D; 2298cabdff1aSopenharmony_ci double ftmp[6]; 2299cabdff1aSopenharmony_ci uint32_t tmp[1]; 2300cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 2301cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; 2302cabdff1aSopenharmony_ci A.i = (8 - x) * (8 - y); 2303cabdff1aSopenharmony_ci B.i = (x) * (8 - y); 2304cabdff1aSopenharmony_ci C.i = (8 - x) * (y); 2305cabdff1aSopenharmony_ci D.i = (x) * (y); 2306cabdff1aSopenharmony_ci 2307cabdff1aSopenharmony_ci av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); 2308cabdff1aSopenharmony_ci 2309cabdff1aSopenharmony_ci __asm__ volatile( 2310cabdff1aSopenharmony_ci "li %[tmp0], 0x06 
\n\t" 2311cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2312cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp5] \n\t" 2313cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 2314cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 2315cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 2316cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 2317cabdff1aSopenharmony_ci 2318cabdff1aSopenharmony_ci "1: \n\t" 2319cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 2320cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[src], 0x01) 2321cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 2322cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp3], %[src], 0x00) 2323cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp4], %[src], 0x01) 2324cabdff1aSopenharmony_ci 2325cabdff1aSopenharmony_ci CHROMA_MC_4_MMI 2326cabdff1aSopenharmony_ci 2327cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 2328cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2329cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2330cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2331cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2332cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2333cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2334cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2335cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 2336cabdff1aSopenharmony_ci RESTRICT_ASM_ADDRT 2337cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), 2338cabdff1aSopenharmony_ci [h]"+&r"(h) 2339cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 2340cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 2341cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 2342cabdff1aSopenharmony_ci [ff_pw_28]"f"(ff_pw_28.f) 2343cabdff1aSopenharmony_ci : "memory" 2344cabdff1aSopenharmony_ci ); 2345cabdff1aSopenharmony_ci} 2346cabdff1aSopenharmony_ci 2347cabdff1aSopenharmony_civoid ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t 
*dst /* align 8 */, 2348cabdff1aSopenharmony_ci uint8_t *src /* align 1 */, 2349cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y) 2350cabdff1aSopenharmony_ci{ 2351cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D; 2352cabdff1aSopenharmony_ci double ftmp[10]; 2353cabdff1aSopenharmony_ci uint32_t tmp[1]; 2354cabdff1aSopenharmony_ci DECLARE_VAR_ALL64; 2355cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; 2356cabdff1aSopenharmony_ci A.i = (8 - x) * (8 - y); 2357cabdff1aSopenharmony_ci B.i = (x) * (8 - y); 2358cabdff1aSopenharmony_ci C.i = (8 - x) * (y); 2359cabdff1aSopenharmony_ci D.i = (x) * (y); 2360cabdff1aSopenharmony_ci 2361cabdff1aSopenharmony_ci av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); 2362cabdff1aSopenharmony_ci 2363cabdff1aSopenharmony_ci __asm__ volatile( 2364cabdff1aSopenharmony_ci "li %[tmp0], 0x06 \n\t" 2365cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2366cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp9] \n\t" 2367cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 2368cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 2369cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 2370cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 2371cabdff1aSopenharmony_ci 2372cabdff1aSopenharmony_ci "1: \n\t" 2373cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src], 0x00) 2374cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src], 0x01) 2375cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 2376cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp3], %[src], 0x00) 2377cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp4], %[src], 0x01) 2378cabdff1aSopenharmony_ci 2379cabdff1aSopenharmony_ci CHROMA_MC_8_MMI 2380cabdff1aSopenharmony_ci 2381cabdff1aSopenharmony_ci MMI_LDC1(%[ftmp2], %[dst], 0x00) 2382cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2383cabdff1aSopenharmony_ci 2384cabdff1aSopenharmony_ci MMI_SDC1(%[ftmp1], %[dst], 0x00) 2385cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 
2386cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2387cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2388cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2389cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2390cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2391cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 2392cabdff1aSopenharmony_ci [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 2393cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2394cabdff1aSopenharmony_ci RESTRICT_ASM_ALL64 2395cabdff1aSopenharmony_ci RESTRICT_ASM_ADDRT 2396cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), 2397cabdff1aSopenharmony_ci [h]"+&r"(h) 2398cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 2399cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 2400cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 2401cabdff1aSopenharmony_ci [ff_pw_28]"f"(ff_pw_28.f) 2402cabdff1aSopenharmony_ci : "memory" 2403cabdff1aSopenharmony_ci ); 2404cabdff1aSopenharmony_ci} 2405cabdff1aSopenharmony_ci 2406cabdff1aSopenharmony_civoid ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */, 2407cabdff1aSopenharmony_ci uint8_t *src /* align 1 */, 2408cabdff1aSopenharmony_ci ptrdiff_t stride, int h, int x, int y) 2409cabdff1aSopenharmony_ci{ 2410cabdff1aSopenharmony_ci union mmi_intfloat64 A, B, C, D; 2411cabdff1aSopenharmony_ci double ftmp[6]; 2412cabdff1aSopenharmony_ci uint32_t tmp[1]; 2413cabdff1aSopenharmony_ci DECLARE_VAR_LOW32; 2414cabdff1aSopenharmony_ci DECLARE_VAR_ADDRT; 2415cabdff1aSopenharmony_ci A.i = (8 - x) * (8 - y); 2416cabdff1aSopenharmony_ci B.i = (x) * (8 - y); 2417cabdff1aSopenharmony_ci C.i = (8 - x) * (y); 2418cabdff1aSopenharmony_ci D.i = (x) * (y); 2419cabdff1aSopenharmony_ci 2420cabdff1aSopenharmony_ci av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); 2421cabdff1aSopenharmony_ci 2422cabdff1aSopenharmony_ci __asm__ volatile( 2423cabdff1aSopenharmony_ci "li %[tmp0], 0x06 
\n\t" 2424cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2425cabdff1aSopenharmony_ci "mtc1 %[tmp0], %[ftmp5] \n\t" 2426cabdff1aSopenharmony_ci "pshufh %[A], %[A], %[ftmp0] \n\t" 2427cabdff1aSopenharmony_ci "pshufh %[B], %[B], %[ftmp0] \n\t" 2428cabdff1aSopenharmony_ci "pshufh %[C], %[C], %[ftmp0] \n\t" 2429cabdff1aSopenharmony_ci "pshufh %[D], %[D], %[ftmp0] \n\t" 2430cabdff1aSopenharmony_ci 2431cabdff1aSopenharmony_ci "1: \n\t" 2432cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp1], %[src], 0x00) 2433cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp2], %[src], 0x01) 2434cabdff1aSopenharmony_ci PTR_ADDU "%[src], %[src], %[stride] \n\t" 2435cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp3], %[src], 0x00) 2436cabdff1aSopenharmony_ci MMI_ULWC1(%[ftmp4], %[src], 0x01) 2437cabdff1aSopenharmony_ci 2438cabdff1aSopenharmony_ci CHROMA_MC_4_MMI 2439cabdff1aSopenharmony_ci 2440cabdff1aSopenharmony_ci MMI_LWC1(%[ftmp2], %[dst], 0x00) 2441cabdff1aSopenharmony_ci "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2442cabdff1aSopenharmony_ci 2443cabdff1aSopenharmony_ci MMI_SWC1(%[ftmp1], %[dst], 0x00) 2444cabdff1aSopenharmony_ci "addiu %[h], %[h], -0x01 \n\t" 2445cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2446cabdff1aSopenharmony_ci "bnez %[h], 1b \n\t" 2447cabdff1aSopenharmony_ci : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 2448cabdff1aSopenharmony_ci [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 2449cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 2450cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), 2451cabdff1aSopenharmony_ci RESTRICT_ASM_LOW32 2452cabdff1aSopenharmony_ci RESTRICT_ASM_ADDRT 2453cabdff1aSopenharmony_ci [src]"+&r"(src), [dst]"+&r"(dst), 2454cabdff1aSopenharmony_ci [h]"+&r"(h) 2455cabdff1aSopenharmony_ci : [stride]"r"((mips_reg)stride), 2456cabdff1aSopenharmony_ci [A]"f"(A.f), [B]"f"(B.f), 2457cabdff1aSopenharmony_ci [C]"f"(C.f), [D]"f"(D.f), 2458cabdff1aSopenharmony_ci [ff_pw_28]"f"(ff_pw_28.f) 2459cabdff1aSopenharmony_ci : "memory" 
2460cabdff1aSopenharmony_ci ); 2461cabdff1aSopenharmony_ci} 2462