1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn> 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "vp3dsp_mips.h" 22cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 23cabdff1aSopenharmony_ci#include "libavutil/mips/mmiutils.h" 24cabdff1aSopenharmony_ci#include "libavutil/common.h" 25cabdff1aSopenharmony_ci#include "libavcodec/rnd_avg.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#define LOAD_CONST(dst, value) \ 28cabdff1aSopenharmony_ci "li %[tmp1], "#value" \n\t" \ 29cabdff1aSopenharmony_ci "dmtc1 %[tmp1], "#dst" \n\t" \ 30cabdff1aSopenharmony_ci "pshufh "#dst", "#dst", %[ftmp10] \n\t" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_cistatic void idct_row_mmi(int16_t *input) 33cabdff1aSopenharmony_ci{ 
34cabdff1aSopenharmony_ci double ftmp[23]; 35cabdff1aSopenharmony_ci uint64_t tmp[2]; 36cabdff1aSopenharmony_ci __asm__ volatile ( 37cabdff1aSopenharmony_ci "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" 38cabdff1aSopenharmony_ci LOAD_CONST(%[csth_1], 1) 39cabdff1aSopenharmony_ci "li %[tmp0], 0x02 \n\t" 40cabdff1aSopenharmony_ci "1: \n\t" 41cabdff1aSopenharmony_ci /* Load input */ 42cabdff1aSopenharmony_ci "ldc1 %[ftmp0], 0x00(%[input]) \n\t" 43cabdff1aSopenharmony_ci "ldc1 %[ftmp1], 0x10(%[input]) \n\t" 44cabdff1aSopenharmony_ci "ldc1 %[ftmp2], 0x20(%[input]) \n\t" 45cabdff1aSopenharmony_ci "ldc1 %[ftmp3], 0x30(%[input]) \n\t" 46cabdff1aSopenharmony_ci "ldc1 %[ftmp4], 0x40(%[input]) \n\t" 47cabdff1aSopenharmony_ci "ldc1 %[ftmp5], 0x50(%[input]) \n\t" 48cabdff1aSopenharmony_ci "ldc1 %[ftmp6], 0x60(%[input]) \n\t" 49cabdff1aSopenharmony_ci "ldc1 %[ftmp7], 0x70(%[input]) \n\t" 50cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 64277) 51cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 12785) 52cabdff1aSopenharmony_ci "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t" 53cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t" 54cabdff1aSopenharmony_ci "por %[mask], %[C], %[csth_1] \n\t" 55cabdff1aSopenharmony_ci "pmullh %[B], %[ftmp1], %[mask] \n\t" 56cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 57cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 58cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 59cabdff1aSopenharmony_ci "paddh %[A], %[A], %[C] \n\t" 60cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t" 61cabdff1aSopenharmony_ci "por %[mask], %[D], %[csth_1] \n\t" 62cabdff1aSopenharmony_ci "pmullh %[ftmp7], %[ftmp7], %[mask] \n\t" 63cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[ftmp7] \n\t" 64cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 65cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t" 66cabdff1aSopenharmony_ci "psubh %[B], %[C], %[B] \n\t" 67cabdff1aSopenharmony_ci "psubh %[B], %[B], %[D] \n\t" 
68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 54491) 70cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 36410) 71cabdff1aSopenharmony_ci "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t" 72cabdff1aSopenharmony_ci "por %[mask], %[Ad], %[csth_1] \n\t" 73cabdff1aSopenharmony_ci "pmullh %[ftmp1], %[ftmp5], %[mask] \n\t" 74cabdff1aSopenharmony_ci "pmulhuh %[C], %[ftmp9], %[ftmp1] \n\t" 75cabdff1aSopenharmony_ci "pmullh %[C], %[C], %[mask] \n\t" 76cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 77cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[csth_1] \n\t" 78cabdff1aSopenharmony_ci "pmullh %[D], %[ftmp3], %[mask] \n\t" 79cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[D] \n\t" 80cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 81cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 82cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ad] \n\t" 83cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Bd] \n\t" 84cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 85cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[csth_1] \n\t" 86cabdff1aSopenharmony_ci "pmullh %[ftmp1], %[ftmp3], %[mask] \n\t" 87cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp9], %[ftmp1] \n\t" 88cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 89cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t" 90cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[csth_1] \n\t" 91cabdff1aSopenharmony_ci "pmullh %[Ad], %[ftmp5], %[mask] \n\t" 92cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 93cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 94cabdff1aSopenharmony_ci "psubh %[D], %[Ad], %[D] \n\t" 95cabdff1aSopenharmony_ci "paddh %[D], %[D], %[Ed] \n\t" 96cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Bd] \n\t" 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 46341) 99cabdff1aSopenharmony_ci "psubh %[Ad], %[A], %[C] \n\t" 100cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t" 101cabdff1aSopenharmony_ci "por 
%[mask], %[Bd], %[csth_1] \n\t" 102cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 103cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 104cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 105cabdff1aSopenharmony_ci "paddh %[Ad], %[Ad], %[Bd] \n\t" 106cabdff1aSopenharmony_ci "psubh %[Bd], %[B], %[D] \n\t" 107cabdff1aSopenharmony_ci "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t" 108cabdff1aSopenharmony_ci "por %[mask], %[Cd], %[csth_1] \n\t" 109cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 110cabdff1aSopenharmony_ci "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t" 111cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 112cabdff1aSopenharmony_ci "paddh %[Bd], %[Bd], %[Cd] \n\t" 113cabdff1aSopenharmony_ci "paddh %[Cd], %[A], %[C] \n\t" 114cabdff1aSopenharmony_ci "paddh %[Dd], %[B], %[D] \n\t" 115cabdff1aSopenharmony_ci "paddh %[A], %[ftmp0], %[ftmp4] \n\t" 116cabdff1aSopenharmony_ci "pcmpgth %[B], %[ftmp10], %[A] \n\t" 117cabdff1aSopenharmony_ci "por %[mask], %[B], %[csth_1] \n\t" 118cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 119cabdff1aSopenharmony_ci "pmulhuh %[A], %[ftmp8], %[A] \n\t" 120cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 121cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 122cabdff1aSopenharmony_ci "psubh %[B], %[ftmp0], %[ftmp4] \n\t" 123cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[B] \n\t" 124cabdff1aSopenharmony_ci "por %[mask], %[C], %[csth_1] \n\t" 125cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 126cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 127cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 128cabdff1aSopenharmony_ci "paddh %[B], %[B], %[C] \n\t" 129cabdff1aSopenharmony_ci 130cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 60547) 131cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 25080) 132cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t" 133cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t" 
134cabdff1aSopenharmony_ci "por %[mask], %[D], %[csth_1] \n\t" 135cabdff1aSopenharmony_ci "pmullh %[Ed], %[ftmp2], %[mask] \n\t" 136cabdff1aSopenharmony_ci "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t" 137cabdff1aSopenharmony_ci "pmullh %[Ed], %[Ed], %[mask] \n\t" 138cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ed] \n\t" 139cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 140cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t" 141cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[csth_1] \n\t" 142cabdff1aSopenharmony_ci "pmullh %[ftmp6], %[ftmp6], %[mask] \n\t" 143cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[ftmp6] \n\t" 144cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 145cabdff1aSopenharmony_ci "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t" 146cabdff1aSopenharmony_ci "psubh %[D], %[Gd], %[D] \n\t" 147cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Ed] \n\t" 148cabdff1aSopenharmony_ci "psubh %[Ed], %[A], %[C] \n\t" 149cabdff1aSopenharmony_ci "paddh %[Gd], %[A], %[C] \n\t" 150cabdff1aSopenharmony_ci "paddh %[A], %[B], %[Ad] \n\t" 151cabdff1aSopenharmony_ci "psubh %[C], %[B], %[Ad] \n\t" 152cabdff1aSopenharmony_ci "psubh %[B], %[Bd], %[D] \n\t" 153cabdff1aSopenharmony_ci "paddh %[D], %[Bd], %[D] \n\t" 154cabdff1aSopenharmony_ci /* Final sequence of operations over-write original inputs */ 155cabdff1aSopenharmony_ci "paddh %[ftmp0], %[Gd], %[Cd] \n\t" 156cabdff1aSopenharmony_ci "paddh %[ftmp1], %[A], %[D] \n\t" 157cabdff1aSopenharmony_ci "psubh %[ftmp2], %[A], %[D] \n\t" 158cabdff1aSopenharmony_ci "paddh %[ftmp3], %[Ed], %[Dd] \n\t" 159cabdff1aSopenharmony_ci "psubh %[ftmp4], %[Ed], %[Dd] \n\t" 160cabdff1aSopenharmony_ci "paddh %[ftmp5], %[C], %[B] \n\t" 161cabdff1aSopenharmony_ci "psubh %[ftmp6], %[C], %[B] \n\t" 162cabdff1aSopenharmony_ci "psubh %[ftmp7], %[Gd], %[Cd] \n\t" 163cabdff1aSopenharmony_ci "sdc1 %[ftmp0], 0x00(%[input]) \n\t" 164cabdff1aSopenharmony_ci "sdc1 %[ftmp1], 0x10(%[input]) \n\t" 165cabdff1aSopenharmony_ci "sdc1 %[ftmp2], 
0x20(%[input]) \n\t" 166cabdff1aSopenharmony_ci "sdc1 %[ftmp3], 0x30(%[input]) \n\t" 167cabdff1aSopenharmony_ci "sdc1 %[ftmp4], 0x40(%[input]) \n\t" 168cabdff1aSopenharmony_ci "sdc1 %[ftmp5], 0x50(%[input]) \n\t" 169cabdff1aSopenharmony_ci "sdc1 %[ftmp6], 0x60(%[input]) \n\t" 170cabdff1aSopenharmony_ci "sdc1 %[ftmp7], 0x70(%[input]) \n\t" 171cabdff1aSopenharmony_ci PTR_ADDU "%[tmp0], %[tmp0], -0x01 \n\t" 172cabdff1aSopenharmony_ci PTR_ADDIU "%[input], %[input], 0x08 \n\t" 173cabdff1aSopenharmony_ci "bnez %[tmp0], 1b \n\t" 174cabdff1aSopenharmony_ci : [input]"+&r"(input), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 175cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), 176cabdff1aSopenharmony_ci [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 177cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), 178cabdff1aSopenharmony_ci [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]), 179cabdff1aSopenharmony_ci [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]), 180cabdff1aSopenharmony_ci [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]), 181cabdff1aSopenharmony_ci [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]), 182cabdff1aSopenharmony_ci [Gd]"=&f"(ftmp[21]), [csth_1]"=&f"(ftmp[22]) 183cabdff1aSopenharmony_ci : 184cabdff1aSopenharmony_ci : "memory" 185cabdff1aSopenharmony_ci ); 186cabdff1aSopenharmony_ci} 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_cistatic void idct_column_true_mmi(uint8_t *dst, int stride, int16_t *input) 189cabdff1aSopenharmony_ci{ 190cabdff1aSopenharmony_ci uint8_t temp_value[8]; 191cabdff1aSopenharmony_ci double ftmp[23]; 192cabdff1aSopenharmony_ci uint64_t tmp[2]; 193cabdff1aSopenharmony_ci for (int i = 0; i < 8; ++i) 194cabdff1aSopenharmony_ci temp_value[i] = av_clip_uint8(128 + ((46341 * input[i << 3] + (8 << 16)) >> 20)); 195cabdff1aSopenharmony_ci __asm__ volatile ( 196cabdff1aSopenharmony_ci 
"pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" 197cabdff1aSopenharmony_ci "li %[tmp0], 0x02 \n\t" 198cabdff1aSopenharmony_ci "1: \n\t" 199cabdff1aSopenharmony_ci "ldc1 %[ftmp0], 0x00(%[input]) \n\t" 200cabdff1aSopenharmony_ci "ldc1 %[ftmp4], 0x08(%[input]) \n\t" 201cabdff1aSopenharmony_ci "ldc1 %[ftmp1], 0x10(%[input]) \n\t" 202cabdff1aSopenharmony_ci "ldc1 %[ftmp5], 0x18(%[input]) \n\t" 203cabdff1aSopenharmony_ci "ldc1 %[ftmp2], 0x20(%[input]) \n\t" 204cabdff1aSopenharmony_ci "ldc1 %[ftmp6], 0x28(%[input]) \n\t" 205cabdff1aSopenharmony_ci "ldc1 %[ftmp3], 0x30(%[input]) \n\t" 206cabdff1aSopenharmony_ci "ldc1 %[ftmp7], 0x38(%[input]) \n\t" 207cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3], 208cabdff1aSopenharmony_ci %[A], %[B], %[C], %[D]) 209cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7], 210cabdff1aSopenharmony_ci %[A], %[B], %[C], %[D]) 211cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 64277) 212cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 12785) 213cabdff1aSopenharmony_ci LOAD_CONST(%[Gd], 1) 214cabdff1aSopenharmony_ci "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t" 215cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t" 216cabdff1aSopenharmony_ci "por %[mask], %[C], %[Gd] \n\t" 217cabdff1aSopenharmony_ci "pmullh %[B], %[ftmp1], %[mask] \n\t" 218cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 219cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 220cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 221cabdff1aSopenharmony_ci "paddh %[A], %[A], %[C] \n\t" 222cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t" 223cabdff1aSopenharmony_ci "por %[mask], %[D], %[Gd] \n\t" 224cabdff1aSopenharmony_ci "pmullh %[Ad], %[ftmp7], %[mask] \n\t" 225cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[Ad] \n\t" 226cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 227cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t" 228cabdff1aSopenharmony_ci "psubh %[B], %[C], %[B] 
\n\t" 229cabdff1aSopenharmony_ci "psubh %[B], %[B], %[D] \n\t" 230cabdff1aSopenharmony_ci 231cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 54491) 232cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 36410) 233cabdff1aSopenharmony_ci "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t" 234cabdff1aSopenharmony_ci "por %[mask], %[Ad], %[Gd] \n\t" 235cabdff1aSopenharmony_ci "pmullh %[Cd], %[ftmp5], %[mask] \n\t" 236cabdff1aSopenharmony_ci "pmulhuh %[C], %[ftmp9], %[Cd] \n\t" 237cabdff1aSopenharmony_ci "pmullh %[C], %[C], %[mask] \n\t" 238cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 239cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 240cabdff1aSopenharmony_ci "pmullh %[D], %[ftmp3], %[mask] \n\t" 241cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[D] \n\t" 242cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 243cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 244cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ad] \n\t" 245cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Bd] \n\t" 246cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 247cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 248cabdff1aSopenharmony_ci "pmullh %[Cd], %[ftmp3], %[mask] \n\t" 249cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp9], %[Cd] \n\t" 250cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 251cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t" 252cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[Gd] \n\t" 253cabdff1aSopenharmony_ci "pmullh %[Ad], %[ftmp5], %[mask] \n\t" 254cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 255cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 256cabdff1aSopenharmony_ci "psubh %[D], %[Ad], %[D] \n\t" 257cabdff1aSopenharmony_ci "paddh %[D], %[D], %[Ed] \n\t" 258cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Bd] \n\t" 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 46341) 261cabdff1aSopenharmony_ci "psubh %[Ad], %[A], %[C] \n\t" 262cabdff1aSopenharmony_ci 
"pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t" 263cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 264cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 265cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 266cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 267cabdff1aSopenharmony_ci "paddh %[Ad], %[Ad], %[Bd] \n\t" 268cabdff1aSopenharmony_ci "psubh %[Bd], %[B], %[D] \n\t" 269cabdff1aSopenharmony_ci "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t" 270cabdff1aSopenharmony_ci "por %[mask], %[Cd], %[Gd] \n\t" 271cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 272cabdff1aSopenharmony_ci "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t" 273cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 274cabdff1aSopenharmony_ci "paddh %[Bd], %[Bd], %[Cd] \n\t" 275cabdff1aSopenharmony_ci "paddh %[Cd], %[A], %[C] \n\t" 276cabdff1aSopenharmony_ci "paddh %[Dd], %[B], %[D] \n\t" 277cabdff1aSopenharmony_ci 278cabdff1aSopenharmony_ci LOAD_CONST(%[Ed], 2056) 279cabdff1aSopenharmony_ci "paddh %[A], %[ftmp0], %[ftmp4] \n\t" 280cabdff1aSopenharmony_ci "pcmpgth %[B], %[ftmp10], %[A] \n\t" 281cabdff1aSopenharmony_ci "por %[mask], %[B], %[Gd] \n\t" 282cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 283cabdff1aSopenharmony_ci "pmulhuh %[A], %[ftmp8], %[A] \n\t" 284cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 285cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 286cabdff1aSopenharmony_ci "paddh %[A], %[A], %[Ed] \n\t" 287cabdff1aSopenharmony_ci "psubh %[B], %[ftmp0], %[ftmp4] \n\t" 288cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[B] \n\t" 289cabdff1aSopenharmony_ci "por %[mask], %[C], %[Gd] \n\t" 290cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 291cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 292cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 293cabdff1aSopenharmony_ci "paddh %[B], %[B], %[C] \n\t" 294cabdff1aSopenharmony_ci "paddh %[B], %[B], %[Ed] \n\t" 295cabdff1aSopenharmony_ci 
296cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 60547) 297cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 25080) 298cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t" 299cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t" 300cabdff1aSopenharmony_ci "por %[mask], %[D], %[Gd] \n\t" 301cabdff1aSopenharmony_ci "pmullh %[Ed], %[ftmp2], %[mask] \n\t" 302cabdff1aSopenharmony_ci "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t" 303cabdff1aSopenharmony_ci "pmullh %[Ed], %[Ed], %[mask] \n\t" 304cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ed] \n\t" 305cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 306cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t" 307cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[Gd] \n\t" 308cabdff1aSopenharmony_ci "pmullh %[D], %[ftmp6], %[mask] \n\t" 309cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[D] \n\t" 310cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 311cabdff1aSopenharmony_ci "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t" 312cabdff1aSopenharmony_ci "psubh %[D], %[Gd], %[D] \n\t" 313cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Ed] \n\t" 314cabdff1aSopenharmony_ci "psubh %[Ed], %[A], %[C] \n\t" 315cabdff1aSopenharmony_ci "paddh %[Gd], %[A], %[C] \n\t" 316cabdff1aSopenharmony_ci "paddh %[A], %[B], %[Ad] \n\t" 317cabdff1aSopenharmony_ci "psubh %[C], %[B], %[Ad] \n\t" 318cabdff1aSopenharmony_ci "psubh %[B], %[Bd], %[D] \n\t" 319cabdff1aSopenharmony_ci "paddh %[D], %[Bd], %[D] \n\t" 320cabdff1aSopenharmony_ci "por %[mask], %[ftmp1], %[ftmp2] \n\t" 321cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp3] \n\t" 322cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp4] \n\t" 323cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp5] \n\t" 324cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp6] \n\t" 325cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp7] \n\t" 326cabdff1aSopenharmony_ci "pcmpeqh %[mask], %[mask], %[ftmp10] \n\t" 327cabdff1aSopenharmony_ci "packushb %[mask], %[mask], %[ftmp10] \n\t" 
328cabdff1aSopenharmony_ci "li %[tmp1], 0x04 \n\t" 329cabdff1aSopenharmony_ci "dmtc1 %[tmp1], %[ftmp8] \n\t" 330cabdff1aSopenharmony_ci "paddh %[ftmp0], %[Gd], %[Cd] \n\t" 331cabdff1aSopenharmony_ci "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t" 332cabdff1aSopenharmony_ci "paddh %[ftmp1], %[A], %[D] \n\t" 333cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 334cabdff1aSopenharmony_ci "psubh %[ftmp2], %[A], %[D] \n\t" 335cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 336cabdff1aSopenharmony_ci "paddh %[ftmp3], %[Ed], %[Dd] \n\t" 337cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 338cabdff1aSopenharmony_ci "psubh %[ftmp4], %[Ed], %[Dd] \n\t" 339cabdff1aSopenharmony_ci "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 340cabdff1aSopenharmony_ci "paddh %[ftmp5], %[C], %[B] \n\t" 341cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 342cabdff1aSopenharmony_ci "psubh %[ftmp6], %[C], %[B] \n\t" 343cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 344cabdff1aSopenharmony_ci "psubh %[ftmp7], %[Gd], %[Cd] \n\t" 345cabdff1aSopenharmony_ci "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 346cabdff1aSopenharmony_ci "pmaxsh %[ftmp0], %[ftmp0], %[ftmp10] \n\t" 347cabdff1aSopenharmony_ci "packushb %[ftmp0], %[ftmp0], %[ftmp10] \n\t" 348cabdff1aSopenharmony_ci "pmaxsh %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 349cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 350cabdff1aSopenharmony_ci "pmaxsh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 351cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 352cabdff1aSopenharmony_ci "pmaxsh %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 353cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 354cabdff1aSopenharmony_ci "pmaxsh %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 355cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 356cabdff1aSopenharmony_ci "pmaxsh %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 357cabdff1aSopenharmony_ci "packushb %[ftmp5], 
%[ftmp5], %[ftmp10] \n\t" 358cabdff1aSopenharmony_ci "pmaxsh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 359cabdff1aSopenharmony_ci "packushb %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 360cabdff1aSopenharmony_ci "pmaxsh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 361cabdff1aSopenharmony_ci "packushb %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 362cabdff1aSopenharmony_ci 363cabdff1aSopenharmony_ci "lwc1 %[Ed], 0x00(%[temp_value]) \n\t" 364cabdff1aSopenharmony_ci "pand %[Ed], %[Ed], %[mask] \n\t" 365cabdff1aSopenharmony_ci "paddb %[ftmp0], %[ftmp0], %[Ed] \n\t" 366cabdff1aSopenharmony_ci "paddb %[ftmp1], %[ftmp1], %[Ed] \n\t" 367cabdff1aSopenharmony_ci "paddb %[ftmp2], %[ftmp2], %[Ed] \n\t" 368cabdff1aSopenharmony_ci "paddb %[ftmp3], %[ftmp3], %[Ed] \n\t" 369cabdff1aSopenharmony_ci "paddb %[ftmp4], %[ftmp4], %[Ed] \n\t" 370cabdff1aSopenharmony_ci "paddb %[ftmp5], %[ftmp5], %[Ed] \n\t" 371cabdff1aSopenharmony_ci "paddb %[ftmp6], %[ftmp6], %[Ed] \n\t" 372cabdff1aSopenharmony_ci "paddb %[ftmp7], %[ftmp7], %[Ed] \n\t" 373cabdff1aSopenharmony_ci "swc1 %[ftmp0], 0x00(%[dst]) \n\t" 374cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t" 375cabdff1aSopenharmony_ci "swc1 %[ftmp1], 0x00(%[tmp1]) \n\t" 376cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 377cabdff1aSopenharmony_ci "swc1 %[ftmp2], 0x00(%[tmp1]) \n\t" 378cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 379cabdff1aSopenharmony_ci "swc1 %[ftmp3], 0x00(%[tmp1]) \n\t" 380cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 381cabdff1aSopenharmony_ci "swc1 %[ftmp4], 0x00(%[tmp1]) \n\t" 382cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 383cabdff1aSopenharmony_ci "swc1 %[ftmp5], 0x00(%[tmp1]) \n\t" 384cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 385cabdff1aSopenharmony_ci "swc1 %[ftmp6], 0x00(%[tmp1]) \n\t" 386cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 387cabdff1aSopenharmony_ci "swc1 %[ftmp7], 0x00(%[tmp1]) \n\t" 
388cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 389cabdff1aSopenharmony_ci PTR_ADDIU "%[input], %[input], 0x40 \n\t" 390cabdff1aSopenharmony_ci PTR_ADDIU "%[temp_value], %[temp_value], 0x04 \n\t" 391cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t" 392cabdff1aSopenharmony_ci "bnez %[tmp0], 1b \n\t" 393cabdff1aSopenharmony_ci : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 394cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), 395cabdff1aSopenharmony_ci [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 396cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), 397cabdff1aSopenharmony_ci [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]), 398cabdff1aSopenharmony_ci [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]), 399cabdff1aSopenharmony_ci [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]), 400cabdff1aSopenharmony_ci [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]), 401cabdff1aSopenharmony_ci [Gd]"=&f"(ftmp[21]), [input]"+&r"(input) 402cabdff1aSopenharmony_ci : [stride]"r"(stride), [temp_value]"r"(temp_value) 403cabdff1aSopenharmony_ci : "memory" 404cabdff1aSopenharmony_ci ); 405cabdff1aSopenharmony_ci} 406cabdff1aSopenharmony_ci 407cabdff1aSopenharmony_cistatic void idct_column_false_mmi(uint8_t *dst, int stride, int16_t *input) 408cabdff1aSopenharmony_ci{ 409cabdff1aSopenharmony_ci int16_t temp_value[8]; 410cabdff1aSopenharmony_ci double ftmp[23]; 411cabdff1aSopenharmony_ci uint64_t tmp[2]; 412cabdff1aSopenharmony_ci for (int i = 0; i < 8; ++i) 413cabdff1aSopenharmony_ci temp_value[i] = (46341 * input[i << 3] + (8 << 16)) >> 20; 414cabdff1aSopenharmony_ci __asm__ volatile ( 415cabdff1aSopenharmony_ci "pxor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" 416cabdff1aSopenharmony_ci "li %[tmp0], 0x02 \n\t" 417cabdff1aSopenharmony_ci "1: \n\t" 418cabdff1aSopenharmony_ci "ldc1 %[ftmp0], 
0x00(%[input]) \n\t" 419cabdff1aSopenharmony_ci "ldc1 %[ftmp4], 0x08(%[input]) \n\t" 420cabdff1aSopenharmony_ci "ldc1 %[ftmp1], 0x10(%[input]) \n\t" 421cabdff1aSopenharmony_ci "ldc1 %[ftmp5], 0x18(%[input]) \n\t" 422cabdff1aSopenharmony_ci "ldc1 %[ftmp2], 0x20(%[input]) \n\t" 423cabdff1aSopenharmony_ci "ldc1 %[ftmp6], 0x28(%[input]) \n\t" 424cabdff1aSopenharmony_ci "ldc1 %[ftmp3], 0x30(%[input]) \n\t" 425cabdff1aSopenharmony_ci "ldc1 %[ftmp7], 0x38(%[input]) \n\t" 426cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3], 427cabdff1aSopenharmony_ci %[A], %[B], %[C], %[D]) 428cabdff1aSopenharmony_ci TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7], 429cabdff1aSopenharmony_ci %[A], %[B], %[C], %[D]) 430cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 64277) 431cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 12785) 432cabdff1aSopenharmony_ci LOAD_CONST(%[Gd], 1) 433cabdff1aSopenharmony_ci "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t" 434cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t" 435cabdff1aSopenharmony_ci "por %[mask], %[C], %[Gd] \n\t" 436cabdff1aSopenharmony_ci "pmullh %[B], %[ftmp1], %[mask] \n\t" 437cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 438cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 439cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 440cabdff1aSopenharmony_ci "paddh %[A], %[A], %[C] \n\t" 441cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t" 442cabdff1aSopenharmony_ci "por %[mask], %[D], %[Gd] \n\t" 443cabdff1aSopenharmony_ci "pmullh %[Ad], %[ftmp7], %[mask] \n\t" 444cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[Ad] \n\t" 445cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 446cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t" 447cabdff1aSopenharmony_ci "psubh %[B], %[C], %[B] \n\t" 448cabdff1aSopenharmony_ci "psubh %[B], %[B], %[D] \n\t" 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 54491) 451cabdff1aSopenharmony_ci 
LOAD_CONST(%[ftmp9], 36410) 452cabdff1aSopenharmony_ci "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t" 453cabdff1aSopenharmony_ci "por %[mask], %[Ad], %[Gd] \n\t" 454cabdff1aSopenharmony_ci "pmullh %[Cd], %[ftmp5], %[mask] \n\t" 455cabdff1aSopenharmony_ci "pmulhuh %[C], %[ftmp9], %[Cd] \n\t" 456cabdff1aSopenharmony_ci "pmullh %[C], %[C], %[mask] \n\t" 457cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 458cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 459cabdff1aSopenharmony_ci "pmullh %[D], %[ftmp3], %[mask] \n\t" 460cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[D] \n\t" 461cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 462cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 463cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ad] \n\t" 464cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Bd] \n\t" 465cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t" 466cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 467cabdff1aSopenharmony_ci "pmullh %[Cd], %[ftmp3], %[mask] \n\t" 468cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp9], %[Cd] \n\t" 469cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 470cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t" 471cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[Gd] \n\t" 472cabdff1aSopenharmony_ci "pmullh %[Ad], %[ftmp5], %[mask] \n\t" 473cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 474cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 475cabdff1aSopenharmony_ci "psubh %[D], %[Ad], %[D] \n\t" 476cabdff1aSopenharmony_ci "paddh %[D], %[D], %[Ed] \n\t" 477cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Bd] \n\t" 478cabdff1aSopenharmony_ci 479cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 46341) 480cabdff1aSopenharmony_ci "psubh %[Ad], %[A], %[C] \n\t" 481cabdff1aSopenharmony_ci "pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t" 482cabdff1aSopenharmony_ci "por %[mask], %[Bd], %[Gd] \n\t" 483cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 
484cabdff1aSopenharmony_ci "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t" 485cabdff1aSopenharmony_ci "pmullh %[Ad], %[Ad], %[mask] \n\t" 486cabdff1aSopenharmony_ci "paddh %[Ad], %[Ad], %[Bd] \n\t" 487cabdff1aSopenharmony_ci "psubh %[Bd], %[B], %[D] \n\t" 488cabdff1aSopenharmony_ci "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t" 489cabdff1aSopenharmony_ci "por %[mask], %[Cd], %[Gd] \n\t" 490cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 491cabdff1aSopenharmony_ci "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t" 492cabdff1aSopenharmony_ci "pmullh %[Bd], %[Bd], %[mask] \n\t" 493cabdff1aSopenharmony_ci "paddh %[Bd], %[Bd], %[Cd] \n\t" 494cabdff1aSopenharmony_ci "paddh %[Cd], %[A], %[C] \n\t" 495cabdff1aSopenharmony_ci "paddh %[Dd], %[B], %[D] \n\t" 496cabdff1aSopenharmony_ci 497cabdff1aSopenharmony_ci LOAD_CONST(%[Ed], 8) 498cabdff1aSopenharmony_ci "paddh %[A], %[ftmp0], %[ftmp4] \n\t" 499cabdff1aSopenharmony_ci "pcmpgth %[B], %[ftmp10], %[A] \n\t" 500cabdff1aSopenharmony_ci "por %[mask], %[B], %[Gd] \n\t" 501cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 502cabdff1aSopenharmony_ci "pmulhuh %[A], %[ftmp8], %[A] \n\t" 503cabdff1aSopenharmony_ci "pmullh %[A], %[A], %[mask] \n\t" 504cabdff1aSopenharmony_ci "paddh %[A], %[A], %[B] \n\t" 505cabdff1aSopenharmony_ci "paddh %[A], %[A], %[Ed] \n\t" 506cabdff1aSopenharmony_ci "psubh %[B], %[ftmp0], %[ftmp4] \n\t" 507cabdff1aSopenharmony_ci "pcmpgth %[C], %[ftmp10], %[B] \n\t" 508cabdff1aSopenharmony_ci "por %[mask], %[C], %[Gd] \n\t" 509cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 510cabdff1aSopenharmony_ci "pmulhuh %[B], %[ftmp8], %[B] \n\t" 511cabdff1aSopenharmony_ci "pmullh %[B], %[B], %[mask] \n\t" 512cabdff1aSopenharmony_ci "paddh %[B], %[B], %[C] \n\t" 513cabdff1aSopenharmony_ci "paddh %[B], %[B], %[Ed] \n\t" 514cabdff1aSopenharmony_ci 515cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp8], 60547) 516cabdff1aSopenharmony_ci LOAD_CONST(%[ftmp9], 25080) 517cabdff1aSopenharmony_ci "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t" 
518cabdff1aSopenharmony_ci "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t" 519cabdff1aSopenharmony_ci "por %[mask], %[D], %[Gd] \n\t" 520cabdff1aSopenharmony_ci "pmullh %[Ed], %[ftmp2], %[mask] \n\t" 521cabdff1aSopenharmony_ci "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t" 522cabdff1aSopenharmony_ci "pmullh %[Ed], %[Ed], %[mask] \n\t" 523cabdff1aSopenharmony_ci "paddh %[C], %[C], %[Ed] \n\t" 524cabdff1aSopenharmony_ci "paddh %[C], %[C], %[D] \n\t" 525cabdff1aSopenharmony_ci "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t" 526cabdff1aSopenharmony_ci "por %[mask], %[Ed], %[Gd] \n\t" 527cabdff1aSopenharmony_ci "pmullh %[D], %[ftmp6], %[mask] \n\t" 528cabdff1aSopenharmony_ci "pmulhuh %[D], %[ftmp8], %[D] \n\t" 529cabdff1aSopenharmony_ci "pmullh %[D], %[D], %[mask] \n\t" 530cabdff1aSopenharmony_ci "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t" 531cabdff1aSopenharmony_ci "psubh %[D], %[Gd], %[D] \n\t" 532cabdff1aSopenharmony_ci "psubh %[D], %[D], %[Ed] \n\t" 533cabdff1aSopenharmony_ci "psubh %[Ed], %[A], %[C] \n\t" 534cabdff1aSopenharmony_ci "paddh %[Gd], %[A], %[C] \n\t" 535cabdff1aSopenharmony_ci "paddh %[A], %[B], %[Ad] \n\t" 536cabdff1aSopenharmony_ci "psubh %[C], %[B], %[Ad] \n\t" 537cabdff1aSopenharmony_ci "psubh %[B], %[Bd], %[D] \n\t" 538cabdff1aSopenharmony_ci "paddh %[D], %[Bd], %[D] \n\t" 539cabdff1aSopenharmony_ci "por %[mask], %[ftmp1], %[ftmp2] \n\t" 540cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp3] \n\t" 541cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp4] \n\t" 542cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp5] \n\t" 543cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp6] \n\t" 544cabdff1aSopenharmony_ci "por %[mask], %[mask], %[ftmp7] \n\t" 545cabdff1aSopenharmony_ci "pcmpeqh %[mask], %[mask], %[ftmp10] \n\t" 546cabdff1aSopenharmony_ci "li %[tmp1], 0x04 \n\t" 547cabdff1aSopenharmony_ci "dmtc1 %[tmp1], %[ftmp8] \n\t" 548cabdff1aSopenharmony_ci "paddh %[ftmp0], %[Gd], %[Cd] \n\t" 549cabdff1aSopenharmony_ci "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t" 
550cabdff1aSopenharmony_ci "paddh %[ftmp1], %[A], %[D] \n\t" 551cabdff1aSopenharmony_ci "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t" 552cabdff1aSopenharmony_ci "psubh %[ftmp2], %[A], %[D] \n\t" 553cabdff1aSopenharmony_ci "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t" 554cabdff1aSopenharmony_ci "paddh %[ftmp3], %[Ed], %[Dd] \n\t" 555cabdff1aSopenharmony_ci "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t" 556cabdff1aSopenharmony_ci "psubh %[ftmp4], %[Ed], %[Dd] \n\t" 557cabdff1aSopenharmony_ci "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 558cabdff1aSopenharmony_ci "paddh %[ftmp5], %[C], %[B] \n\t" 559cabdff1aSopenharmony_ci "psrah %[ftmp5], %[ftmp5], %[ftmp8] \n\t" 560cabdff1aSopenharmony_ci "psubh %[ftmp6], %[C], %[B] \n\t" 561cabdff1aSopenharmony_ci "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t" 562cabdff1aSopenharmony_ci "psubh %[ftmp7], %[Gd], %[Cd] \n\t" 563cabdff1aSopenharmony_ci "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t" 564cabdff1aSopenharmony_ci 565cabdff1aSopenharmony_ci /* Load from dst */ 566cabdff1aSopenharmony_ci "lwc1 %[A], 0x00(%[dst]) \n\t" 567cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t" 568cabdff1aSopenharmony_ci "lwc1 %[B], 0x00(%[tmp1]) \n\t" 569cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 570cabdff1aSopenharmony_ci "lwc1 %[C], 0x00(%[tmp1]) \n\t" 571cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 572cabdff1aSopenharmony_ci "lwc1 %[D], 0x00(%[tmp1]) \n\t" 573cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 574cabdff1aSopenharmony_ci "lwc1 %[Ad], 0x00(%[tmp1]) \n\t" 575cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 576cabdff1aSopenharmony_ci "lwc1 %[Bd], 0x00(%[tmp1]) \n\t" 577cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 578cabdff1aSopenharmony_ci "lwc1 %[Cd], 0x00(%[tmp1]) \n\t" 579cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 580cabdff1aSopenharmony_ci "lwc1 %[Dd], 0x00(%[tmp1]) \n\t" 581cabdff1aSopenharmony_ci "punpcklbh 
%[A], %[A], %[ftmp10] \n\t" 582cabdff1aSopenharmony_ci "punpcklbh %[B], %[B], %[ftmp10] \n\t" 583cabdff1aSopenharmony_ci "punpcklbh %[C], %[C], %[ftmp10] \n\t" 584cabdff1aSopenharmony_ci "punpcklbh %[D], %[D], %[ftmp10] \n\t" 585cabdff1aSopenharmony_ci "punpcklbh %[Ad], %[Ad], %[ftmp10] \n\t" 586cabdff1aSopenharmony_ci "punpcklbh %[Bd], %[Bd], %[ftmp10] \n\t" 587cabdff1aSopenharmony_ci "punpcklbh %[Cd], %[Cd], %[ftmp10] \n\t" 588cabdff1aSopenharmony_ci "punpcklbh %[Dd], %[Dd], %[ftmp10] \n\t" 589cabdff1aSopenharmony_ci "ldc1 %[Ed], 0x00(%[temp_value]) \n\t" 590cabdff1aSopenharmony_ci "pand %[Ed], %[Ed], %[mask] \n\t" 591cabdff1aSopenharmony_ci "pnor %[mask], %[mask], %[mask] \n\t" 592cabdff1aSopenharmony_ci "pand %[ftmp0], %[ftmp0], %[mask] \n\t" 593cabdff1aSopenharmony_ci "pand %[ftmp1], %[ftmp1], %[mask] \n\t" 594cabdff1aSopenharmony_ci "pand %[ftmp2], %[ftmp2], %[mask] \n\t" 595cabdff1aSopenharmony_ci "pand %[ftmp3], %[ftmp3], %[mask] \n\t" 596cabdff1aSopenharmony_ci "pand %[ftmp4], %[ftmp4], %[mask] \n\t" 597cabdff1aSopenharmony_ci "pand %[ftmp5], %[ftmp5], %[mask] \n\t" 598cabdff1aSopenharmony_ci "pand %[ftmp6], %[ftmp6], %[mask] \n\t" 599cabdff1aSopenharmony_ci "pand %[ftmp7], %[ftmp7], %[mask] \n\t" 600cabdff1aSopenharmony_ci "paddh %[ftmp0], %[ftmp0], %[A] \n\t" 601cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[B] \n\t" 602cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[C] \n\t" 603cabdff1aSopenharmony_ci "paddh %[ftmp3], %[ftmp3], %[D] \n\t" 604cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[Ad] \n\t" 605cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[Bd] \n\t" 606cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp6], %[Cd] \n\t" 607cabdff1aSopenharmony_ci "paddh %[ftmp7], %[ftmp7], %[Dd] \n\t" 608cabdff1aSopenharmony_ci "paddh %[ftmp0], %[ftmp0], %[Ed] \n\t" 609cabdff1aSopenharmony_ci "paddh %[ftmp1], %[ftmp1], %[Ed] \n\t" 610cabdff1aSopenharmony_ci "paddh %[ftmp2], %[ftmp2], %[Ed] \n\t" 611cabdff1aSopenharmony_ci "paddh %[ftmp3], 
%[ftmp3], %[Ed] \n\t" 612cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp4], %[Ed] \n\t" 613cabdff1aSopenharmony_ci "paddh %[ftmp5], %[ftmp5], %[Ed] \n\t" 614cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp6], %[Ed] \n\t" 615cabdff1aSopenharmony_ci "paddh %[ftmp7], %[ftmp7], %[Ed] \n\t" 616cabdff1aSopenharmony_ci "pmaxsh %[ftmp0], %[ftmp0], %[ftmp10] \n\t" 617cabdff1aSopenharmony_ci "packushb %[ftmp0], %[ftmp0], %[ftmp10] \n\t" 618cabdff1aSopenharmony_ci "pmaxsh %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 619cabdff1aSopenharmony_ci "packushb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" 620cabdff1aSopenharmony_ci "pmaxsh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 621cabdff1aSopenharmony_ci "packushb %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 622cabdff1aSopenharmony_ci "pmaxsh %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 623cabdff1aSopenharmony_ci "packushb %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 624cabdff1aSopenharmony_ci "pmaxsh %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 625cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp10] \n\t" 626cabdff1aSopenharmony_ci "pmaxsh %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 627cabdff1aSopenharmony_ci "packushb %[ftmp5], %[ftmp5], %[ftmp10] \n\t" 628cabdff1aSopenharmony_ci "pmaxsh %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 629cabdff1aSopenharmony_ci "packushb %[ftmp6], %[ftmp6], %[ftmp10] \n\t" 630cabdff1aSopenharmony_ci "pmaxsh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 631cabdff1aSopenharmony_ci "packushb %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 632cabdff1aSopenharmony_ci "swc1 %[ftmp0], 0x00(%[dst]) \n\t" 633cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t" 634cabdff1aSopenharmony_ci "swc1 %[ftmp1], 0x00(%[tmp1]) \n\t" 635cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 636cabdff1aSopenharmony_ci "swc1 %[ftmp2], 0x00(%[tmp1]) \n\t" 637cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 638cabdff1aSopenharmony_ci "swc1 %[ftmp3], 0x00(%[tmp1]) \n\t" 639cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 640cabdff1aSopenharmony_ci 
"swc1 %[ftmp4], 0x00(%[tmp1]) \n\t" 641cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 642cabdff1aSopenharmony_ci "swc1 %[ftmp5], 0x00(%[tmp1]) \n\t" 643cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 644cabdff1aSopenharmony_ci "swc1 %[ftmp6], 0x00(%[tmp1]) \n\t" 645cabdff1aSopenharmony_ci PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t" 646cabdff1aSopenharmony_ci "swc1 %[ftmp7], 0x00(%[tmp1]) \n\t" 647cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 0x04 \n\t" 648cabdff1aSopenharmony_ci PTR_ADDIU "%[input], %[input], 0x40 \n\t" 649cabdff1aSopenharmony_ci PTR_ADDIU "%[temp_value], %[temp_value], 0x08 \n\t" 650cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t" 651cabdff1aSopenharmony_ci "bnez %[tmp0], 1b \n\t" 652cabdff1aSopenharmony_ci : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), 653cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), 654cabdff1aSopenharmony_ci [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 655cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), 656cabdff1aSopenharmony_ci [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]), 657cabdff1aSopenharmony_ci [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]), 658cabdff1aSopenharmony_ci [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]), 659cabdff1aSopenharmony_ci [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]), 660cabdff1aSopenharmony_ci [Gd]"=&f"(ftmp[21]), [input]"+&r"(input) 661cabdff1aSopenharmony_ci : [stride]"r"(stride), [temp_value]"r"(temp_value) 662cabdff1aSopenharmony_ci : "memory" 663cabdff1aSopenharmony_ci ); 664cabdff1aSopenharmony_ci} 665cabdff1aSopenharmony_cistatic void idct_mmi(uint8_t *dst, int stride, int16_t *input, int type) 666cabdff1aSopenharmony_ci{ 667cabdff1aSopenharmony_ci idct_row_mmi(input); 668cabdff1aSopenharmony_ci if (type == 1) 669cabdff1aSopenharmony_ci 
idct_column_true_mmi(dst, stride, input);
670cabdff1aSopenharmony_ci else
/* Every type value other than 1 is routed to idct_column_false_mmi. */
671cabdff1aSopenharmony_ci idct_column_false_mmi(dst, stride, input);
672cabdff1aSopenharmony_ci}
673cabdff1aSopenharmony_ci
/*
 * Run the inverse transform on block through idct_mmi with type 1, which
 * selects the idct_column_true_mmi column pass, then zero the coefficient
 * buffer so the caller gets it back cleared.
 *
 * dest      - destination pixels handed on to the column pass.
 * line_size - row pitch of dest in bytes; idct_mmi receives it as an int.
 * block     - coefficients, transformed in place and cleared on return.
 *
 * NOTE(review): going by the name this variant overwrites dest rather than
 * accumulating onto it; the column helper that performs the store is defined
 * earlier in the file -- confirm there.
 */
674cabdff1aSopenharmony_civoid ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
675cabdff1aSopenharmony_ci{
676cabdff1aSopenharmony_ci idct_mmi(dest, line_size, block, 1);
/* sizeof(*block) << 6 is 64 elements of int16_t, i.e. 128 bytes. */
677cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) << 6);
678cabdff1aSopenharmony_ci}
679cabdff1aSopenharmony_ci
/*
 * Same sequence as ff_vp3_idct_put_mmi but with type 2, so idct_mmi takes
 * the idct_column_false_mmi column pass; the 64 coefficients are zeroed
 * afterwards.
 *
 * dest      - destination pixels handed on to the column pass.
 * line_size - row pitch of dest in bytes; idct_mmi receives it as an int.
 * block     - coefficients, transformed in place and cleared on return.
 *
 * NOTE(review): presumably this is the variant that adds the transform
 * result onto the pixels already in dest -- verify in idct_column_false_mmi.
 */
680cabdff1aSopenharmony_civoid ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
681cabdff1aSopenharmony_ci{
682cabdff1aSopenharmony_ci idct_mmi(dest, line_size, block, 2);
/* Clear all 64 int16_t coefficients (128 bytes). */
683cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) << 6);
684cabdff1aSopenharmony_ci}
/*
 * DC-only shortcut: derive one value from block[0] and add it to every byte
 * of an area of 8 rows by 8 bytes at dest, clamping each result to 0..255,
 * then reset block[0] to 0. No other coefficient is read or written.
 *
 * dest      - first byte of the area; modified in place. Each row is fetched
 *             with ldc1 (plain doubleword load), so dest is presumably
 *             expected to be 8-byte aligned -- TODO confirm with callers.
 * line_size - byte distance from one row to the next.
 * block     - coefficient buffer; only element 0 is used.
 */
685cabdff1aSopenharmony_civoid ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
686cabdff1aSopenharmony_ci{
/* Add a bias of 15 to the DC coefficient and scale it down by 32. */
687cabdff1aSopenharmony_ci int dc = (block[0] + 15) >> 5;
688cabdff1aSopenharmony_ci
/* Scratch storage bound to the asm output operands below. */
689cabdff1aSopenharmony_ci double ftmp[7];
690cabdff1aSopenharmony_ci uint64_t tmp;
691cabdff1aSopenharmony_ci __asm__ volatile (
/* ftmp0 = 0: used as the zero half of the unpack/pack steps and as the shuffle selector. */
692cabdff1aSopenharmony_ci "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* Move dc into ftmp5 and replicate its low halfword into all four 16-bit lanes. */
693cabdff1aSopenharmony_ci "mtc1 %[dc], %[ftmp5] \n\t"
694cabdff1aSopenharmony_ci "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* tmp0 counts the 8 rows still to process. */
695cabdff1aSopenharmony_ci "li %[tmp0], 0x08 \n\t"
696cabdff1aSopenharmony_ci "1: \n\t"
/* Fetch the 8 bytes of the current row. */
697cabdff1aSopenharmony_ci "ldc1 %[ftmp1], 0x00(%[dest]) \n\t"
/* Widen the low and the high 4 bytes to 16-bit lanes by interleaving with zero. */
698cabdff1aSopenharmony_ci "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
699cabdff1aSopenharmony_ci "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
/* Add the replicated dc value to each widened pixel. */
700cabdff1aSopenharmony_ci "paddh %[ftmp4], %[ftmp2], %[ftmp5] \n\t"
701cabdff1aSopenharmony_ci "paddh %[ftmp6], %[ftmp3], %[ftmp5] \n\t"
/* Narrow back to bytes with unsigned saturation; the useful 4 bytes land in the low word. */
702cabdff1aSopenharmony_ci "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
703cabdff1aSopenharmony_ci "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
/* Write the row back as two 32-bit halves at offsets 0 and 4. */
704cabdff1aSopenharmony_ci "swc1 %[ftmp4], 0x00(%[dest]) \n\t"
705cabdff1aSopenharmony_ci "swc1 %[ftmp6], 0x04(%[dest]) \n\t"
/* Step to the next row and loop until the row counter reaches zero. */
706cabdff1aSopenharmony_ci PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
707cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
708cabdff1aSopenharmony_ci "bnez %[tmp0], 1b \n\t"
/*
 * Outputs: dest is read-write because the loop advances it. block is also
 * listed as read-write although the template never references it. Every
 * scratch register is early-clobber.
 */
709cabdff1aSopenharmony_ci : [dest]"+&r"(dest), [block]"+&r"(block), [tmp0]"=&r"(tmp),
710cabdff1aSopenharmony_ci [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
711cabdff1aSopenharmony_ci [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
712cabdff1aSopenharmony_ci [ftmp6]"=&f"(ftmp[6])
713cabdff1aSopenharmony_ci : [line_size]"r"(line_size), [dc]"r"(dc)
/* The stores through dest are not visible as operands, hence the memory clobber. */
714cabdff1aSopenharmony_ci : "memory"
715cabdff1aSopenharmony_ci );
/* Consume the DC coefficient so the buffer is left cleared for the caller. */
716cabdff1aSopenharmony_ci block[0] = 0;
717cabdff1aSopenharmony_ci}
718cabdff1aSopenharmony_ci
/*
 * Combine two source rows into dst for h rows of 8 bytes each, with all
 * three pointers stepping by stride per row.
 *
 * The MMI path (taken only when h == 8) computes, on 8 bytes at once,
 *     (a & b) + (((a ^ b) & 0xfe..fe) >> 1)
 * which is the per-byte average of a and b rounded down. Any other height
 * uses the C loop, which hands 32-bit words to no_rnd_avg32() from
 * rnd_avg.h (presumably the same formula -- confirm in that header).
 *
 * dst    - destination; written with sdc1 / AV_WN32A.
 * src1   - first source; read with MMI_ULDC1 / AV_RN32.
 * src2   - second source; read the same way as src1.
 * stride - byte distance between rows, shared by all three buffers.
 * h      - number of rows.
 *
 * NOTE(review): the choice of store and load helpers suggests dst must be
 * aligned while the sources may be unaligned -- verify against mmiutils.h
 * and intreadwrite.h.
 */
719cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
720cabdff1aSopenharmony_ci const uint8_t *src2, ptrdiff_t stride, int h)
721cabdff1aSopenharmony_ci{
722cabdff1aSopenharmony_ci if (h == 8) {
/* Operands ftmp1..ftmp6 in the template are bound to ftmp[0]..ftmp[5]. */
723cabdff1aSopenharmony_ci double ftmp[6];
724cabdff1aSopenharmony_ci uint64_t tmp[2];
/* Macro from mmiutils.h; presumably declares the scratch used by MMI_ULDC1 -- confirm there. */
725cabdff1aSopenharmony_ci DECLARE_VAR_ALL64;
726cabdff1aSopenharmony_ci
727cabdff1aSopenharmony_ci __asm__ volatile (
/* tmp0 counts the 8 rows still to process. */
728cabdff1aSopenharmony_ci "li %[tmp0], 0x08 \n\t"
/* ftmp4 = 0xfefefefe in both 32-bit halves: mask that drops bit 0 of every byte. */
729cabdff1aSopenharmony_ci "li %[tmp1], 0xfefefefe \n\t"
730cabdff1aSopenharmony_ci "dmtc1 %[tmp1], %[ftmp4] \n\t"
731cabdff1aSopenharmony_ci "punpcklwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
/* ftmp5 = 1: shift count for psrlw. */
732cabdff1aSopenharmony_ci "li %[tmp1], 0x01 \n\t"
733cabdff1aSopenharmony_ci "dmtc1 %[tmp1], %[ftmp5] \n\t"
734cabdff1aSopenharmony_ci "1: \n\t"
/* Load 8 bytes from each source row. */
735cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp1], %[src1], 0x0)
736cabdff1aSopenharmony_ci MMI_ULDC1(%[ftmp2], %[src2], 0x0)
/* ftmp3 = ((a ^ b) & mask) >> 1; the mask keeps bits from crossing into the byte below. */
737cabdff1aSopenharmony_ci "pxor %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
738cabdff1aSopenharmony_ci "pand %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
739cabdff1aSopenharmony_ci "psrlw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
/* ftmp6 = a & b; adding it to ftmp3 gives the rounded-down average of each byte. */
740cabdff1aSopenharmony_ci "pand %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
741cabdff1aSopenharmony_ci "paddw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
/* Store the 8 result bytes with a plain doubleword store. */
742cabdff1aSopenharmony_ci "sdc1 %[ftmp3], 0x00(%[dst]) \n\t"
/* Advance all three row pointers, then loop until the counter reaches zero. */
743cabdff1aSopenharmony_ci PTR_ADDU "%[src1], %[src1], %[stride] \n\t"
744cabdff1aSopenharmony_ci PTR_ADDU "%[src2], %[src2], %[stride] \n\t"
745cabdff1aSopenharmony_ci PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
746cabdff1aSopenharmony_ci PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
747cabdff1aSopenharmony_ci "bnez %[tmp0], 1b \n\t"
/* RESTRICT_ASM_ALL64 comes from mmiutils.h; presumably the operand entry matching DECLARE_VAR_ALL64 -- confirm. */
748cabdff1aSopenharmony_ci : RESTRICT_ASM_ALL64
/* The pointers are read-write because the loop advances them. */
749cabdff1aSopenharmony_ci [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
750cabdff1aSopenharmony_ci [ftmp1]"=&f"(ftmp[0]), [ftmp2]"=&f"(ftmp[1]), [ftmp3]"=&f"(ftmp[2]),
751cabdff1aSopenharmony_ci [ftmp4]"=&f"(ftmp[3]), [ftmp5]"=&f"(ftmp[4]), [ftmp6]"=&f"(ftmp[5]),
752cabdff1aSopenharmony_ci [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1])
753cabdff1aSopenharmony_ci : [stride]"r"(stride)
/* The store through dst is not visible as an operand, hence the memory clobber. */
754cabdff1aSopenharmony_ci : "memory"
755cabdff1aSopenharmony_ci );
756cabdff1aSopenharmony_ci } else {
/* Generic height: handle each 8-byte row as two 32-bit words. */
757cabdff1aSopenharmony_ci int i;
758cabdff1aSopenharmony_ci
759cabdff1aSopenharmony_ci for (i = 0; i < h; i++) {
760cabdff1aSopenharmony_ci uint32_t a, b;
761cabdff1aSopenharmony_ci
/* Bytes 0..3 of row i. */
762cabdff1aSopenharmony_ci a = AV_RN32(&src1[i * stride]);
763cabdff1aSopenharmony_ci b = AV_RN32(&src2[i * stride]);
764cabdff1aSopenharmony_ci AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
/* Bytes 4..7 of row i. */
765cabdff1aSopenharmony_ci a = AV_RN32(&src1[i * stride + 4]);
766cabdff1aSopenharmony_ci b = AV_RN32(&src2[i * stride + 4]);
767cabdff1aSopenharmony_ci AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
768cabdff1aSopenharmony_ci }
769cabdff1aSopenharmony_ci }
770cabdff1aSopenharmony_ci}
771