1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2012 3cabdff1aSopenharmony_ci * MIPS Technologies, Inc., California. 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * Redistribution and use in source and binary forms, with or without 6cabdff1aSopenharmony_ci * modification, are permitted provided that the following conditions 7cabdff1aSopenharmony_ci * are met: 8cabdff1aSopenharmony_ci * 1. Redistributions of source code must retain the above copyright 9cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer. 10cabdff1aSopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 11cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer in the 12cabdff1aSopenharmony_ci * documentation and/or other materials provided with the distribution. 13cabdff1aSopenharmony_ci * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14cabdff1aSopenharmony_ci * contributors may be used to endorse or promote products derived from 15cabdff1aSopenharmony_ci * this software without specific prior written permission. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18cabdff1aSopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19cabdff1aSopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20cabdff1aSopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21cabdff1aSopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22cabdff1aSopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23cabdff1aSopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24cabdff1aSopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25cabdff1aSopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26cabdff1aSopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27cabdff1aSopenharmony_ci * SUCH DAMAGE. 28cabdff1aSopenharmony_ci * 29cabdff1aSopenharmony_ci * Authors: Darko Laus (darko@mips.com) 30cabdff1aSopenharmony_ci * Djordje Pesut (djordje@mips.com) 31cabdff1aSopenharmony_ci * Mirjana Vulin (mvulin@mips.com) 32cabdff1aSopenharmony_ci * 33cabdff1aSopenharmony_ci * AAC Spectral Band Replication decoding functions optimized for MIPS 34cabdff1aSopenharmony_ci * 35cabdff1aSopenharmony_ci * This file is part of FFmpeg. 36cabdff1aSopenharmony_ci * 37cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 38cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 39cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 40cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 41cabdff1aSopenharmony_ci * 42cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 43cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 44cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 45cabdff1aSopenharmony_ci * Lesser General Public License for more details. 46cabdff1aSopenharmony_ci * 47cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 48cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 49cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 50cabdff1aSopenharmony_ci */ 51cabdff1aSopenharmony_ci 52cabdff1aSopenharmony_ci/** 53cabdff1aSopenharmony_ci * @file 54cabdff1aSopenharmony_ci * Reference: libavcodec/sbrdsp.c 55cabdff1aSopenharmony_ci */ 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_ci#include "config.h" 58cabdff1aSopenharmony_ci#include "libavcodec/sbrdsp.h" 59cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h" 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 62cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 63cabdff1aSopenharmony_cistatic void sbr_qmf_pre_shuffle_mips(float *z) 64cabdff1aSopenharmony_ci{ 65cabdff1aSopenharmony_ci int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6; 66cabdff1aSopenharmony_ci float *z1 = &z[66]; 67cabdff1aSopenharmony_ci float *z2 = &z[59]; 68cabdff1aSopenharmony_ci float *z3 = &z[2]; 69cabdff1aSopenharmony_ci float *z4 = z1 + 60; 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_ci /* loop unrolled 5 times */ 72cabdff1aSopenharmony_ci __asm__ volatile ( 73cabdff1aSopenharmony_ci "lui %[Temp6], 0x8000 \n\t" 74cabdff1aSopenharmony_ci "1: \n\t" 75cabdff1aSopenharmony_ci "lw %[Temp1], 0(%[z2]) \n\t" 76cabdff1aSopenharmony_ci "lw %[Temp2], 4(%[z2]) \n\t" 77cabdff1aSopenharmony_ci "lw %[Temp3], 8(%[z2]) \n\t" 78cabdff1aSopenharmony_ci "lw %[Temp4], 12(%[z2]) \n\t" 79cabdff1aSopenharmony_ci "lw %[Temp5], 16(%[z2]) \n\t" 80cabdff1aSopenharmony_ci "xor %[Temp1], %[Temp1], %[Temp6] \n\t" 81cabdff1aSopenharmony_ci "xor %[Temp2], %[Temp2], %[Temp6] \n\t" 82cabdff1aSopenharmony_ci "xor %[Temp3], %[Temp3], %[Temp6] \n\t" 83cabdff1aSopenharmony_ci "xor %[Temp4], %[Temp4], %[Temp6] \n\t" 84cabdff1aSopenharmony_ci "xor %[Temp5], %[Temp5], %[Temp6] \n\t" 85cabdff1aSopenharmony_ci PTR_ADDIU "%[z2], %[z2], -20 \n\t" 86cabdff1aSopenharmony_ci "sw %[Temp1], 32(%[z1]) \n\t" 87cabdff1aSopenharmony_ci "sw %[Temp2], 24(%[z1]) \n\t" 88cabdff1aSopenharmony_ci "sw %[Temp3], 16(%[z1]) \n\t" 89cabdff1aSopenharmony_ci "sw %[Temp4], 8(%[z1]) \n\t" 90cabdff1aSopenharmony_ci "sw %[Temp5], 0(%[z1]) \n\t" 91cabdff1aSopenharmony_ci "lw %[Temp1], 0(%[z3]) \n\t" 92cabdff1aSopenharmony_ci "lw %[Temp2], 4(%[z3]) \n\t" 93cabdff1aSopenharmony_ci "lw %[Temp3], 8(%[z3]) \n\t" 94cabdff1aSopenharmony_ci "lw %[Temp4], 12(%[z3]) \n\t" 95cabdff1aSopenharmony_ci "lw %[Temp5], 16(%[z3]) \n\t" 96cabdff1aSopenharmony_ci "sw %[Temp1], 4(%[z1]) \n\t" 97cabdff1aSopenharmony_ci "sw %[Temp2], 12(%[z1]) \n\t" 98cabdff1aSopenharmony_ci "sw %[Temp3], 20(%[z1]) \n\t" 99cabdff1aSopenharmony_ci "sw %[Temp4], 28(%[z1]) \n\t" 100cabdff1aSopenharmony_ci "sw %[Temp5], 36(%[z1]) \n\t" 101cabdff1aSopenharmony_ci PTR_ADDIU "%[z3], %[z3], 20 \n\t" 102cabdff1aSopenharmony_ci PTR_ADDIU "%[z1], %[z1], 40 \n\t" 103cabdff1aSopenharmony_ci "bne %[z1], %[z4], 1b \n\t" 104cabdff1aSopenharmony_ci "lw %[Temp1], 132(%[z]) \n\t" 105cabdff1aSopenharmony_ci "lw %[Temp2], 128(%[z]) \n\t" 106cabdff1aSopenharmony_ci "lw %[Temp3], 0(%[z]) \n\t" 107cabdff1aSopenharmony_ci "lw %[Temp4], 4(%[z]) \n\t" 108cabdff1aSopenharmony_ci "xor %[Temp1], %[Temp1], %[Temp6] \n\t" 109cabdff1aSopenharmony_ci "sw %[Temp1], 504(%[z]) \n\t" 110cabdff1aSopenharmony_ci "sw %[Temp2], 508(%[z]) \n\t" 111cabdff1aSopenharmony_ci "sw %[Temp3], 256(%[z]) \n\t" 112cabdff1aSopenharmony_ci "sw %[Temp4], 260(%[z]) \n\t" 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_ci : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2), 115cabdff1aSopenharmony_ci [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4), 116cabdff1aSopenharmony_ci [Temp5]"=&r"(Temp5), [Temp6]"=&r"(Temp6), 117cabdff1aSopenharmony_ci [z1]"+r"(z1), [z2]"+r"(z2), [z3]"+r"(z3) 118cabdff1aSopenharmony_ci : [z4]"r"(z4), [z]"r"(z) 119cabdff1aSopenharmony_ci : "memory" 120cabdff1aSopenharmony_ci ); 121cabdff1aSopenharmony_ci} 122cabdff1aSopenharmony_ci 123cabdff1aSopenharmony_cistatic void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z) 124cabdff1aSopenharmony_ci{ 125cabdff1aSopenharmony_ci int Temp1, Temp2, Temp3, Temp4, Temp5; 126cabdff1aSopenharmony_ci float *W_ptr = (float *)W; 127cabdff1aSopenharmony_ci float *z1 = (float *)z; 128cabdff1aSopenharmony_ci float *z2 = (float *)&z[60]; 129cabdff1aSopenharmony_ci float *z_end = z1 + 32; 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci /* loop unrolled 4 times */ 132cabdff1aSopenharmony_ci __asm__ volatile ( 133cabdff1aSopenharmony_ci "lui %[Temp5], 0x8000 \n\t" 134cabdff1aSopenharmony_ci "1: \n\t" 135cabdff1aSopenharmony_ci "lw %[Temp1], 0(%[z2]) \n\t" 136cabdff1aSopenharmony_ci "lw %[Temp2], 4(%[z2]) \n\t" 137cabdff1aSopenharmony_ci "lw %[Temp3], 8(%[z2]) \n\t" 138cabdff1aSopenharmony_ci "lw %[Temp4], 12(%[z2]) \n\t" 139cabdff1aSopenharmony_ci "xor %[Temp1], %[Temp1], %[Temp5] \n\t" 140cabdff1aSopenharmony_ci "xor %[Temp2], %[Temp2], %[Temp5] \n\t" 141cabdff1aSopenharmony_ci "xor %[Temp3], %[Temp3], %[Temp5] \n\t" 142cabdff1aSopenharmony_ci "xor %[Temp4], %[Temp4], %[Temp5] \n\t" 143cabdff1aSopenharmony_ci PTR_ADDIU "%[z2], %[z2], -16 \n\t" 144cabdff1aSopenharmony_ci "sw %[Temp1], 24(%[W_ptr]) \n\t" 145cabdff1aSopenharmony_ci "sw %[Temp2], 16(%[W_ptr]) \n\t" 146cabdff1aSopenharmony_ci "sw %[Temp3], 8(%[W_ptr]) \n\t" 147cabdff1aSopenharmony_ci "sw %[Temp4], 0(%[W_ptr]) \n\t" 148cabdff1aSopenharmony_ci "lw %[Temp1], 0(%[z1]) \n\t" 149cabdff1aSopenharmony_ci "lw %[Temp2], 4(%[z1]) \n\t" 150cabdff1aSopenharmony_ci "lw %[Temp3], 8(%[z1]) \n\t" 151cabdff1aSopenharmony_ci "lw %[Temp4], 12(%[z1]) \n\t" 152cabdff1aSopenharmony_ci "sw %[Temp1], 4(%[W_ptr]) \n\t" 153cabdff1aSopenharmony_ci "sw %[Temp2], 12(%[W_ptr]) \n\t" 154cabdff1aSopenharmony_ci "sw %[Temp3], 20(%[W_ptr]) \n\t" 155cabdff1aSopenharmony_ci "sw %[Temp4], 28(%[W_ptr]) \n\t" 156cabdff1aSopenharmony_ci PTR_ADDIU "%[z1], %[z1], 16 \n\t" 157cabdff1aSopenharmony_ci PTR_ADDIU "%[W_ptr],%[W_ptr], 32 \n\t" 158cabdff1aSopenharmony_ci "bne %[z1], %[z_end], 1b \n\t" 159cabdff1aSopenharmony_ci 160cabdff1aSopenharmony_ci : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2), 161cabdff1aSopenharmony_ci [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4), 162cabdff1aSopenharmony_ci [Temp5]"=&r"(Temp5), [z1]"+r"(z1), 163cabdff1aSopenharmony_ci [z2]"+r"(z2), [W_ptr]"+r"(W_ptr) 164cabdff1aSopenharmony_ci : [z_end]"r"(z_end) 165cabdff1aSopenharmony_ci : "memory" 166cabdff1aSopenharmony_ci ); 167cabdff1aSopenharmony_ci} 168cabdff1aSopenharmony_ci 169cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 170cabdff1aSopenharmony_cistatic void sbr_sum64x5_mips(float *z) 171cabdff1aSopenharmony_ci{ 172cabdff1aSopenharmony_ci int k; 173cabdff1aSopenharmony_ci float *z1; 174cabdff1aSopenharmony_ci float f1, f2, f3, f4, f5, f6, f7, f8; 175cabdff1aSopenharmony_ci for (k = 0; k < 64; k += 8) { 176cabdff1aSopenharmony_ci 177cabdff1aSopenharmony_ci z1 = &z[k]; 178cabdff1aSopenharmony_ci 179cabdff1aSopenharmony_ci /* loop unrolled 8 times */ 180cabdff1aSopenharmony_ci __asm__ volatile ( 181cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[z1]) \n\t" 182cabdff1aSopenharmony_ci "lwc1 $f1, 256(%[z1]) \n\t" 183cabdff1aSopenharmony_ci "lwc1 $f2, 4(%[z1]) \n\t" 184cabdff1aSopenharmony_ci "lwc1 $f3, 260(%[z1]) \n\t" 185cabdff1aSopenharmony_ci "lwc1 $f4, 8(%[z1]) \n\t" 186cabdff1aSopenharmony_ci "add.s %[f1], $f0, $f1 \n\t" 187cabdff1aSopenharmony_ci "lwc1 $f5, 264(%[z1]) \n\t" 188cabdff1aSopenharmony_ci "add.s %[f2], $f2, $f3 \n\t" 189cabdff1aSopenharmony_ci "lwc1 $f6, 12(%[z1]) \n\t" 190cabdff1aSopenharmony_ci "lwc1 $f7, 268(%[z1]) \n\t" 191cabdff1aSopenharmony_ci "add.s %[f3], $f4, $f5 \n\t" 192cabdff1aSopenharmony_ci "lwc1 $f8, 16(%[z1]) \n\t" 193cabdff1aSopenharmony_ci "lwc1 $f9, 272(%[z1]) \n\t" 194cabdff1aSopenharmony_ci "add.s %[f4], $f6, $f7 \n\t" 195cabdff1aSopenharmony_ci "lwc1 $f10, 20(%[z1]) \n\t" 196cabdff1aSopenharmony_ci "lwc1 $f11, 276(%[z1]) \n\t" 197cabdff1aSopenharmony_ci "add.s %[f5], $f8, $f9 \n\t" 198cabdff1aSopenharmony_ci "lwc1 $f12, 24(%[z1]) \n\t" 199cabdff1aSopenharmony_ci "lwc1 $f13, 280(%[z1]) \n\t" 200cabdff1aSopenharmony_ci "add.s %[f6], $f10, $f11 \n\t" 201cabdff1aSopenharmony_ci "lwc1 $f14, 28(%[z1]) \n\t" 202cabdff1aSopenharmony_ci "lwc1 $f15, 284(%[z1]) \n\t" 203cabdff1aSopenharmony_ci "add.s %[f7], $f12, $f13 \n\t" 204cabdff1aSopenharmony_ci "lwc1 $f0, 512(%[z1]) \n\t" 205cabdff1aSopenharmony_ci "lwc1 $f1, 516(%[z1]) \n\t" 206cabdff1aSopenharmony_ci "add.s %[f8], $f14, $f15 \n\t" 207cabdff1aSopenharmony_ci "lwc1 $f2, 520(%[z1]) \n\t" 208cabdff1aSopenharmony_ci "add.s %[f1], %[f1], $f0 \n\t" 209cabdff1aSopenharmony_ci "add.s %[f2], %[f2], $f1 \n\t" 210cabdff1aSopenharmony_ci "lwc1 $f3, 524(%[z1]) \n\t" 211cabdff1aSopenharmony_ci "add.s %[f3], %[f3], $f2 \n\t" 212cabdff1aSopenharmony_ci "lwc1 $f4, 528(%[z1]) \n\t" 213cabdff1aSopenharmony_ci "lwc1 $f5, 532(%[z1]) \n\t" 214cabdff1aSopenharmony_ci "add.s %[f4], %[f4], $f3 \n\t" 215cabdff1aSopenharmony_ci "lwc1 $f6, 536(%[z1]) \n\t" 216cabdff1aSopenharmony_ci "add.s %[f5], %[f5], $f4 \n\t" 217cabdff1aSopenharmony_ci "add.s %[f6], %[f6], $f5 \n\t" 218cabdff1aSopenharmony_ci "lwc1 $f7, 540(%[z1]) \n\t" 219cabdff1aSopenharmony_ci "add.s %[f7], %[f7], $f6 \n\t" 220cabdff1aSopenharmony_ci "lwc1 $f0, 768(%[z1]) \n\t" 221cabdff1aSopenharmony_ci "lwc1 $f1, 772(%[z1]) \n\t" 222cabdff1aSopenharmony_ci "add.s %[f8], %[f8], $f7 \n\t" 223cabdff1aSopenharmony_ci "lwc1 $f2, 776(%[z1]) \n\t" 224cabdff1aSopenharmony_ci "add.s %[f1], %[f1], $f0 \n\t" 225cabdff1aSopenharmony_ci "add.s %[f2], %[f2], $f1 \n\t" 226cabdff1aSopenharmony_ci "lwc1 $f3, 780(%[z1]) \n\t" 227cabdff1aSopenharmony_ci "add.s %[f3], %[f3], $f2 \n\t" 228cabdff1aSopenharmony_ci "lwc1 $f4, 784(%[z1]) \n\t" 229cabdff1aSopenharmony_ci "lwc1 $f5, 788(%[z1]) \n\t" 230cabdff1aSopenharmony_ci "add.s %[f4], %[f4], $f3 \n\t" 231cabdff1aSopenharmony_ci "lwc1 $f6, 792(%[z1]) \n\t" 232cabdff1aSopenharmony_ci "add.s %[f5], %[f5], $f4 \n\t" 233cabdff1aSopenharmony_ci "add.s %[f6], %[f6], $f5 \n\t" 234cabdff1aSopenharmony_ci "lwc1 $f7, 796(%[z1]) \n\t" 235cabdff1aSopenharmony_ci "add.s %[f7], %[f7], $f6 \n\t" 236cabdff1aSopenharmony_ci "lwc1 $f0, 1024(%[z1]) \n\t" 237cabdff1aSopenharmony_ci "lwc1 $f1, 1028(%[z1]) \n\t" 238cabdff1aSopenharmony_ci "add.s %[f8], %[f8], $f7 \n\t" 239cabdff1aSopenharmony_ci "lwc1 $f2, 1032(%[z1]) \n\t" 240cabdff1aSopenharmony_ci "add.s %[f1], %[f1], $f0 \n\t" 241cabdff1aSopenharmony_ci "add.s %[f2], %[f2], $f1 \n\t" 242cabdff1aSopenharmony_ci "lwc1 $f3, 1036(%[z1]) \n\t" 243cabdff1aSopenharmony_ci "add.s %[f3], %[f3], $f2 \n\t" 244cabdff1aSopenharmony_ci "lwc1 $f4, 1040(%[z1]) \n\t" 245cabdff1aSopenharmony_ci "lwc1 $f5, 1044(%[z1]) \n\t" 246cabdff1aSopenharmony_ci "add.s %[f4], %[f4], $f3 \n\t" 247cabdff1aSopenharmony_ci "lwc1 $f6, 1048(%[z1]) \n\t" 248cabdff1aSopenharmony_ci "add.s %[f5], %[f5], $f4 \n\t" 249cabdff1aSopenharmony_ci "add.s %[f6], %[f6], $f5 \n\t" 250cabdff1aSopenharmony_ci "lwc1 $f7, 1052(%[z1]) \n\t" 251cabdff1aSopenharmony_ci "add.s %[f7], %[f7], $f6 \n\t" 252cabdff1aSopenharmony_ci "swc1 %[f1], 0(%[z1]) \n\t" 253cabdff1aSopenharmony_ci "swc1 %[f2], 4(%[z1]) \n\t" 254cabdff1aSopenharmony_ci "add.s %[f8], %[f8], $f7 \n\t" 255cabdff1aSopenharmony_ci "swc1 %[f3], 8(%[z1]) \n\t" 256cabdff1aSopenharmony_ci "swc1 %[f4], 12(%[z1]) \n\t" 257cabdff1aSopenharmony_ci "swc1 %[f5], 16(%[z1]) \n\t" 258cabdff1aSopenharmony_ci "swc1 %[f6], 20(%[z1]) \n\t" 259cabdff1aSopenharmony_ci "swc1 %[f7], 24(%[z1]) \n\t" 260cabdff1aSopenharmony_ci "swc1 %[f8], 28(%[z1]) \n\t" 261cabdff1aSopenharmony_ci 262cabdff1aSopenharmony_ci : [f1]"=&f"(f1), [f2]"=&f"(f2), [f3]"=&f"(f3), 263cabdff1aSopenharmony_ci [f4]"=&f"(f4), [f5]"=&f"(f5), [f6]"=&f"(f6), 264cabdff1aSopenharmony_ci [f7]"=&f"(f7), [f8]"=&f"(f8) 265cabdff1aSopenharmony_ci : [z1]"r"(z1) 266cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", 267cabdff1aSopenharmony_ci "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", 268cabdff1aSopenharmony_ci "$f12", "$f13", "$f14", "$f15", 269cabdff1aSopenharmony_ci "memory" 270cabdff1aSopenharmony_ci ); 271cabdff1aSopenharmony_ci } 272cabdff1aSopenharmony_ci} 273cabdff1aSopenharmony_ci 274cabdff1aSopenharmony_cistatic float sbr_sum_square_mips(float (*x)[2], int n) 275cabdff1aSopenharmony_ci{ 276cabdff1aSopenharmony_ci float sum0 = 0.0f, sum1 = 0.0f; 277cabdff1aSopenharmony_ci float *p_x; 278cabdff1aSopenharmony_ci float temp0, temp1, temp2, temp3; 279cabdff1aSopenharmony_ci float *loop_end; 280cabdff1aSopenharmony_ci p_x = &x[0][0]; 281cabdff1aSopenharmony_ci loop_end = p_x + (n >> 1)*4 - 4; 282cabdff1aSopenharmony_ci 283cabdff1aSopenharmony_ci __asm__ volatile ( 284cabdff1aSopenharmony_ci ".set push \n\t" 285cabdff1aSopenharmony_ci ".set noreorder \n\t" 286cabdff1aSopenharmony_ci "lwc1 %[temp0], 0(%[p_x]) \n\t" 287cabdff1aSopenharmony_ci "lwc1 %[temp1], 4(%[p_x]) \n\t" 288cabdff1aSopenharmony_ci "lwc1 %[temp2], 8(%[p_x]) \n\t" 289cabdff1aSopenharmony_ci "lwc1 %[temp3], 12(%[p_x]) \n\t" 290cabdff1aSopenharmony_ci "1: \n\t" 291cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x], %[p_x], 16 \n\t" 292cabdff1aSopenharmony_ci "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t" 293cabdff1aSopenharmony_ci "lwc1 %[temp0], 0(%[p_x]) \n\t" 294cabdff1aSopenharmony_ci "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t" 295cabdff1aSopenharmony_ci "lwc1 %[temp1], 4(%[p_x]) \n\t" 296cabdff1aSopenharmony_ci "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t" 297cabdff1aSopenharmony_ci "lwc1 %[temp2], 8(%[p_x]) \n\t" 298cabdff1aSopenharmony_ci "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t" 299cabdff1aSopenharmony_ci "bne %[p_x], %[loop_end], 1b \n\t" 300cabdff1aSopenharmony_ci " lwc1 %[temp3], 12(%[p_x]) \n\t" 301cabdff1aSopenharmony_ci "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t" 302cabdff1aSopenharmony_ci "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t" 303cabdff1aSopenharmony_ci "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t" 304cabdff1aSopenharmony_ci "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t" 305cabdff1aSopenharmony_ci ".set pop \n\t" 306cabdff1aSopenharmony_ci 307cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 308cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [sum0]"+f"(sum0), [sum1]"+f"(sum1), 309cabdff1aSopenharmony_ci [p_x]"+r"(p_x) 310cabdff1aSopenharmony_ci : [loop_end]"r"(loop_end) 311cabdff1aSopenharmony_ci : "memory" 312cabdff1aSopenharmony_ci ); 313cabdff1aSopenharmony_ci return sum0 + sum1; 314cabdff1aSopenharmony_ci} 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_cistatic void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *src1) 317cabdff1aSopenharmony_ci{ 318cabdff1aSopenharmony_ci int i; 319cabdff1aSopenharmony_ci float temp0, temp1, temp2, temp3, temp4, temp5; 320cabdff1aSopenharmony_ci float temp6, temp7, temp8, temp9, temp10, temp11; 321cabdff1aSopenharmony_ci float *v0 = v; 322cabdff1aSopenharmony_ci float *v1 = &v[127]; 323cabdff1aSopenharmony_ci float *psrc0 = (float*)src0; 324cabdff1aSopenharmony_ci float *psrc1 = (float*)&src1[63]; 325cabdff1aSopenharmony_ci 326cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 327cabdff1aSopenharmony_ci 328cabdff1aSopenharmony_ci /* loop unrolled 16 times */ 329cabdff1aSopenharmony_ci __asm__ volatile( 330cabdff1aSopenharmony_ci "lwc1 %[temp0], 0(%[src0]) \n\t" 331cabdff1aSopenharmony_ci "lwc1 %[temp1], 0(%[src1]) \n\t" 332cabdff1aSopenharmony_ci "lwc1 %[temp3], 4(%[src0]) \n\t" 333cabdff1aSopenharmony_ci "lwc1 %[temp4], -4(%[src1]) \n\t" 334cabdff1aSopenharmony_ci "lwc1 %[temp6], 8(%[src0]) \n\t" 335cabdff1aSopenharmony_ci "lwc1 %[temp7], -8(%[src1]) \n\t" 336cabdff1aSopenharmony_ci "lwc1 %[temp9], 12(%[src0]) \n\t" 337cabdff1aSopenharmony_ci "lwc1 %[temp10], -12(%[src1]) \n\t" 338cabdff1aSopenharmony_ci "add.s %[temp2], %[temp0], %[temp1] \n\t" 339cabdff1aSopenharmony_ci "add.s %[temp5], %[temp3], %[temp4] \n\t" 340cabdff1aSopenharmony_ci "add.s %[temp8], %[temp6], %[temp7] \n\t" 341cabdff1aSopenharmony_ci "add.s %[temp11], %[temp9], %[temp10] \n\t" 342cabdff1aSopenharmony_ci "sub.s %[temp0], %[temp0], %[temp1] \n\t" 343cabdff1aSopenharmony_ci "sub.s %[temp3], %[temp3], %[temp4] \n\t" 344cabdff1aSopenharmony_ci "sub.s %[temp6], %[temp6], %[temp7] \n\t" 345cabdff1aSopenharmony_ci "sub.s %[temp9], %[temp9], %[temp10] \n\t" 346cabdff1aSopenharmony_ci "swc1 %[temp2], 0(%[v1]) \n\t" 347cabdff1aSopenharmony_ci "swc1 %[temp0], 0(%[v0]) \n\t" 348cabdff1aSopenharmony_ci "swc1 %[temp5], -4(%[v1]) \n\t" 349cabdff1aSopenharmony_ci "swc1 %[temp3], 4(%[v0]) \n\t" 350cabdff1aSopenharmony_ci "swc1 %[temp8], -8(%[v1]) \n\t" 351cabdff1aSopenharmony_ci "swc1 %[temp6], 8(%[v0]) \n\t" 352cabdff1aSopenharmony_ci "swc1 %[temp11], -12(%[v1]) \n\t" 353cabdff1aSopenharmony_ci "swc1 %[temp9], 12(%[v0]) \n\t" 354cabdff1aSopenharmony_ci "lwc1 %[temp0], 16(%[src0]) \n\t" 355cabdff1aSopenharmony_ci "lwc1 %[temp1], -16(%[src1]) \n\t" 356cabdff1aSopenharmony_ci "lwc1 %[temp3], 20(%[src0]) \n\t" 357cabdff1aSopenharmony_ci "lwc1 %[temp4], -20(%[src1]) \n\t" 358cabdff1aSopenharmony_ci "lwc1 %[temp6], 24(%[src0]) \n\t" 359cabdff1aSopenharmony_ci "lwc1 %[temp7], -24(%[src1]) \n\t" 360cabdff1aSopenharmony_ci "lwc1 %[temp9], 28(%[src0]) \n\t" 361cabdff1aSopenharmony_ci "lwc1 %[temp10], -28(%[src1]) \n\t" 362cabdff1aSopenharmony_ci "add.s %[temp2], %[temp0], %[temp1] \n\t" 363cabdff1aSopenharmony_ci "add.s %[temp5], %[temp3], %[temp4] \n\t" 364cabdff1aSopenharmony_ci "add.s %[temp8], %[temp6], %[temp7] \n\t" 365cabdff1aSopenharmony_ci "add.s %[temp11], %[temp9], %[temp10] \n\t" 366cabdff1aSopenharmony_ci "sub.s %[temp0], %[temp0], %[temp1] \n\t" 367cabdff1aSopenharmony_ci "sub.s %[temp3], %[temp3], %[temp4] \n\t" 368cabdff1aSopenharmony_ci "sub.s %[temp6], %[temp6], %[temp7] \n\t" 369cabdff1aSopenharmony_ci "sub.s %[temp9], %[temp9], %[temp10] \n\t" 370cabdff1aSopenharmony_ci "swc1 %[temp2], -16(%[v1]) \n\t" 371cabdff1aSopenharmony_ci "swc1 %[temp0], 16(%[v0]) \n\t" 372cabdff1aSopenharmony_ci "swc1 %[temp5], -20(%[v1]) \n\t" 373cabdff1aSopenharmony_ci "swc1 %[temp3], 20(%[v0]) \n\t" 374cabdff1aSopenharmony_ci "swc1 %[temp8], -24(%[v1]) \n\t" 375cabdff1aSopenharmony_ci "swc1 %[temp6], 24(%[v0]) \n\t" 376cabdff1aSopenharmony_ci "swc1 %[temp11], -28(%[v1]) \n\t" 377cabdff1aSopenharmony_ci "swc1 %[temp9], 28(%[v0]) \n\t" 378cabdff1aSopenharmony_ci "lwc1 %[temp0], 32(%[src0]) \n\t" 379cabdff1aSopenharmony_ci "lwc1 %[temp1], -32(%[src1]) \n\t" 380cabdff1aSopenharmony_ci "lwc1 %[temp3], 36(%[src0]) \n\t" 381cabdff1aSopenharmony_ci "lwc1 %[temp4], -36(%[src1]) \n\t" 382cabdff1aSopenharmony_ci "lwc1 %[temp6], 40(%[src0]) \n\t" 383cabdff1aSopenharmony_ci "lwc1 %[temp7], -40(%[src1]) \n\t" 384cabdff1aSopenharmony_ci "lwc1 %[temp9], 44(%[src0]) \n\t" 385cabdff1aSopenharmony_ci "lwc1 %[temp10], -44(%[src1]) \n\t" 386cabdff1aSopenharmony_ci "add.s %[temp2], %[temp0], %[temp1] \n\t" 387cabdff1aSopenharmony_ci "add.s %[temp5], %[temp3], %[temp4] \n\t" 388cabdff1aSopenharmony_ci "add.s %[temp8], %[temp6], %[temp7] \n\t" 389cabdff1aSopenharmony_ci "add.s %[temp11], %[temp9], %[temp10] \n\t" 390cabdff1aSopenharmony_ci "sub.s %[temp0], %[temp0], %[temp1] \n\t" 391cabdff1aSopenharmony_ci "sub.s %[temp3], %[temp3], %[temp4] \n\t" 392cabdff1aSopenharmony_ci "sub.s %[temp6], %[temp6], %[temp7] \n\t" 393cabdff1aSopenharmony_ci "sub.s %[temp9], %[temp9], %[temp10] \n\t" 394cabdff1aSopenharmony_ci "swc1 %[temp2], -32(%[v1]) \n\t" 395cabdff1aSopenharmony_ci "swc1 %[temp0], 32(%[v0]) \n\t" 396cabdff1aSopenharmony_ci "swc1 %[temp5], -36(%[v1]) \n\t" 397cabdff1aSopenharmony_ci "swc1 %[temp3], 36(%[v0]) \n\t" 398cabdff1aSopenharmony_ci "swc1 %[temp8], -40(%[v1]) \n\t" 399cabdff1aSopenharmony_ci "swc1 %[temp6], 40(%[v0]) \n\t" 400cabdff1aSopenharmony_ci "swc1 %[temp11], -44(%[v1]) \n\t" 401cabdff1aSopenharmony_ci "swc1 %[temp9], 44(%[v0]) \n\t" 402cabdff1aSopenharmony_ci "lwc1 %[temp0], 48(%[src0]) \n\t" 403cabdff1aSopenharmony_ci "lwc1 %[temp1], -48(%[src1]) \n\t" 404cabdff1aSopenharmony_ci "lwc1 %[temp3], 52(%[src0]) \n\t" 405cabdff1aSopenharmony_ci "lwc1 %[temp4], -52(%[src1]) \n\t" 406cabdff1aSopenharmony_ci "lwc1 %[temp6], 56(%[src0]) \n\t" 407cabdff1aSopenharmony_ci "lwc1 %[temp7], -56(%[src1]) \n\t" 408cabdff1aSopenharmony_ci "lwc1 %[temp9], 60(%[src0]) \n\t" 409cabdff1aSopenharmony_ci "lwc1 %[temp10], -60(%[src1]) \n\t" 410cabdff1aSopenharmony_ci "add.s %[temp2], %[temp0], %[temp1] \n\t" 411cabdff1aSopenharmony_ci "add.s %[temp5], %[temp3], %[temp4] \n\t" 412cabdff1aSopenharmony_ci "add.s %[temp8], %[temp6], %[temp7] \n\t" 413cabdff1aSopenharmony_ci "add.s %[temp11], %[temp9], %[temp10] \n\t" 414cabdff1aSopenharmony_ci "sub.s %[temp0], %[temp0], %[temp1] \n\t" 415cabdff1aSopenharmony_ci "sub.s %[temp3], %[temp3], %[temp4] \n\t" 416cabdff1aSopenharmony_ci "sub.s %[temp6], %[temp6], %[temp7] \n\t" 417cabdff1aSopenharmony_ci "sub.s %[temp9], %[temp9], %[temp10] \n\t" 418cabdff1aSopenharmony_ci "swc1 %[temp2], -48(%[v1]) \n\t" 419cabdff1aSopenharmony_ci "swc1 %[temp0], 48(%[v0]) \n\t" 420cabdff1aSopenharmony_ci "swc1 %[temp5], -52(%[v1]) \n\t" 421cabdff1aSopenharmony_ci "swc1 %[temp3], 52(%[v0]) \n\t" 422cabdff1aSopenharmony_ci "swc1 %[temp8], -56(%[v1]) \n\t" 423cabdff1aSopenharmony_ci "swc1 %[temp6], 56(%[v0]) \n\t" 424cabdff1aSopenharmony_ci "swc1 %[temp11], -60(%[v1]) \n\t" 425cabdff1aSopenharmony_ci "swc1 %[temp9], 60(%[v0]) \n\t" 426cabdff1aSopenharmony_ci PTR_ADDIU " %[src0], %[src0], 64 \n\t" 427cabdff1aSopenharmony_ci PTR_ADDIU " %[src1], %[src1], -64 \n\t" 428cabdff1aSopenharmony_ci PTR_ADDIU " %[v0], %[v0], 64 \n\t" 429cabdff1aSopenharmony_ci PTR_ADDIU " %[v1], %[v1], -64 \n\t" 430cabdff1aSopenharmony_ci 431cabdff1aSopenharmony_ci : [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1), 432cabdff1aSopenharmony_ci [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 433cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 434cabdff1aSopenharmony_ci [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8), 435cabdff1aSopenharmony_ci [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11) 436cabdff1aSopenharmony_ci : 437cabdff1aSopenharmony_ci :"memory" 438cabdff1aSopenharmony_ci ); 439cabdff1aSopenharmony_ci } 440cabdff1aSopenharmony_ci} 441cabdff1aSopenharmony_ci 442cabdff1aSopenharmony_cistatic void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2]) 443cabdff1aSopenharmony_ci{ 444cabdff1aSopenharmony_ci int i; 445cabdff1aSopenharmony_ci float real_sum_0 = 0.0f; 446cabdff1aSopenharmony_ci float real_sum_1 = 0.0f; 447cabdff1aSopenharmony_ci float real_sum_2 = 0.0f; 448cabdff1aSopenharmony_ci float imag_sum_1 = 0.0f; 449cabdff1aSopenharmony_ci float imag_sum_2 = 0.0f; 450cabdff1aSopenharmony_ci float *p_x, *p_phi; 451cabdff1aSopenharmony_ci float temp0, temp1, temp2, temp3, temp4, temp5, temp6; 452cabdff1aSopenharmony_ci float temp7, temp_r, temp_r1, temp_r2, temp_r3, temp_r4; 453cabdff1aSopenharmony_ci p_x = (float*)&x[0][0]; 454cabdff1aSopenharmony_ci p_phi = &phi[0][0][0]; 455cabdff1aSopenharmony_ci 456cabdff1aSopenharmony_ci __asm__ volatile ( 457cabdff1aSopenharmony_ci "lwc1 %[temp0], 8(%[p_x]) \n\t" 458cabdff1aSopenharmony_ci "lwc1 %[temp1], 12(%[p_x]) \n\t" 459cabdff1aSopenharmony_ci "lwc1 %[temp2], 16(%[p_x]) \n\t" 460cabdff1aSopenharmony_ci "lwc1 %[temp3], 20(%[p_x]) \n\t" 461cabdff1aSopenharmony_ci "lwc1 %[temp4], 24(%[p_x]) \n\t" 462cabdff1aSopenharmony_ci "lwc1 %[temp5], 28(%[p_x]) \n\t" 463cabdff1aSopenharmony_ci "mul.s %[temp_r], %[temp1], %[temp1] \n\t" 464cabdff1aSopenharmony_ci "mul.s %[temp_r1], %[temp1], %[temp3] \n\t" 465cabdff1aSopenharmony_ci "mul.s %[temp_r2], %[temp1], %[temp2] \n\t" 466cabdff1aSopenharmony_ci "mul.s %[temp_r3], %[temp1], %[temp5] \n\t" 467cabdff1aSopenharmony_ci "mul.s %[temp_r4], %[temp1], %[temp4] \n\t" 468cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t" 469cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t" 470cabdff1aSopenharmony_ci "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t" 471cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t" 472cabdff1aSopenharmony_ci "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t" 473cabdff1aSopenharmony_ci "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t" 474cabdff1aSopenharmony_ci "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t" 475cabdff1aSopenharmony_ci "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t" 476cabdff1aSopenharmony_ci "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t" 477cabdff1aSopenharmony_ci "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t" 478cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x], %[p_x], 8 \n\t" 479cabdff1aSopenharmony_ci 480cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 481cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 482cabdff1aSopenharmony_ci [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1), 483cabdff1aSopenharmony_ci [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2), 484cabdff1aSopenharmony_ci [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), [temp_r2]"=&f"(temp_r2), 485cabdff1aSopenharmony_ci [temp_r3]"=&f"(temp_r3), [temp_r4]"=&f"(temp_r4), 486cabdff1aSopenharmony_ci [p_x]"+r"(p_x), [imag_sum_2]"+f"(imag_sum_2) 487cabdff1aSopenharmony_ci : 488cabdff1aSopenharmony_ci : "memory" 489cabdff1aSopenharmony_ci ); 490cabdff1aSopenharmony_ci 491cabdff1aSopenharmony_ci for (i = 0; i < 12; i++) { 492cabdff1aSopenharmony_ci __asm__ volatile ( 493cabdff1aSopenharmony_ci "lwc1 %[temp0], 8(%[p_x]) \n\t" 494cabdff1aSopenharmony_ci "lwc1 %[temp1], 12(%[p_x]) \n\t" 495cabdff1aSopenharmony_ci "lwc1 %[temp2], 16(%[p_x]) \n\t" 496cabdff1aSopenharmony_ci "lwc1 %[temp3], 20(%[p_x]) \n\t" 497cabdff1aSopenharmony_ci "lwc1 %[temp4], 24(%[p_x]) \n\t" 498cabdff1aSopenharmony_ci "lwc1 %[temp5], 28(%[p_x]) \n\t" 499cabdff1aSopenharmony_ci "mul.s %[temp_r], %[temp1], %[temp1] \n\t" 500cabdff1aSopenharmony_ci "mul.s %[temp_r1], %[temp1], %[temp3] \n\t" 501cabdff1aSopenharmony_ci "mul.s %[temp_r2], %[temp1], %[temp2] \n\t" 502cabdff1aSopenharmony_ci "mul.s %[temp_r3], %[temp1], %[temp5] \n\t" 503cabdff1aSopenharmony_ci "mul.s %[temp_r4], %[temp1], %[temp4] \n\t" 504cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t" 505cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t" 506cabdff1aSopenharmony_ci "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t" 507cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t" 508cabdff1aSopenharmony_ci "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t" 509cabdff1aSopenharmony_ci "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t" 510cabdff1aSopenharmony_ci "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t" 511cabdff1aSopenharmony_ci "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t" 512cabdff1aSopenharmony_ci "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t" 513cabdff1aSopenharmony_ci "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t" 514cabdff1aSopenharmony_ci "lwc1 %[temp0], 32(%[p_x]) \n\t" 515cabdff1aSopenharmony_ci "lwc1 %[temp1], 36(%[p_x]) \n\t" 516cabdff1aSopenharmony_ci "mul.s %[temp_r], %[temp3], %[temp3] \n\t" 517cabdff1aSopenharmony_ci "mul.s %[temp_r1], %[temp3], %[temp5] \n\t" 518cabdff1aSopenharmony_ci "mul.s %[temp_r2], %[temp3], %[temp4] \n\t" 519cabdff1aSopenharmony_ci "mul.s %[temp_r3], %[temp3], %[temp1] \n\t" 520cabdff1aSopenharmony_ci "mul.s %[temp_r4], %[temp3], %[temp0] \n\t" 521cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp2], %[temp2] \n\t" 522cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[temp_r1], %[temp2], %[temp4] \n\t" 523cabdff1aSopenharmony_ci "msub.s %[temp_r2], %[temp_r2], %[temp2], %[temp5] \n\t" 524cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[temp_r3], %[temp2], %[temp0] \n\t" 525cabdff1aSopenharmony_ci "msub.s %[temp_r4], %[temp_r4], %[temp2], %[temp1] \n\t" 526cabdff1aSopenharmony_ci "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t" 527cabdff1aSopenharmony_ci "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t" 528cabdff1aSopenharmony_ci "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t" 529cabdff1aSopenharmony_ci "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t" 530cabdff1aSopenharmony_ci "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t" 531cabdff1aSopenharmony_ci "lwc1 %[temp2], 40(%[p_x]) \n\t" 532cabdff1aSopenharmony_ci "lwc1 %[temp3], 44(%[p_x]) \n\t" 533cabdff1aSopenharmony_ci "mul.s %[temp_r], %[temp5], %[temp5] \n\t" 534cabdff1aSopenharmony_ci "mul.s %[temp_r1], %[temp5], %[temp1] \n\t" 535cabdff1aSopenharmony_ci "mul.s %[temp_r2], %[temp5], %[temp0] \n\t" 536cabdff1aSopenharmony_ci "mul.s %[temp_r3], %[temp5], %[temp3] \n\t" 537cabdff1aSopenharmony_ci "mul.s %[temp_r4], %[temp5], %[temp2] \n\t" 538cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp4], %[temp4] \n\t" 539cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[temp_r1], %[temp4], %[temp0] \n\t" 540cabdff1aSopenharmony_ci "msub.s %[temp_r2], %[temp_r2], %[temp4], %[temp1] \n\t" 541cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[temp_r3], %[temp4], %[temp2] \n\t" 542cabdff1aSopenharmony_ci "msub.s %[temp_r4], %[temp_r4], %[temp4], %[temp3] \n\t" 543cabdff1aSopenharmony_ci "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t" 544cabdff1aSopenharmony_ci "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t" 545cabdff1aSopenharmony_ci "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t" 546cabdff1aSopenharmony_ci "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t" 547cabdff1aSopenharmony_ci "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t" 548cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x], %[p_x], 24 \n\t" 549cabdff1aSopenharmony_ci 550cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 551cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 552cabdff1aSopenharmony_ci [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1), 553cabdff1aSopenharmony_ci [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2), 554cabdff1aSopenharmony_ci [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), 555cabdff1aSopenharmony_ci [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3), 556cabdff1aSopenharmony_ci [temp_r4]"=&f"(temp_r4), [p_x]"+r"(p_x), 557cabdff1aSopenharmony_ci [imag_sum_2]"+f"(imag_sum_2) 558cabdff1aSopenharmony_ci : 559cabdff1aSopenharmony_ci : "memory" 560cabdff1aSopenharmony_ci ); 561cabdff1aSopenharmony_ci } 562cabdff1aSopenharmony_ci __asm__ volatile ( 563cabdff1aSopenharmony_ci "lwc1 %[temp0], -296(%[p_x]) \n\t" 564cabdff1aSopenharmony_ci "lwc1 %[temp1], -292(%[p_x]) \n\t" 565cabdff1aSopenharmony_ci "lwc1 %[temp2], 8(%[p_x]) \n\t" 566cabdff1aSopenharmony_ci "lwc1 %[temp3], 12(%[p_x]) \n\t" 567cabdff1aSopenharmony_ci "lwc1 %[temp4], -288(%[p_x]) \n\t" 568cabdff1aSopenharmony_ci "lwc1 %[temp5], -284(%[p_x]) \n\t" 569cabdff1aSopenharmony_ci "lwc1 %[temp6], -280(%[p_x]) \n\t" 570cabdff1aSopenharmony_ci "lwc1 %[temp7], -276(%[p_x]) \n\t" 571cabdff1aSopenharmony_ci "madd.s %[temp_r], %[real_sum_0], %[temp0], %[temp0] \n\t" 572cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[real_sum_0], %[temp2], %[temp2] \n\t" 573cabdff1aSopenharmony_ci "madd.s %[temp_r2], %[real_sum_1], %[temp0], %[temp4] \n\t" 574cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[imag_sum_1], %[temp0], %[temp5] \n\t" 575cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp1], %[temp1] \n\t" 576cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[temp_r1], %[temp3], %[temp3] \n\t" 577cabdff1aSopenharmony_ci "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp5] \n\t" 578cabdff1aSopenharmony_ci "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp4] \n\t" 579cabdff1aSopenharmony_ci "lwc1 %[temp4], 16(%[p_x]) \n\t" 580cabdff1aSopenharmony_ci "lwc1 %[temp5], 20(%[p_x]) \n\t" 581cabdff1aSopenharmony_ci "swc1 %[temp_r], 40(%[p_phi]) \n\t" 582cabdff1aSopenharmony_ci "swc1 %[temp_r1], 16(%[p_phi]) \n\t" 583cabdff1aSopenharmony_ci "swc1 %[temp_r2], 24(%[p_phi]) \n\t" 584cabdff1aSopenharmony_ci "swc1 %[temp_r3], 28(%[p_phi]) \n\t" 585cabdff1aSopenharmony_ci "madd.s %[temp_r], %[real_sum_1], %[temp2], %[temp4] \n\t" 586cabdff1aSopenharmony_ci "madd.s %[temp_r1], %[imag_sum_1], %[temp2], %[temp5] \n\t" 587cabdff1aSopenharmony_ci "madd.s %[temp_r2], %[real_sum_2], %[temp0], %[temp6] \n\t" 588cabdff1aSopenharmony_ci "madd.s %[temp_r3], %[imag_sum_2], %[temp0], %[temp7] \n\t" 589cabdff1aSopenharmony_ci "madd.s %[temp_r], %[temp_r], %[temp3], %[temp5] \n\t" 590cabdff1aSopenharmony_ci "nmsub.s %[temp_r1], %[temp_r1], %[temp3], %[temp4] \n\t" 591cabdff1aSopenharmony_ci "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp7] \n\t" 592cabdff1aSopenharmony_ci "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp6] \n\t" 593cabdff1aSopenharmony_ci "swc1 %[temp_r], 0(%[p_phi]) \n\t" 594cabdff1aSopenharmony_ci "swc1 %[temp_r1], 4(%[p_phi]) \n\t" 595cabdff1aSopenharmony_ci "swc1 %[temp_r2], 8(%[p_phi]) \n\t" 596cabdff1aSopenharmony_ci "swc1 %[temp_r3], 12(%[p_phi]) \n\t" 597cabdff1aSopenharmony_ci 598cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 599cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 600cabdff1aSopenharmony_ci [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp_r]"=&f"(temp_r), 601cabdff1aSopenharmony_ci [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1), 602cabdff1aSopenharmony_ci [real_sum_2]"+f"(real_sum_2), [imag_sum_1]"+f"(imag_sum_1), 603cabdff1aSopenharmony_ci [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3), 604cabdff1aSopenharmony_ci [temp_r1]"=&f"(temp_r1), [p_phi]"+r"(p_phi), 605cabdff1aSopenharmony_ci [imag_sum_2]"+f"(imag_sum_2) 606cabdff1aSopenharmony_ci : [p_x]"r"(p_x) 607cabdff1aSopenharmony_ci : "memory" 608cabdff1aSopenharmony_ci ); 609cabdff1aSopenharmony_ci} 610cabdff1aSopenharmony_ci 611cabdff1aSopenharmony_cistatic void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2], 612cabdff1aSopenharmony_ci const float alpha0[2], const float alpha1[2], 613cabdff1aSopenharmony_ci float bw, int start, int end) 614cabdff1aSopenharmony_ci{ 615cabdff1aSopenharmony_ci float alpha[4]; 616cabdff1aSopenharmony_ci int i; 617cabdff1aSopenharmony_ci float *p_x_low = (float*)&X_low[0][0] + 2*start; 618cabdff1aSopenharmony_ci float *p_x_high = &X_high[0][0] + 2*start; 619cabdff1aSopenharmony_ci float temp0, temp1, temp2, temp3, temp4, temp5, temp6; 620cabdff1aSopenharmony_ci float temp7, temp8, temp9, temp10, temp11, temp12; 621cabdff1aSopenharmony_ci 622cabdff1aSopenharmony_ci alpha[0] = alpha1[0] * bw * bw; 623cabdff1aSopenharmony_ci alpha[1] = alpha1[1] * bw * bw; 624cabdff1aSopenharmony_ci alpha[2] = alpha0[0] * bw; 625cabdff1aSopenharmony_ci alpha[3] = alpha0[1] * bw; 626cabdff1aSopenharmony_ci 627cabdff1aSopenharmony_ci for (i = start; i < end; i++) { 628cabdff1aSopenharmony_ci __asm__ volatile ( 629cabdff1aSopenharmony_ci "lwc1 %[temp0], -16(%[p_x_low]) \n\t" 630cabdff1aSopenharmony_ci "lwc1 %[temp1], -12(%[p_x_low]) \n\t" 631cabdff1aSopenharmony_ci "lwc1 %[temp2], -8(%[p_x_low]) \n\t" 632cabdff1aSopenharmony_ci "lwc1 %[temp3], -4(%[p_x_low]) \n\t" 633cabdff1aSopenharmony_ci "lwc1 %[temp5], 0(%[p_x_low]) \n\t" 634cabdff1aSopenharmony_ci "lwc1 %[temp6], 4(%[p_x_low]) \n\t" 635cabdff1aSopenharmony_ci "lwc1 %[temp7], 0(%[alpha]) \n\t" 636cabdff1aSopenharmony_ci "lwc1 %[temp8], 4(%[alpha]) \n\t" 637cabdff1aSopenharmony_ci "lwc1 %[temp9], 8(%[alpha]) \n\t" 638cabdff1aSopenharmony_ci "lwc1 %[temp10], 12(%[alpha]) \n\t" 639cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x_high], %[p_x_high], 8 \n\t" 640cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x_low], %[p_x_low], 8 \n\t" 641cabdff1aSopenharmony_ci "mul.s %[temp11], %[temp1], %[temp8] \n\t" 642cabdff1aSopenharmony_ci "msub.s %[temp11], %[temp11], %[temp0], %[temp7] \n\t" 643cabdff1aSopenharmony_ci "madd.s %[temp11], %[temp11], %[temp2], %[temp9] \n\t" 644cabdff1aSopenharmony_ci "nmsub.s %[temp11], %[temp11], %[temp3], %[temp10] \n\t" 645cabdff1aSopenharmony_ci "add.s %[temp11], %[temp11], %[temp5] \n\t" 646cabdff1aSopenharmony_ci "swc1 %[temp11], -8(%[p_x_high]) \n\t" 647cabdff1aSopenharmony_ci "mul.s %[temp12], %[temp1], %[temp7] \n\t" 648cabdff1aSopenharmony_ci "madd.s %[temp12], %[temp12], %[temp0], %[temp8] \n\t" 649cabdff1aSopenharmony_ci "madd.s %[temp12], %[temp12], %[temp3], %[temp9] \n\t" 650cabdff1aSopenharmony_ci "madd.s %[temp12], %[temp12], %[temp2], %[temp10] \n\t" 651cabdff1aSopenharmony_ci "add.s %[temp12], %[temp12], %[temp6] \n\t" 652cabdff1aSopenharmony_ci "swc1 %[temp12], -4(%[p_x_high]) \n\t" 653cabdff1aSopenharmony_ci 654cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 655cabdff1aSopenharmony_ci [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 656cabdff1aSopenharmony_ci [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8), 657cabdff1aSopenharmony_ci [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11), 658cabdff1aSopenharmony_ci [temp12]"=&f"(temp12), [p_x_high]"+r"(p_x_high), 659cabdff1aSopenharmony_ci [p_x_low]"+r"(p_x_low) 660cabdff1aSopenharmony_ci : [alpha]"r"(alpha) 661cabdff1aSopenharmony_ci : "memory" 662cabdff1aSopenharmony_ci ); 663cabdff1aSopenharmony_ci } 664cabdff1aSopenharmony_ci} 665cabdff1aSopenharmony_ci 666cabdff1aSopenharmony_cistatic void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2], 667cabdff1aSopenharmony_ci const float *g_filt, int m_max, intptr_t ixh) 668cabdff1aSopenharmony_ci{ 669cabdff1aSopenharmony_ci const float *p_x, *p_g, *loop_end; 670cabdff1aSopenharmony_ci float *p_y; 671cabdff1aSopenharmony_ci float temp0, temp1, temp2; 672cabdff1aSopenharmony_ci 673cabdff1aSopenharmony_ci p_g = &g_filt[0]; 674cabdff1aSopenharmony_ci p_y = &Y[0][0]; 675cabdff1aSopenharmony_ci p_x = &X_high[0][ixh][0]; 676cabdff1aSopenharmony_ci loop_end = p_g + m_max; 677cabdff1aSopenharmony_ci 678cabdff1aSopenharmony_ci __asm__ volatile( 679cabdff1aSopenharmony_ci ".set push \n\t" 680cabdff1aSopenharmony_ci ".set noreorder \n\t" 681cabdff1aSopenharmony_ci "1: \n\t" 682cabdff1aSopenharmony_ci "lwc1 %[temp0], 0(%[p_g]) \n\t" 683cabdff1aSopenharmony_ci "lwc1 %[temp1], 0(%[p_x]) \n\t" 684cabdff1aSopenharmony_ci "lwc1 %[temp2], 4(%[p_x]) \n\t" 685cabdff1aSopenharmony_ci "mul.s %[temp1], %[temp1], %[temp0] \n\t" 686cabdff1aSopenharmony_ci "mul.s %[temp2], %[temp2], %[temp0] \n\t" 687cabdff1aSopenharmony_ci PTR_ADDIU "%[p_g], %[p_g], 4 \n\t" 688cabdff1aSopenharmony_ci PTR_ADDIU "%[p_x], %[p_x], 320 \n\t" 689cabdff1aSopenharmony_ci "swc1 %[temp1], 0(%[p_y]) \n\t" 690cabdff1aSopenharmony_ci "swc1 %[temp2], 4(%[p_y]) \n\t" 691cabdff1aSopenharmony_ci "bne %[p_g], %[loop_end], 1b \n\t" 692cabdff1aSopenharmony_ci PTR_ADDIU "%[p_y], %[p_y], 8 \n\t" 693cabdff1aSopenharmony_ci ".set pop \n\t" 694cabdff1aSopenharmony_ci 695cabdff1aSopenharmony_ci : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), 696cabdff1aSopenharmony_ci [temp2]"=&f"(temp2), [p_x]"+r"(p_x), 697cabdff1aSopenharmony_ci [p_y]"+r"(p_y), [p_g]"+r"(p_g) 698cabdff1aSopenharmony_ci : [loop_end]"r"(loop_end) 699cabdff1aSopenharmony_ci : "memory" 700cabdff1aSopenharmony_ci ); 701cabdff1aSopenharmony_ci} 702cabdff1aSopenharmony_ci 703cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m, 704cabdff1aSopenharmony_ci const float *q_filt, int noise, 705cabdff1aSopenharmony_ci int kx, int m_max) 706cabdff1aSopenharmony_ci{ 707cabdff1aSopenharmony_ci int m; 708cabdff1aSopenharmony_ci 709cabdff1aSopenharmony_ci for (m = 0; m < m_max; m++){ 710cabdff1aSopenharmony_ci 711cabdff1aSopenharmony_ci float *Y1=&Y[m][0]; 712cabdff1aSopenharmony_ci float *ff_table; 713cabdff1aSopenharmony_ci float y0,y1, temp1, temp2, temp4, temp5; 714cabdff1aSopenharmony_ci int temp0, temp3; 715cabdff1aSopenharmony_ci const float *s_m1=&s_m[m]; 716cabdff1aSopenharmony_ci const float *q_filt1= &q_filt[m]; 717cabdff1aSopenharmony_ci 718cabdff1aSopenharmony_ci __asm__ volatile( 719cabdff1aSopenharmony_ci "lwc1 %[y0], 0(%[Y1]) \n\t" 720cabdff1aSopenharmony_ci "lwc1 %[temp1], 0(%[s_m1]) \n\t" 721cabdff1aSopenharmony_ci "addiu %[noise], %[noise], 1 \n\t" 722cabdff1aSopenharmony_ci "andi %[noise], %[noise], 0x1ff \n\t" 723cabdff1aSopenharmony_ci "sll %[temp0], %[noise], 3 \n\t" 724cabdff1aSopenharmony_ci PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t" 725cabdff1aSopenharmony_ci "add.s %[y0], %[y0], %[temp1] \n\t" 726cabdff1aSopenharmony_ci "mfc1 %[temp3], %[temp1] \n\t" 727cabdff1aSopenharmony_ci "bne %[temp3], $0, 1f \n\t" 728cabdff1aSopenharmony_ci "lwc1 %[y1], 4(%[Y1]) \n\t" 729cabdff1aSopenharmony_ci "lwc1 %[temp2], 0(%[q_filt1]) \n\t" 730cabdff1aSopenharmony_ci "lwc1 %[temp4], 0(%[ff_table]) \n\t" 731cabdff1aSopenharmony_ci "lwc1 %[temp5], 4(%[ff_table]) \n\t" 732cabdff1aSopenharmony_ci "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t" 733cabdff1aSopenharmony_ci "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t" 734cabdff1aSopenharmony_ci "swc1 %[y1], 4(%[Y1]) \n\t" 735cabdff1aSopenharmony_ci "1: \n\t" 736cabdff1aSopenharmony_ci "swc1 %[y0], 0(%[Y1]) \n\t" 737cabdff1aSopenharmony_ci 738cabdff1aSopenharmony_ci : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1), 739cabdff1aSopenharmony_ci [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 740cabdff1aSopenharmony_ci [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5) 741cabdff1aSopenharmony_ci : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise), 742cabdff1aSopenharmony_ci [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1) 743cabdff1aSopenharmony_ci : "memory" 744cabdff1aSopenharmony_ci ); 745cabdff1aSopenharmony_ci } 746cabdff1aSopenharmony_ci} 747cabdff1aSopenharmony_ci 748cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m, 749cabdff1aSopenharmony_ci const float *q_filt, int noise, 750cabdff1aSopenharmony_ci int kx, int m_max) 751cabdff1aSopenharmony_ci{ 752cabdff1aSopenharmony_ci float y0,y1,temp1, temp2, temp4, temp5; 753cabdff1aSopenharmony_ci int temp0, temp3, m; 754cabdff1aSopenharmony_ci float phi_sign = 1 - 2 * (kx & 1); 755cabdff1aSopenharmony_ci 756cabdff1aSopenharmony_ci for (m = 0; m < m_max; m++) { 757cabdff1aSopenharmony_ci 758cabdff1aSopenharmony_ci float *ff_table; 759cabdff1aSopenharmony_ci float *Y1=&Y[m][0]; 760cabdff1aSopenharmony_ci const float *s_m1=&s_m[m]; 761cabdff1aSopenharmony_ci const float *q_filt1= &q_filt[m]; 762cabdff1aSopenharmony_ci 763cabdff1aSopenharmony_ci __asm__ volatile( 764cabdff1aSopenharmony_ci "lwc1 %[y1], 4(%[Y1]) \n\t" 765cabdff1aSopenharmony_ci "lwc1 %[temp1], 0(%[s_m1]) \n\t" 766cabdff1aSopenharmony_ci "lw %[temp3], 0(%[s_m1]) \n\t" 767cabdff1aSopenharmony_ci "addiu %[noise], %[noise], 1 \n\t" 768cabdff1aSopenharmony_ci "andi %[noise], %[noise], 0x1ff \n\t" 769cabdff1aSopenharmony_ci "sll %[temp0], %[noise], 3 \n\t" 770cabdff1aSopenharmony_ci PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t" 771cabdff1aSopenharmony_ci "madd.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t" 772cabdff1aSopenharmony_ci "bne %[temp3], $0, 1f \n\t" 773cabdff1aSopenharmony_ci "lwc1 %[y0], 0(%[Y1]) \n\t" 774cabdff1aSopenharmony_ci "lwc1 %[temp2], 0(%[q_filt1]) \n\t" 775cabdff1aSopenharmony_ci "lwc1 %[temp4], 0(%[ff_table]) \n\t" 776cabdff1aSopenharmony_ci "lwc1 %[temp5], 4(%[ff_table]) \n\t" 777cabdff1aSopenharmony_ci "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t" 778cabdff1aSopenharmony_ci "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t" 779cabdff1aSopenharmony_ci "swc1 %[y0], 0(%[Y1]) \n\t" 780cabdff1aSopenharmony_ci "1: \n\t" 781cabdff1aSopenharmony_ci "swc1 %[y1], 4(%[Y1]) \n\t" 782cabdff1aSopenharmony_ci 783cabdff1aSopenharmony_ci : [ff_table] "=&r" (ff_table), [y0] "=&f" (y0), [y1] "=&f" (y1), 784cabdff1aSopenharmony_ci [temp0] "=&r" (temp0), [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), 785cabdff1aSopenharmony_ci [temp3] "=&r" (temp3), [temp4] "=&f" (temp4), [temp5] "=&f" (temp5) 786cabdff1aSopenharmony_ci : [ff_sbr_noise_table] "r" (ff_sbr_noise_table), [noise] "r" (noise), 787cabdff1aSopenharmony_ci [Y1] "r" (Y1), [s_m1] "r" (s_m1), [q_filt1] "r" (q_filt1), 788cabdff1aSopenharmony_ci [phi_sign] "f" (phi_sign) 789cabdff1aSopenharmony_ci : "memory" 790cabdff1aSopenharmony_ci ); 791cabdff1aSopenharmony_ci phi_sign = -phi_sign; 792cabdff1aSopenharmony_ci } 793cabdff1aSopenharmony_ci} 794cabdff1aSopenharmony_ci 795cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m, 796cabdff1aSopenharmony_ci const float *q_filt, int noise, 797cabdff1aSopenharmony_ci int kx, int m_max) 798cabdff1aSopenharmony_ci{ 799cabdff1aSopenharmony_ci int m, temp0, temp1; 800cabdff1aSopenharmony_ci float *ff_table; 801cabdff1aSopenharmony_ci float y0, y1, temp2, temp3, temp4, temp5; 802cabdff1aSopenharmony_ci 803cabdff1aSopenharmony_ci for (m = 0; m < m_max; m++) { 804cabdff1aSopenharmony_ci 805cabdff1aSopenharmony_ci float *Y1=&Y[m][0]; 806cabdff1aSopenharmony_ci const float *s_m1=&s_m[m]; 807cabdff1aSopenharmony_ci const float *q_filt1= &q_filt[m]; 808cabdff1aSopenharmony_ci 809cabdff1aSopenharmony_ci __asm__ volatile( 810cabdff1aSopenharmony_ci "lwc1 %[y0], 0(%[Y1]) \n\t" 811cabdff1aSopenharmony_ci "lwc1 %[temp3], 0(%[s_m1]) \n\t" 812cabdff1aSopenharmony_ci "addiu %[noise], %[noise], 1 \n\t" 813cabdff1aSopenharmony_ci "andi %[noise], %[noise], 0x1ff \n\t" 814cabdff1aSopenharmony_ci "sll %[temp0], %[noise], 3 \n\t" 815cabdff1aSopenharmony_ci PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t" 816cabdff1aSopenharmony_ci "sub.s %[y0], %[y0], %[temp3] \n\t" 817cabdff1aSopenharmony_ci "mfc1 %[temp1], %[temp3] \n\t" 818cabdff1aSopenharmony_ci "bne %[temp1], $0, 1f \n\t" 819cabdff1aSopenharmony_ci "lwc1 %[y1], 4(%[Y1]) \n\t" 820cabdff1aSopenharmony_ci "lwc1 %[temp2], 0(%[q_filt1]) \n\t" 821cabdff1aSopenharmony_ci "lwc1 %[temp4], 0(%[ff_table]) \n\t" 822cabdff1aSopenharmony_ci "lwc1 %[temp5], 4(%[ff_table]) \n\t" 823cabdff1aSopenharmony_ci "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t" 824cabdff1aSopenharmony_ci "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t" 825cabdff1aSopenharmony_ci "swc1 %[y1], 4(%[Y1]) \n\t" 826cabdff1aSopenharmony_ci "1: \n\t" 827cabdff1aSopenharmony_ci "swc1 %[y0], 0(%[Y1]) \n\t" 828cabdff1aSopenharmony_ci 829cabdff1aSopenharmony_ci : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [y0]"=&f"(y0), 830cabdff1aSopenharmony_ci [y1]"=&f"(y1), [ff_table]"=&r"(ff_table), 831cabdff1aSopenharmony_ci [temp2]"=&f"(temp2), [temp3]"=&f"(temp3), 832cabdff1aSopenharmony_ci [temp4]"=&f"(temp4), [temp5]"=&f"(temp5) 833cabdff1aSopenharmony_ci : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise), 834cabdff1aSopenharmony_ci [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1) 835cabdff1aSopenharmony_ci : "memory" 836cabdff1aSopenharmony_ci ); 837cabdff1aSopenharmony_ci } 838cabdff1aSopenharmony_ci} 839cabdff1aSopenharmony_ci 840cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m, 841cabdff1aSopenharmony_ci const float *q_filt, int noise, 842cabdff1aSopenharmony_ci int kx, int m_max) 843cabdff1aSopenharmony_ci{ 844cabdff1aSopenharmony_ci float phi_sign = 1 - 2 * (kx & 1); 845cabdff1aSopenharmony_ci int m; 846cabdff1aSopenharmony_ci 847cabdff1aSopenharmony_ci for (m = 0; m < m_max; m++) { 848cabdff1aSopenharmony_ci 849cabdff1aSopenharmony_ci float *Y1=&Y[m][0]; 850cabdff1aSopenharmony_ci float *ff_table; 851cabdff1aSopenharmony_ci float y0,y1, temp1, temp2, temp4, temp5; 852cabdff1aSopenharmony_ci int temp0, temp3; 853cabdff1aSopenharmony_ci const float *s_m1=&s_m[m]; 854cabdff1aSopenharmony_ci const float *q_filt1= &q_filt[m]; 855cabdff1aSopenharmony_ci 856cabdff1aSopenharmony_ci __asm__ volatile( 857cabdff1aSopenharmony_ci "lwc1 %[y1], 4(%[Y1]) \n\t" 858cabdff1aSopenharmony_ci "lwc1 %[temp1], 0(%[s_m1]) \n\t" 859cabdff1aSopenharmony_ci "addiu %[noise], %[noise], 1 \n\t" 860cabdff1aSopenharmony_ci "andi %[noise], %[noise], 0x1ff \n\t" 861cabdff1aSopenharmony_ci "sll %[temp0], %[noise], 3 \n\t" 862cabdff1aSopenharmony_ci PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t" 863cabdff1aSopenharmony_ci "nmsub.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t" 864cabdff1aSopenharmony_ci "mfc1 %[temp3], %[temp1] \n\t" 865cabdff1aSopenharmony_ci "bne %[temp3], $0, 1f \n\t" 866cabdff1aSopenharmony_ci "lwc1 %[y0], 0(%[Y1]) \n\t" 867cabdff1aSopenharmony_ci "lwc1 %[temp2], 0(%[q_filt1]) \n\t" 868cabdff1aSopenharmony_ci "lwc1 %[temp4], 0(%[ff_table]) \n\t" 869cabdff1aSopenharmony_ci "lwc1 %[temp5], 4(%[ff_table]) \n\t" 870cabdff1aSopenharmony_ci "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t" 871cabdff1aSopenharmony_ci "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t" 872cabdff1aSopenharmony_ci "swc1 %[y0], 0(%[Y1]) \n\t" 873cabdff1aSopenharmony_ci "1: \n\t" 874cabdff1aSopenharmony_ci "swc1 %[y1], 4(%[Y1]) \n\t" 875cabdff1aSopenharmony_ci 876cabdff1aSopenharmony_ci : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1), 877cabdff1aSopenharmony_ci [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 878cabdff1aSopenharmony_ci [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5) 879cabdff1aSopenharmony_ci : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise), 880cabdff1aSopenharmony_ci [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1), 881cabdff1aSopenharmony_ci [phi_sign]"f"(phi_sign) 882cabdff1aSopenharmony_ci : "memory" 883cabdff1aSopenharmony_ci ); 884cabdff1aSopenharmony_ci phi_sign = -phi_sign; 885cabdff1aSopenharmony_ci } 886cabdff1aSopenharmony_ci} 887cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */ 888cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 889cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 890cabdff1aSopenharmony_ci 891cabdff1aSopenharmony_civoid ff_sbrdsp_init_mips(SBRDSPContext *s) 892cabdff1aSopenharmony_ci{ 893cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 894cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 895cabdff1aSopenharmony_ci s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_mips; 896cabdff1aSopenharmony_ci s->qmf_post_shuffle = sbr_qmf_post_shuffle_mips; 897cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 898cabdff1aSopenharmony_ci s->sum64x5 = sbr_sum64x5_mips; 899cabdff1aSopenharmony_ci s->sum_square = sbr_sum_square_mips; 900cabdff1aSopenharmony_ci s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips; 901cabdff1aSopenharmony_ci s->autocorrelate = sbr_autocorrelate_mips; 902cabdff1aSopenharmony_ci s->hf_gen = sbr_hf_gen_mips; 903cabdff1aSopenharmony_ci s->hf_g_filt = sbr_hf_g_filt_mips; 904cabdff1aSopenharmony_ci 905cabdff1aSopenharmony_ci s->hf_apply_noise[0] = sbr_hf_apply_noise_0_mips; 906cabdff1aSopenharmony_ci s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips; 907cabdff1aSopenharmony_ci s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips; 908cabdff1aSopenharmony_ci s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips; 909cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */ 910cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 911cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 912cabdff1aSopenharmony_ci} 913