/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Authors:  Darko Laus      (darko@mips.com)
 *           Djordje Pesut   (djordje@mips.com)
 *           Mirjana Vulin   (mvulin@mips.com)
 *
 * AAC Spectral Band Replication decoding functions optimized for MIPS
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Reference: libavcodec/sbrdsp.c
 */
56cabdff1aSopenharmony_ci
57cabdff1aSopenharmony_ci#include "config.h"
58cabdff1aSopenharmony_ci#include "libavcodec/sbrdsp.h"
59cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h"
60cabdff1aSopenharmony_ci
61cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
62cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
63cabdff1aSopenharmony_cistatic void sbr_qmf_pre_shuffle_mips(float *z)
64cabdff1aSopenharmony_ci{
65cabdff1aSopenharmony_ci    int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6;
66cabdff1aSopenharmony_ci    float *z1 = &z[66];
67cabdff1aSopenharmony_ci    float *z2 = &z[59];
68cabdff1aSopenharmony_ci    float *z3 = &z[2];
69cabdff1aSopenharmony_ci    float *z4 = z1 + 60;
70cabdff1aSopenharmony_ci
71cabdff1aSopenharmony_ci    /* loop unrolled 5 times */
72cabdff1aSopenharmony_ci    __asm__ volatile (
73cabdff1aSopenharmony_ci        "lui    %[Temp6],   0x8000                  \n\t"
74cabdff1aSopenharmony_ci    "1:                                             \n\t"
75cabdff1aSopenharmony_ci        "lw     %[Temp1],   0(%[z2])                \n\t"
76cabdff1aSopenharmony_ci        "lw     %[Temp2],   4(%[z2])                \n\t"
77cabdff1aSopenharmony_ci        "lw     %[Temp3],   8(%[z2])                \n\t"
78cabdff1aSopenharmony_ci        "lw     %[Temp4],   12(%[z2])               \n\t"
79cabdff1aSopenharmony_ci        "lw     %[Temp5],   16(%[z2])               \n\t"
80cabdff1aSopenharmony_ci        "xor    %[Temp1],   %[Temp1],   %[Temp6]    \n\t"
81cabdff1aSopenharmony_ci        "xor    %[Temp2],   %[Temp2],   %[Temp6]    \n\t"
82cabdff1aSopenharmony_ci        "xor    %[Temp3],   %[Temp3],   %[Temp6]    \n\t"
83cabdff1aSopenharmony_ci        "xor    %[Temp4],   %[Temp4],   %[Temp6]    \n\t"
84cabdff1aSopenharmony_ci        "xor    %[Temp5],   %[Temp5],   %[Temp6]    \n\t"
85cabdff1aSopenharmony_ci        PTR_ADDIU "%[z2],   %[z2],      -20         \n\t"
86cabdff1aSopenharmony_ci        "sw     %[Temp1],   32(%[z1])               \n\t"
87cabdff1aSopenharmony_ci        "sw     %[Temp2],   24(%[z1])               \n\t"
88cabdff1aSopenharmony_ci        "sw     %[Temp3],   16(%[z1])               \n\t"
89cabdff1aSopenharmony_ci        "sw     %[Temp4],   8(%[z1])                \n\t"
90cabdff1aSopenharmony_ci        "sw     %[Temp5],   0(%[z1])                \n\t"
91cabdff1aSopenharmony_ci        "lw     %[Temp1],   0(%[z3])                \n\t"
92cabdff1aSopenharmony_ci        "lw     %[Temp2],   4(%[z3])                \n\t"
93cabdff1aSopenharmony_ci        "lw     %[Temp3],   8(%[z3])                \n\t"
94cabdff1aSopenharmony_ci        "lw     %[Temp4],   12(%[z3])               \n\t"
95cabdff1aSopenharmony_ci        "lw     %[Temp5],   16(%[z3])               \n\t"
96cabdff1aSopenharmony_ci        "sw     %[Temp1],   4(%[z1])                \n\t"
97cabdff1aSopenharmony_ci        "sw     %[Temp2],   12(%[z1])               \n\t"
98cabdff1aSopenharmony_ci        "sw     %[Temp3],   20(%[z1])               \n\t"
99cabdff1aSopenharmony_ci        "sw     %[Temp4],   28(%[z1])               \n\t"
100cabdff1aSopenharmony_ci        "sw     %[Temp5],   36(%[z1])               \n\t"
101cabdff1aSopenharmony_ci        PTR_ADDIU "%[z3],   %[z3],      20          \n\t"
102cabdff1aSopenharmony_ci        PTR_ADDIU "%[z1],   %[z1],      40          \n\t"
103cabdff1aSopenharmony_ci        "bne    %[z1],      %[z4],      1b          \n\t"
104cabdff1aSopenharmony_ci        "lw     %[Temp1],   132(%[z])               \n\t"
105cabdff1aSopenharmony_ci        "lw     %[Temp2],   128(%[z])               \n\t"
106cabdff1aSopenharmony_ci        "lw     %[Temp3],   0(%[z])                 \n\t"
107cabdff1aSopenharmony_ci        "lw     %[Temp4],   4(%[z])                 \n\t"
108cabdff1aSopenharmony_ci        "xor    %[Temp1],   %[Temp1],   %[Temp6]    \n\t"
109cabdff1aSopenharmony_ci        "sw     %[Temp1],   504(%[z])               \n\t"
110cabdff1aSopenharmony_ci        "sw     %[Temp2],   508(%[z])               \n\t"
111cabdff1aSopenharmony_ci        "sw     %[Temp3],   256(%[z])               \n\t"
112cabdff1aSopenharmony_ci        "sw     %[Temp4],   260(%[z])               \n\t"
113cabdff1aSopenharmony_ci
114cabdff1aSopenharmony_ci        : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
115cabdff1aSopenharmony_ci          [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
116cabdff1aSopenharmony_ci          [Temp5]"=&r"(Temp5), [Temp6]"=&r"(Temp6),
117cabdff1aSopenharmony_ci          [z1]"+r"(z1), [z2]"+r"(z2), [z3]"+r"(z3)
118cabdff1aSopenharmony_ci        : [z4]"r"(z4), [z]"r"(z)
119cabdff1aSopenharmony_ci        : "memory"
120cabdff1aSopenharmony_ci    );
121cabdff1aSopenharmony_ci}
122cabdff1aSopenharmony_ci
123cabdff1aSopenharmony_cistatic void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
124cabdff1aSopenharmony_ci{
125cabdff1aSopenharmony_ci    int Temp1, Temp2, Temp3, Temp4, Temp5;
126cabdff1aSopenharmony_ci    float *W_ptr = (float *)W;
127cabdff1aSopenharmony_ci    float *z1    = (float *)z;
128cabdff1aSopenharmony_ci    float *z2    = (float *)&z[60];
129cabdff1aSopenharmony_ci    float *z_end = z1 + 32;
130cabdff1aSopenharmony_ci
131cabdff1aSopenharmony_ci     /* loop unrolled 4 times */
132cabdff1aSopenharmony_ci    __asm__ volatile (
133cabdff1aSopenharmony_ci        "lui    %[Temp5],   0x8000                  \n\t"
134cabdff1aSopenharmony_ci    "1:                                             \n\t"
135cabdff1aSopenharmony_ci        "lw     %[Temp1],   0(%[z2])                \n\t"
136cabdff1aSopenharmony_ci        "lw     %[Temp2],   4(%[z2])                \n\t"
137cabdff1aSopenharmony_ci        "lw     %[Temp3],   8(%[z2])                \n\t"
138cabdff1aSopenharmony_ci        "lw     %[Temp4],   12(%[z2])               \n\t"
139cabdff1aSopenharmony_ci        "xor    %[Temp1],   %[Temp1],   %[Temp5]    \n\t"
140cabdff1aSopenharmony_ci        "xor    %[Temp2],   %[Temp2],   %[Temp5]    \n\t"
141cabdff1aSopenharmony_ci        "xor    %[Temp3],   %[Temp3],   %[Temp5]    \n\t"
142cabdff1aSopenharmony_ci        "xor    %[Temp4],   %[Temp4],   %[Temp5]    \n\t"
143cabdff1aSopenharmony_ci        PTR_ADDIU "%[z2],   %[z2],      -16         \n\t"
144cabdff1aSopenharmony_ci        "sw     %[Temp1],   24(%[W_ptr])            \n\t"
145cabdff1aSopenharmony_ci        "sw     %[Temp2],   16(%[W_ptr])            \n\t"
146cabdff1aSopenharmony_ci        "sw     %[Temp3],   8(%[W_ptr])             \n\t"
147cabdff1aSopenharmony_ci        "sw     %[Temp4],   0(%[W_ptr])             \n\t"
148cabdff1aSopenharmony_ci        "lw     %[Temp1],   0(%[z1])                \n\t"
149cabdff1aSopenharmony_ci        "lw     %[Temp2],   4(%[z1])                \n\t"
150cabdff1aSopenharmony_ci        "lw     %[Temp3],   8(%[z1])                \n\t"
151cabdff1aSopenharmony_ci        "lw     %[Temp4],   12(%[z1])               \n\t"
152cabdff1aSopenharmony_ci        "sw     %[Temp1],   4(%[W_ptr])             \n\t"
153cabdff1aSopenharmony_ci        "sw     %[Temp2],   12(%[W_ptr])            \n\t"
154cabdff1aSopenharmony_ci        "sw     %[Temp3],   20(%[W_ptr])            \n\t"
155cabdff1aSopenharmony_ci        "sw     %[Temp4],   28(%[W_ptr])            \n\t"
156cabdff1aSopenharmony_ci        PTR_ADDIU "%[z1],   %[z1],      16          \n\t"
157cabdff1aSopenharmony_ci        PTR_ADDIU "%[W_ptr],%[W_ptr],   32          \n\t"
158cabdff1aSopenharmony_ci        "bne    %[z1],      %[z_end],   1b          \n\t"
159cabdff1aSopenharmony_ci
160cabdff1aSopenharmony_ci        : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
161cabdff1aSopenharmony_ci          [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
162cabdff1aSopenharmony_ci          [Temp5]"=&r"(Temp5), [z1]"+r"(z1),
163cabdff1aSopenharmony_ci          [z2]"+r"(z2), [W_ptr]"+r"(W_ptr)
164cabdff1aSopenharmony_ci        : [z_end]"r"(z_end)
165cabdff1aSopenharmony_ci        : "memory"
166cabdff1aSopenharmony_ci    );
167cabdff1aSopenharmony_ci}
168cabdff1aSopenharmony_ci
169cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
170cabdff1aSopenharmony_cistatic void sbr_sum64x5_mips(float *z)
171cabdff1aSopenharmony_ci{
172cabdff1aSopenharmony_ci    int k;
173cabdff1aSopenharmony_ci    float *z1;
174cabdff1aSopenharmony_ci    float f1, f2, f3, f4, f5, f6, f7, f8;
175cabdff1aSopenharmony_ci    for (k = 0; k < 64; k += 8) {
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci        z1 = &z[k];
178cabdff1aSopenharmony_ci
179cabdff1aSopenharmony_ci         /* loop unrolled 8 times */
180cabdff1aSopenharmony_ci        __asm__ volatile (
181cabdff1aSopenharmony_ci            "lwc1   $f0,    0(%[z1])        \n\t"
182cabdff1aSopenharmony_ci            "lwc1   $f1,    256(%[z1])      \n\t"
183cabdff1aSopenharmony_ci            "lwc1   $f2,    4(%[z1])        \n\t"
184cabdff1aSopenharmony_ci            "lwc1   $f3,    260(%[z1])      \n\t"
185cabdff1aSopenharmony_ci            "lwc1   $f4,    8(%[z1])        \n\t"
186cabdff1aSopenharmony_ci            "add.s  %[f1],  $f0,    $f1     \n\t"
187cabdff1aSopenharmony_ci            "lwc1   $f5,    264(%[z1])      \n\t"
188cabdff1aSopenharmony_ci            "add.s  %[f2],  $f2,    $f3     \n\t"
189cabdff1aSopenharmony_ci            "lwc1   $f6,    12(%[z1])       \n\t"
190cabdff1aSopenharmony_ci            "lwc1   $f7,    268(%[z1])      \n\t"
191cabdff1aSopenharmony_ci            "add.s  %[f3],  $f4,    $f5     \n\t"
192cabdff1aSopenharmony_ci            "lwc1   $f8,    16(%[z1])       \n\t"
193cabdff1aSopenharmony_ci            "lwc1   $f9,    272(%[z1])      \n\t"
194cabdff1aSopenharmony_ci            "add.s  %[f4],  $f6,    $f7     \n\t"
195cabdff1aSopenharmony_ci            "lwc1   $f10,   20(%[z1])       \n\t"
196cabdff1aSopenharmony_ci            "lwc1   $f11,   276(%[z1])      \n\t"
197cabdff1aSopenharmony_ci            "add.s  %[f5],  $f8,    $f9     \n\t"
198cabdff1aSopenharmony_ci            "lwc1   $f12,   24(%[z1])       \n\t"
199cabdff1aSopenharmony_ci            "lwc1   $f13,   280(%[z1])      \n\t"
200cabdff1aSopenharmony_ci            "add.s  %[f6],  $f10,   $f11    \n\t"
201cabdff1aSopenharmony_ci            "lwc1   $f14,   28(%[z1])       \n\t"
202cabdff1aSopenharmony_ci            "lwc1   $f15,   284(%[z1])      \n\t"
203cabdff1aSopenharmony_ci            "add.s  %[f7],  $f12,   $f13    \n\t"
204cabdff1aSopenharmony_ci            "lwc1   $f0,    512(%[z1])      \n\t"
205cabdff1aSopenharmony_ci            "lwc1   $f1,    516(%[z1])      \n\t"
206cabdff1aSopenharmony_ci            "add.s  %[f8],  $f14,   $f15    \n\t"
207cabdff1aSopenharmony_ci            "lwc1   $f2,    520(%[z1])      \n\t"
208cabdff1aSopenharmony_ci            "add.s  %[f1],  %[f1],  $f0     \n\t"
209cabdff1aSopenharmony_ci            "add.s  %[f2],  %[f2],  $f1     \n\t"
210cabdff1aSopenharmony_ci            "lwc1   $f3,    524(%[z1])      \n\t"
211cabdff1aSopenharmony_ci            "add.s  %[f3],  %[f3],  $f2     \n\t"
212cabdff1aSopenharmony_ci            "lwc1   $f4,    528(%[z1])      \n\t"
213cabdff1aSopenharmony_ci            "lwc1   $f5,    532(%[z1])      \n\t"
214cabdff1aSopenharmony_ci            "add.s  %[f4],  %[f4],  $f3     \n\t"
215cabdff1aSopenharmony_ci            "lwc1   $f6,    536(%[z1])      \n\t"
216cabdff1aSopenharmony_ci            "add.s  %[f5],  %[f5],  $f4     \n\t"
217cabdff1aSopenharmony_ci            "add.s  %[f6],  %[f6],  $f5     \n\t"
218cabdff1aSopenharmony_ci            "lwc1   $f7,    540(%[z1])      \n\t"
219cabdff1aSopenharmony_ci            "add.s  %[f7],  %[f7],  $f6     \n\t"
220cabdff1aSopenharmony_ci            "lwc1   $f0,    768(%[z1])      \n\t"
221cabdff1aSopenharmony_ci            "lwc1   $f1,    772(%[z1])      \n\t"
222cabdff1aSopenharmony_ci            "add.s  %[f8],  %[f8],  $f7     \n\t"
223cabdff1aSopenharmony_ci            "lwc1   $f2,    776(%[z1])      \n\t"
224cabdff1aSopenharmony_ci            "add.s  %[f1],  %[f1],  $f0     \n\t"
225cabdff1aSopenharmony_ci            "add.s  %[f2],  %[f2],  $f1     \n\t"
226cabdff1aSopenharmony_ci            "lwc1   $f3,    780(%[z1])      \n\t"
227cabdff1aSopenharmony_ci            "add.s  %[f3],  %[f3],  $f2     \n\t"
228cabdff1aSopenharmony_ci            "lwc1   $f4,    784(%[z1])      \n\t"
229cabdff1aSopenharmony_ci            "lwc1   $f5,    788(%[z1])      \n\t"
230cabdff1aSopenharmony_ci            "add.s  %[f4],  %[f4],  $f3     \n\t"
231cabdff1aSopenharmony_ci            "lwc1   $f6,    792(%[z1])      \n\t"
232cabdff1aSopenharmony_ci            "add.s  %[f5],  %[f5],  $f4     \n\t"
233cabdff1aSopenharmony_ci            "add.s  %[f6],  %[f6],  $f5     \n\t"
234cabdff1aSopenharmony_ci            "lwc1   $f7,    796(%[z1])      \n\t"
235cabdff1aSopenharmony_ci            "add.s  %[f7],  %[f7],  $f6     \n\t"
236cabdff1aSopenharmony_ci            "lwc1   $f0,    1024(%[z1])     \n\t"
237cabdff1aSopenharmony_ci            "lwc1   $f1,    1028(%[z1])     \n\t"
238cabdff1aSopenharmony_ci            "add.s  %[f8],  %[f8],  $f7     \n\t"
239cabdff1aSopenharmony_ci            "lwc1   $f2,    1032(%[z1])     \n\t"
240cabdff1aSopenharmony_ci            "add.s  %[f1],  %[f1],  $f0     \n\t"
241cabdff1aSopenharmony_ci            "add.s  %[f2],  %[f2],  $f1     \n\t"
242cabdff1aSopenharmony_ci            "lwc1   $f3,    1036(%[z1])     \n\t"
243cabdff1aSopenharmony_ci            "add.s  %[f3],  %[f3],  $f2     \n\t"
244cabdff1aSopenharmony_ci            "lwc1   $f4,    1040(%[z1])     \n\t"
245cabdff1aSopenharmony_ci            "lwc1   $f5,    1044(%[z1])     \n\t"
246cabdff1aSopenharmony_ci            "add.s  %[f4],  %[f4],  $f3     \n\t"
247cabdff1aSopenharmony_ci            "lwc1   $f6,    1048(%[z1])     \n\t"
248cabdff1aSopenharmony_ci            "add.s  %[f5],  %[f5],  $f4     \n\t"
249cabdff1aSopenharmony_ci            "add.s  %[f6],  %[f6],  $f5     \n\t"
250cabdff1aSopenharmony_ci            "lwc1   $f7,    1052(%[z1])     \n\t"
251cabdff1aSopenharmony_ci            "add.s  %[f7],  %[f7],  $f6     \n\t"
252cabdff1aSopenharmony_ci            "swc1   %[f1],  0(%[z1])        \n\t"
253cabdff1aSopenharmony_ci            "swc1   %[f2],  4(%[z1])        \n\t"
254cabdff1aSopenharmony_ci            "add.s  %[f8],  %[f8],  $f7     \n\t"
255cabdff1aSopenharmony_ci            "swc1   %[f3],  8(%[z1])        \n\t"
256cabdff1aSopenharmony_ci            "swc1   %[f4],  12(%[z1])       \n\t"
257cabdff1aSopenharmony_ci            "swc1   %[f5],  16(%[z1])       \n\t"
258cabdff1aSopenharmony_ci            "swc1   %[f6],  20(%[z1])       \n\t"
259cabdff1aSopenharmony_ci            "swc1   %[f7],  24(%[z1])       \n\t"
260cabdff1aSopenharmony_ci            "swc1   %[f8],  28(%[z1])       \n\t"
261cabdff1aSopenharmony_ci
262cabdff1aSopenharmony_ci            : [f1]"=&f"(f1), [f2]"=&f"(f2), [f3]"=&f"(f3),
263cabdff1aSopenharmony_ci              [f4]"=&f"(f4), [f5]"=&f"(f5), [f6]"=&f"(f6),
264cabdff1aSopenharmony_ci              [f7]"=&f"(f7), [f8]"=&f"(f8)
265cabdff1aSopenharmony_ci            : [z1]"r"(z1)
266cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
267cabdff1aSopenharmony_ci              "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
268cabdff1aSopenharmony_ci              "$f12", "$f13", "$f14", "$f15",
269cabdff1aSopenharmony_ci              "memory"
270cabdff1aSopenharmony_ci        );
271cabdff1aSopenharmony_ci    }
272cabdff1aSopenharmony_ci}
273cabdff1aSopenharmony_ci
274cabdff1aSopenharmony_cistatic float sbr_sum_square_mips(float (*x)[2], int n)
275cabdff1aSopenharmony_ci{
276cabdff1aSopenharmony_ci    float sum0 = 0.0f, sum1 = 0.0f;
277cabdff1aSopenharmony_ci    float *p_x;
278cabdff1aSopenharmony_ci    float temp0, temp1, temp2, temp3;
279cabdff1aSopenharmony_ci    float *loop_end;
280cabdff1aSopenharmony_ci    p_x = &x[0][0];
281cabdff1aSopenharmony_ci    loop_end = p_x + (n >> 1)*4 - 4;
282cabdff1aSopenharmony_ci
283cabdff1aSopenharmony_ci    __asm__ volatile (
284cabdff1aSopenharmony_ci        ".set      push                                             \n\t"
285cabdff1aSopenharmony_ci        ".set      noreorder                                        \n\t"
286cabdff1aSopenharmony_ci        "lwc1      %[temp0],   0(%[p_x])                            \n\t"
287cabdff1aSopenharmony_ci        "lwc1      %[temp1],   4(%[p_x])                            \n\t"
288cabdff1aSopenharmony_ci        "lwc1      %[temp2],   8(%[p_x])                            \n\t"
289cabdff1aSopenharmony_ci        "lwc1      %[temp3],   12(%[p_x])                           \n\t"
290cabdff1aSopenharmony_ci    "1:                                                             \n\t"
291cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_x],     %[p_x],       16                     \n\t"
292cabdff1aSopenharmony_ci        "madd.s    %[sum0],    %[sum0],      %[temp0],   %[temp0]   \n\t"
293cabdff1aSopenharmony_ci        "lwc1      %[temp0],   0(%[p_x])                            \n\t"
294cabdff1aSopenharmony_ci        "madd.s    %[sum1],    %[sum1],      %[temp1],   %[temp1]   \n\t"
295cabdff1aSopenharmony_ci        "lwc1      %[temp1],   4(%[p_x])                            \n\t"
296cabdff1aSopenharmony_ci        "madd.s    %[sum0],    %[sum0],      %[temp2],   %[temp2]   \n\t"
297cabdff1aSopenharmony_ci        "lwc1      %[temp2],   8(%[p_x])                            \n\t"
298cabdff1aSopenharmony_ci        "madd.s    %[sum1],    %[sum1],      %[temp3],   %[temp3]   \n\t"
299cabdff1aSopenharmony_ci        "bne       %[p_x],     %[loop_end],  1b                     \n\t"
300cabdff1aSopenharmony_ci        " lwc1     %[temp3],   12(%[p_x])                           \n\t"
301cabdff1aSopenharmony_ci        "madd.s    %[sum0],    %[sum0],      %[temp0],   %[temp0]   \n\t"
302cabdff1aSopenharmony_ci        "madd.s    %[sum1],    %[sum1],      %[temp1],   %[temp1]   \n\t"
303cabdff1aSopenharmony_ci        "madd.s    %[sum0],    %[sum0],      %[temp2],   %[temp2]   \n\t"
304cabdff1aSopenharmony_ci        "madd.s    %[sum1],    %[sum1],      %[temp3],   %[temp3]   \n\t"
305cabdff1aSopenharmony_ci        ".set      pop                                              \n\t"
306cabdff1aSopenharmony_ci
307cabdff1aSopenharmony_ci        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
308cabdff1aSopenharmony_ci          [temp3]"=&f"(temp3), [sum0]"+f"(sum0), [sum1]"+f"(sum1),
309cabdff1aSopenharmony_ci          [p_x]"+r"(p_x)
310cabdff1aSopenharmony_ci        : [loop_end]"r"(loop_end)
311cabdff1aSopenharmony_ci        : "memory"
312cabdff1aSopenharmony_ci    );
313cabdff1aSopenharmony_ci    return sum0 + sum1;
314cabdff1aSopenharmony_ci}
315cabdff1aSopenharmony_ci
316cabdff1aSopenharmony_cistatic void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *src1)
317cabdff1aSopenharmony_ci{
318cabdff1aSopenharmony_ci    int i;
319cabdff1aSopenharmony_ci    float temp0, temp1, temp2, temp3, temp4, temp5;
320cabdff1aSopenharmony_ci    float temp6, temp7, temp8, temp9, temp10, temp11;
321cabdff1aSopenharmony_ci    float *v0 = v;
322cabdff1aSopenharmony_ci    float *v1 = &v[127];
323cabdff1aSopenharmony_ci    float *psrc0 = (float*)src0;
324cabdff1aSopenharmony_ci    float *psrc1 = (float*)&src1[63];
325cabdff1aSopenharmony_ci
326cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
327cabdff1aSopenharmony_ci
328cabdff1aSopenharmony_ci         /* loop unrolled 16 times */
329cabdff1aSopenharmony_ci        __asm__ volatile(
330cabdff1aSopenharmony_ci            "lwc1       %[temp0],   0(%[src0])             \n\t"
331cabdff1aSopenharmony_ci            "lwc1       %[temp1],   0(%[src1])             \n\t"
332cabdff1aSopenharmony_ci            "lwc1       %[temp3],   4(%[src0])             \n\t"
333cabdff1aSopenharmony_ci            "lwc1       %[temp4],   -4(%[src1])            \n\t"
334cabdff1aSopenharmony_ci            "lwc1       %[temp6],   8(%[src0])             \n\t"
335cabdff1aSopenharmony_ci            "lwc1       %[temp7],   -8(%[src1])            \n\t"
336cabdff1aSopenharmony_ci            "lwc1       %[temp9],   12(%[src0])            \n\t"
337cabdff1aSopenharmony_ci            "lwc1       %[temp10],  -12(%[src1])           \n\t"
338cabdff1aSopenharmony_ci            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
339cabdff1aSopenharmony_ci            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
340cabdff1aSopenharmony_ci            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
341cabdff1aSopenharmony_ci            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
342cabdff1aSopenharmony_ci            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
343cabdff1aSopenharmony_ci            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
344cabdff1aSopenharmony_ci            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
345cabdff1aSopenharmony_ci            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
346cabdff1aSopenharmony_ci            "swc1       %[temp2],   0(%[v1])               \n\t"
347cabdff1aSopenharmony_ci            "swc1       %[temp0],   0(%[v0])               \n\t"
348cabdff1aSopenharmony_ci            "swc1       %[temp5],   -4(%[v1])              \n\t"
349cabdff1aSopenharmony_ci            "swc1       %[temp3],   4(%[v0])               \n\t"
350cabdff1aSopenharmony_ci            "swc1       %[temp8],   -8(%[v1])              \n\t"
351cabdff1aSopenharmony_ci            "swc1       %[temp6],   8(%[v0])               \n\t"
352cabdff1aSopenharmony_ci            "swc1       %[temp11],  -12(%[v1])             \n\t"
353cabdff1aSopenharmony_ci            "swc1       %[temp9],   12(%[v0])              \n\t"
354cabdff1aSopenharmony_ci            "lwc1       %[temp0],   16(%[src0])            \n\t"
355cabdff1aSopenharmony_ci            "lwc1       %[temp1],   -16(%[src1])           \n\t"
356cabdff1aSopenharmony_ci            "lwc1       %[temp3],   20(%[src0])            \n\t"
357cabdff1aSopenharmony_ci            "lwc1       %[temp4],   -20(%[src1])           \n\t"
358cabdff1aSopenharmony_ci            "lwc1       %[temp6],   24(%[src0])            \n\t"
359cabdff1aSopenharmony_ci            "lwc1       %[temp7],   -24(%[src1])           \n\t"
360cabdff1aSopenharmony_ci            "lwc1       %[temp9],   28(%[src0])            \n\t"
361cabdff1aSopenharmony_ci            "lwc1       %[temp10],  -28(%[src1])           \n\t"
362cabdff1aSopenharmony_ci            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
363cabdff1aSopenharmony_ci            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
364cabdff1aSopenharmony_ci            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
365cabdff1aSopenharmony_ci            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
366cabdff1aSopenharmony_ci            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
367cabdff1aSopenharmony_ci            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
368cabdff1aSopenharmony_ci            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
369cabdff1aSopenharmony_ci            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
370cabdff1aSopenharmony_ci            "swc1       %[temp2],   -16(%[v1])             \n\t"
371cabdff1aSopenharmony_ci            "swc1       %[temp0],   16(%[v0])              \n\t"
372cabdff1aSopenharmony_ci            "swc1       %[temp5],   -20(%[v1])             \n\t"
373cabdff1aSopenharmony_ci            "swc1       %[temp3],   20(%[v0])              \n\t"
374cabdff1aSopenharmony_ci            "swc1       %[temp8],   -24(%[v1])             \n\t"
375cabdff1aSopenharmony_ci            "swc1       %[temp6],   24(%[v0])              \n\t"
376cabdff1aSopenharmony_ci            "swc1       %[temp11],  -28(%[v1])             \n\t"
377cabdff1aSopenharmony_ci            "swc1       %[temp9],   28(%[v0])              \n\t"
378cabdff1aSopenharmony_ci            "lwc1       %[temp0],   32(%[src0])            \n\t"
379cabdff1aSopenharmony_ci            "lwc1       %[temp1],   -32(%[src1])           \n\t"
380cabdff1aSopenharmony_ci            "lwc1       %[temp3],   36(%[src0])            \n\t"
381cabdff1aSopenharmony_ci            "lwc1       %[temp4],   -36(%[src1])           \n\t"
382cabdff1aSopenharmony_ci            "lwc1       %[temp6],   40(%[src0])            \n\t"
383cabdff1aSopenharmony_ci            "lwc1       %[temp7],   -40(%[src1])           \n\t"
384cabdff1aSopenharmony_ci            "lwc1       %[temp9],   44(%[src0])            \n\t"
385cabdff1aSopenharmony_ci            "lwc1       %[temp10],  -44(%[src1])           \n\t"
386cabdff1aSopenharmony_ci            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
387cabdff1aSopenharmony_ci            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
388cabdff1aSopenharmony_ci            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
389cabdff1aSopenharmony_ci            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
390cabdff1aSopenharmony_ci            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
391cabdff1aSopenharmony_ci            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
392cabdff1aSopenharmony_ci            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
393cabdff1aSopenharmony_ci            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
394cabdff1aSopenharmony_ci            "swc1       %[temp2],   -32(%[v1])             \n\t"
395cabdff1aSopenharmony_ci            "swc1       %[temp0],   32(%[v0])              \n\t"
396cabdff1aSopenharmony_ci            "swc1       %[temp5],   -36(%[v1])             \n\t"
397cabdff1aSopenharmony_ci            "swc1       %[temp3],   36(%[v0])              \n\t"
398cabdff1aSopenharmony_ci            "swc1       %[temp8],   -40(%[v1])             \n\t"
399cabdff1aSopenharmony_ci            "swc1       %[temp6],   40(%[v0])              \n\t"
400cabdff1aSopenharmony_ci            "swc1       %[temp11],  -44(%[v1])             \n\t"
401cabdff1aSopenharmony_ci            "swc1       %[temp9],   44(%[v0])              \n\t"
402cabdff1aSopenharmony_ci            "lwc1       %[temp0],   48(%[src0])            \n\t"
403cabdff1aSopenharmony_ci            "lwc1       %[temp1],   -48(%[src1])           \n\t"
404cabdff1aSopenharmony_ci            "lwc1       %[temp3],   52(%[src0])            \n\t"
405cabdff1aSopenharmony_ci            "lwc1       %[temp4],   -52(%[src1])           \n\t"
406cabdff1aSopenharmony_ci            "lwc1       %[temp6],   56(%[src0])            \n\t"
407cabdff1aSopenharmony_ci            "lwc1       %[temp7],   -56(%[src1])           \n\t"
408cabdff1aSopenharmony_ci            "lwc1       %[temp9],   60(%[src0])            \n\t"
409cabdff1aSopenharmony_ci            "lwc1       %[temp10],  -60(%[src1])           \n\t"
410cabdff1aSopenharmony_ci            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
411cabdff1aSopenharmony_ci            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
412cabdff1aSopenharmony_ci            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
413cabdff1aSopenharmony_ci            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
414cabdff1aSopenharmony_ci            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
415cabdff1aSopenharmony_ci            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
416cabdff1aSopenharmony_ci            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
417cabdff1aSopenharmony_ci            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
418cabdff1aSopenharmony_ci            "swc1       %[temp2],   -48(%[v1])             \n\t"
419cabdff1aSopenharmony_ci            "swc1       %[temp0],   48(%[v0])              \n\t"
420cabdff1aSopenharmony_ci            "swc1       %[temp5],   -52(%[v1])             \n\t"
421cabdff1aSopenharmony_ci            "swc1       %[temp3],   52(%[v0])              \n\t"
422cabdff1aSopenharmony_ci            "swc1       %[temp8],   -56(%[v1])             \n\t"
423cabdff1aSopenharmony_ci            "swc1       %[temp6],   56(%[v0])              \n\t"
424cabdff1aSopenharmony_ci            "swc1       %[temp11],  -60(%[v1])             \n\t"
425cabdff1aSopenharmony_ci            "swc1       %[temp9],   60(%[v0])              \n\t"
426cabdff1aSopenharmony_ci            PTR_ADDIU " %[src0],    %[src0],    64         \n\t"
427cabdff1aSopenharmony_ci            PTR_ADDIU " %[src1],    %[src1],    -64        \n\t"
428cabdff1aSopenharmony_ci            PTR_ADDIU " %[v0],      %[v0],      64         \n\t"
429cabdff1aSopenharmony_ci            PTR_ADDIU " %[v1],      %[v1],      -64        \n\t"
430cabdff1aSopenharmony_ci
431cabdff1aSopenharmony_ci            : [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1),
432cabdff1aSopenharmony_ci              [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
433cabdff1aSopenharmony_ci              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
434cabdff1aSopenharmony_ci              [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
435cabdff1aSopenharmony_ci              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
436cabdff1aSopenharmony_ci            :
437cabdff1aSopenharmony_ci            :"memory"
438cabdff1aSopenharmony_ci        );
439cabdff1aSopenharmony_ci    }
440cabdff1aSopenharmony_ci}
441cabdff1aSopenharmony_ci
442cabdff1aSopenharmony_cistatic void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
443cabdff1aSopenharmony_ci{
444cabdff1aSopenharmony_ci    int i;
445cabdff1aSopenharmony_ci    float real_sum_0 = 0.0f;
446cabdff1aSopenharmony_ci    float real_sum_1 = 0.0f;
447cabdff1aSopenharmony_ci    float real_sum_2 = 0.0f;
448cabdff1aSopenharmony_ci    float imag_sum_1 = 0.0f;
449cabdff1aSopenharmony_ci    float imag_sum_2 = 0.0f;
450cabdff1aSopenharmony_ci    float *p_x, *p_phi;
451cabdff1aSopenharmony_ci    float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
452cabdff1aSopenharmony_ci    float temp7, temp_r, temp_r1, temp_r2, temp_r3, temp_r4;
453cabdff1aSopenharmony_ci    p_x = (float*)&x[0][0];
454cabdff1aSopenharmony_ci    p_phi = &phi[0][0][0];
455cabdff1aSopenharmony_ci
456cabdff1aSopenharmony_ci    __asm__ volatile (
457cabdff1aSopenharmony_ci        "lwc1    %[temp0],      8(%[p_x])                           \n\t"
458cabdff1aSopenharmony_ci        "lwc1    %[temp1],      12(%[p_x])                          \n\t"
459cabdff1aSopenharmony_ci        "lwc1    %[temp2],      16(%[p_x])                          \n\t"
460cabdff1aSopenharmony_ci        "lwc1    %[temp3],      20(%[p_x])                          \n\t"
461cabdff1aSopenharmony_ci        "lwc1    %[temp4],      24(%[p_x])                          \n\t"
462cabdff1aSopenharmony_ci        "lwc1    %[temp5],      28(%[p_x])                          \n\t"
463cabdff1aSopenharmony_ci        "mul.s   %[temp_r],     %[temp1],      %[temp1]             \n\t"
464cabdff1aSopenharmony_ci        "mul.s   %[temp_r1],    %[temp1],      %[temp3]             \n\t"
465cabdff1aSopenharmony_ci        "mul.s   %[temp_r2],    %[temp1],      %[temp2]             \n\t"
466cabdff1aSopenharmony_ci        "mul.s   %[temp_r3],    %[temp1],      %[temp5]             \n\t"
467cabdff1aSopenharmony_ci        "mul.s   %[temp_r4],    %[temp1],      %[temp4]             \n\t"
468cabdff1aSopenharmony_ci        "madd.s  %[temp_r],     %[temp_r],     %[temp0],  %[temp0]  \n\t"
469cabdff1aSopenharmony_ci        "madd.s  %[temp_r1],    %[temp_r1],    %[temp0],  %[temp2]  \n\t"
470cabdff1aSopenharmony_ci        "msub.s  %[temp_r2],    %[temp_r2],    %[temp0],  %[temp3]  \n\t"
471cabdff1aSopenharmony_ci        "madd.s  %[temp_r3],    %[temp_r3],    %[temp0],  %[temp4]  \n\t"
472cabdff1aSopenharmony_ci        "msub.s  %[temp_r4],    %[temp_r4],    %[temp0],  %[temp5]  \n\t"
473cabdff1aSopenharmony_ci        "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
474cabdff1aSopenharmony_ci        "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
475cabdff1aSopenharmony_ci        "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
476cabdff1aSopenharmony_ci        "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
477cabdff1aSopenharmony_ci        "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
478cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_x],      %[p_x],        8                    \n\t"
479cabdff1aSopenharmony_ci
480cabdff1aSopenharmony_ci        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
481cabdff1aSopenharmony_ci          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
482cabdff1aSopenharmony_ci          [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
483cabdff1aSopenharmony_ci          [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
484cabdff1aSopenharmony_ci          [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), [temp_r2]"=&f"(temp_r2),
485cabdff1aSopenharmony_ci          [temp_r3]"=&f"(temp_r3), [temp_r4]"=&f"(temp_r4),
486cabdff1aSopenharmony_ci          [p_x]"+r"(p_x), [imag_sum_2]"+f"(imag_sum_2)
487cabdff1aSopenharmony_ci        :
488cabdff1aSopenharmony_ci        : "memory"
489cabdff1aSopenharmony_ci    );
490cabdff1aSopenharmony_ci
491cabdff1aSopenharmony_ci    for (i = 0; i < 12; i++) {
492cabdff1aSopenharmony_ci        __asm__ volatile (
493cabdff1aSopenharmony_ci            "lwc1    %[temp0],      8(%[p_x])                           \n\t"
494cabdff1aSopenharmony_ci            "lwc1    %[temp1],      12(%[p_x])                          \n\t"
495cabdff1aSopenharmony_ci            "lwc1    %[temp2],      16(%[p_x])                          \n\t"
496cabdff1aSopenharmony_ci            "lwc1    %[temp3],      20(%[p_x])                          \n\t"
497cabdff1aSopenharmony_ci            "lwc1    %[temp4],      24(%[p_x])                          \n\t"
498cabdff1aSopenharmony_ci            "lwc1    %[temp5],      28(%[p_x])                          \n\t"
499cabdff1aSopenharmony_ci            "mul.s   %[temp_r],     %[temp1],      %[temp1]             \n\t"
500cabdff1aSopenharmony_ci            "mul.s   %[temp_r1],    %[temp1],      %[temp3]             \n\t"
501cabdff1aSopenharmony_ci            "mul.s   %[temp_r2],    %[temp1],      %[temp2]             \n\t"
502cabdff1aSopenharmony_ci            "mul.s   %[temp_r3],    %[temp1],      %[temp5]             \n\t"
503cabdff1aSopenharmony_ci            "mul.s   %[temp_r4],    %[temp1],      %[temp4]             \n\t"
504cabdff1aSopenharmony_ci            "madd.s  %[temp_r],     %[temp_r],     %[temp0],  %[temp0]  \n\t"
505cabdff1aSopenharmony_ci            "madd.s  %[temp_r1],    %[temp_r1],    %[temp0],  %[temp2]  \n\t"
506cabdff1aSopenharmony_ci            "msub.s  %[temp_r2],    %[temp_r2],    %[temp0],  %[temp3]  \n\t"
507cabdff1aSopenharmony_ci            "madd.s  %[temp_r3],    %[temp_r3],    %[temp0],  %[temp4]  \n\t"
508cabdff1aSopenharmony_ci            "msub.s  %[temp_r4],    %[temp_r4],    %[temp0],  %[temp5]  \n\t"
509cabdff1aSopenharmony_ci            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
510cabdff1aSopenharmony_ci            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
511cabdff1aSopenharmony_ci            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
512cabdff1aSopenharmony_ci            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
513cabdff1aSopenharmony_ci            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
514cabdff1aSopenharmony_ci            "lwc1    %[temp0],      32(%[p_x])                          \n\t"
515cabdff1aSopenharmony_ci            "lwc1    %[temp1],      36(%[p_x])                          \n\t"
516cabdff1aSopenharmony_ci            "mul.s   %[temp_r],     %[temp3],      %[temp3]             \n\t"
517cabdff1aSopenharmony_ci            "mul.s   %[temp_r1],    %[temp3],      %[temp5]             \n\t"
518cabdff1aSopenharmony_ci            "mul.s   %[temp_r2],    %[temp3],      %[temp4]             \n\t"
519cabdff1aSopenharmony_ci            "mul.s   %[temp_r3],    %[temp3],      %[temp1]             \n\t"
520cabdff1aSopenharmony_ci            "mul.s   %[temp_r4],    %[temp3],      %[temp0]             \n\t"
521cabdff1aSopenharmony_ci            "madd.s  %[temp_r],     %[temp_r],     %[temp2],  %[temp2]  \n\t"
522cabdff1aSopenharmony_ci            "madd.s  %[temp_r1],    %[temp_r1],    %[temp2],  %[temp4]  \n\t"
523cabdff1aSopenharmony_ci            "msub.s  %[temp_r2],    %[temp_r2],    %[temp2],  %[temp5]  \n\t"
524cabdff1aSopenharmony_ci            "madd.s  %[temp_r3],    %[temp_r3],    %[temp2],  %[temp0]  \n\t"
525cabdff1aSopenharmony_ci            "msub.s  %[temp_r4],    %[temp_r4],    %[temp2],  %[temp1]  \n\t"
526cabdff1aSopenharmony_ci            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
527cabdff1aSopenharmony_ci            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
528cabdff1aSopenharmony_ci            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
529cabdff1aSopenharmony_ci            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
530cabdff1aSopenharmony_ci            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
531cabdff1aSopenharmony_ci            "lwc1    %[temp2],      40(%[p_x])                          \n\t"
532cabdff1aSopenharmony_ci            "lwc1    %[temp3],      44(%[p_x])                          \n\t"
533cabdff1aSopenharmony_ci            "mul.s   %[temp_r],     %[temp5],      %[temp5]             \n\t"
534cabdff1aSopenharmony_ci            "mul.s   %[temp_r1],    %[temp5],      %[temp1]             \n\t"
535cabdff1aSopenharmony_ci            "mul.s   %[temp_r2],    %[temp5],      %[temp0]             \n\t"
536cabdff1aSopenharmony_ci            "mul.s   %[temp_r3],    %[temp5],      %[temp3]             \n\t"
537cabdff1aSopenharmony_ci            "mul.s   %[temp_r4],    %[temp5],      %[temp2]             \n\t"
538cabdff1aSopenharmony_ci            "madd.s  %[temp_r],     %[temp_r],     %[temp4],  %[temp4]  \n\t"
539cabdff1aSopenharmony_ci            "madd.s  %[temp_r1],    %[temp_r1],    %[temp4],  %[temp0]  \n\t"
540cabdff1aSopenharmony_ci            "msub.s  %[temp_r2],    %[temp_r2],    %[temp4],  %[temp1]  \n\t"
541cabdff1aSopenharmony_ci            "madd.s  %[temp_r3],    %[temp_r3],    %[temp4],  %[temp2]  \n\t"
542cabdff1aSopenharmony_ci            "msub.s  %[temp_r4],    %[temp_r4],    %[temp4],  %[temp3]  \n\t"
543cabdff1aSopenharmony_ci            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
544cabdff1aSopenharmony_ci            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
545cabdff1aSopenharmony_ci            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
546cabdff1aSopenharmony_ci            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
547cabdff1aSopenharmony_ci            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
548cabdff1aSopenharmony_ci            PTR_ADDIU "%[p_x],      %[p_x],        24                   \n\t"
549cabdff1aSopenharmony_ci
550cabdff1aSopenharmony_ci            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
551cabdff1aSopenharmony_ci              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
552cabdff1aSopenharmony_ci              [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
553cabdff1aSopenharmony_ci              [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
554cabdff1aSopenharmony_ci              [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1),
555cabdff1aSopenharmony_ci              [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
556cabdff1aSopenharmony_ci              [temp_r4]"=&f"(temp_r4), [p_x]"+r"(p_x),
557cabdff1aSopenharmony_ci              [imag_sum_2]"+f"(imag_sum_2)
558cabdff1aSopenharmony_ci            :
559cabdff1aSopenharmony_ci            : "memory"
560cabdff1aSopenharmony_ci        );
561cabdff1aSopenharmony_ci    }
562cabdff1aSopenharmony_ci    __asm__ volatile (
563cabdff1aSopenharmony_ci        "lwc1    %[temp0],    -296(%[p_x])                        \n\t"
564cabdff1aSopenharmony_ci        "lwc1    %[temp1],    -292(%[p_x])                        \n\t"
565cabdff1aSopenharmony_ci        "lwc1    %[temp2],    8(%[p_x])                           \n\t"
566cabdff1aSopenharmony_ci        "lwc1    %[temp3],    12(%[p_x])                          \n\t"
567cabdff1aSopenharmony_ci        "lwc1    %[temp4],    -288(%[p_x])                        \n\t"
568cabdff1aSopenharmony_ci        "lwc1    %[temp5],    -284(%[p_x])                        \n\t"
569cabdff1aSopenharmony_ci        "lwc1    %[temp6],    -280(%[p_x])                        \n\t"
570cabdff1aSopenharmony_ci        "lwc1    %[temp7],    -276(%[p_x])                        \n\t"
571cabdff1aSopenharmony_ci        "madd.s  %[temp_r],   %[real_sum_0], %[temp0],  %[temp0]  \n\t"
572cabdff1aSopenharmony_ci        "madd.s  %[temp_r1],  %[real_sum_0], %[temp2],  %[temp2]  \n\t"
573cabdff1aSopenharmony_ci        "madd.s  %[temp_r2],  %[real_sum_1], %[temp0],  %[temp4]  \n\t"
574cabdff1aSopenharmony_ci        "madd.s  %[temp_r3],  %[imag_sum_1], %[temp0],  %[temp5]  \n\t"
575cabdff1aSopenharmony_ci        "madd.s  %[temp_r],   %[temp_r],     %[temp1],  %[temp1]  \n\t"
576cabdff1aSopenharmony_ci        "madd.s  %[temp_r1],  %[temp_r1],    %[temp3],  %[temp3]  \n\t"
577cabdff1aSopenharmony_ci        "madd.s  %[temp_r2],  %[temp_r2],    %[temp1],  %[temp5]  \n\t"
578cabdff1aSopenharmony_ci        "nmsub.s  %[temp_r3], %[temp_r3],    %[temp1],  %[temp4]  \n\t"
579cabdff1aSopenharmony_ci        "lwc1    %[temp4],    16(%[p_x])                          \n\t"
580cabdff1aSopenharmony_ci        "lwc1    %[temp5],    20(%[p_x])                          \n\t"
581cabdff1aSopenharmony_ci        "swc1    %[temp_r],   40(%[p_phi])                        \n\t"
582cabdff1aSopenharmony_ci        "swc1    %[temp_r1],  16(%[p_phi])                        \n\t"
583cabdff1aSopenharmony_ci        "swc1    %[temp_r2],  24(%[p_phi])                        \n\t"
584cabdff1aSopenharmony_ci        "swc1    %[temp_r3],  28(%[p_phi])                        \n\t"
585cabdff1aSopenharmony_ci        "madd.s  %[temp_r],   %[real_sum_1], %[temp2],  %[temp4]  \n\t"
586cabdff1aSopenharmony_ci        "madd.s  %[temp_r1],  %[imag_sum_1], %[temp2],  %[temp5]  \n\t"
587cabdff1aSopenharmony_ci        "madd.s  %[temp_r2],  %[real_sum_2], %[temp0],  %[temp6]  \n\t"
588cabdff1aSopenharmony_ci        "madd.s  %[temp_r3],  %[imag_sum_2], %[temp0],  %[temp7]  \n\t"
589cabdff1aSopenharmony_ci        "madd.s  %[temp_r],   %[temp_r],     %[temp3],  %[temp5]  \n\t"
590cabdff1aSopenharmony_ci        "nmsub.s %[temp_r1],  %[temp_r1],    %[temp3],  %[temp4]  \n\t"
591cabdff1aSopenharmony_ci        "madd.s  %[temp_r2],  %[temp_r2],    %[temp1],  %[temp7]  \n\t"
592cabdff1aSopenharmony_ci        "nmsub.s %[temp_r3],  %[temp_r3],    %[temp1],  %[temp6]  \n\t"
593cabdff1aSopenharmony_ci        "swc1    %[temp_r],   0(%[p_phi])                         \n\t"
594cabdff1aSopenharmony_ci        "swc1    %[temp_r1],  4(%[p_phi])                         \n\t"
595cabdff1aSopenharmony_ci        "swc1    %[temp_r2],  8(%[p_phi])                         \n\t"
596cabdff1aSopenharmony_ci        "swc1    %[temp_r3],  12(%[p_phi])                        \n\t"
597cabdff1aSopenharmony_ci
598cabdff1aSopenharmony_ci        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
599cabdff1aSopenharmony_ci          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
600cabdff1aSopenharmony_ci          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp_r]"=&f"(temp_r),
601cabdff1aSopenharmony_ci          [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
602cabdff1aSopenharmony_ci          [real_sum_2]"+f"(real_sum_2), [imag_sum_1]"+f"(imag_sum_1),
603cabdff1aSopenharmony_ci          [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
604cabdff1aSopenharmony_ci          [temp_r1]"=&f"(temp_r1), [p_phi]"+r"(p_phi),
605cabdff1aSopenharmony_ci          [imag_sum_2]"+f"(imag_sum_2)
606cabdff1aSopenharmony_ci        : [p_x]"r"(p_x)
607cabdff1aSopenharmony_ci        : "memory"
608cabdff1aSopenharmony_ci    );
609cabdff1aSopenharmony_ci}
610cabdff1aSopenharmony_ci
611cabdff1aSopenharmony_cistatic void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
612cabdff1aSopenharmony_ci                         const float alpha0[2], const float alpha1[2],
613cabdff1aSopenharmony_ci                         float bw, int start, int end)
614cabdff1aSopenharmony_ci{
615cabdff1aSopenharmony_ci    float alpha[4];
616cabdff1aSopenharmony_ci    int i;
617cabdff1aSopenharmony_ci    float *p_x_low = (float*)&X_low[0][0] + 2*start;
618cabdff1aSopenharmony_ci    float *p_x_high = &X_high[0][0] + 2*start;
619cabdff1aSopenharmony_ci    float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
620cabdff1aSopenharmony_ci    float temp7, temp8, temp9, temp10, temp11, temp12;
621cabdff1aSopenharmony_ci
622cabdff1aSopenharmony_ci    alpha[0] = alpha1[0] * bw * bw;
623cabdff1aSopenharmony_ci    alpha[1] = alpha1[1] * bw * bw;
624cabdff1aSopenharmony_ci    alpha[2] = alpha0[0] * bw;
625cabdff1aSopenharmony_ci    alpha[3] = alpha0[1] * bw;
626cabdff1aSopenharmony_ci
627cabdff1aSopenharmony_ci    for (i = start; i < end; i++) {
628cabdff1aSopenharmony_ci        __asm__ volatile (
629cabdff1aSopenharmony_ci            "lwc1    %[temp0],    -16(%[p_x_low])                        \n\t"
630cabdff1aSopenharmony_ci            "lwc1    %[temp1],    -12(%[p_x_low])                        \n\t"
631cabdff1aSopenharmony_ci            "lwc1    %[temp2],    -8(%[p_x_low])                         \n\t"
632cabdff1aSopenharmony_ci            "lwc1    %[temp3],    -4(%[p_x_low])                         \n\t"
633cabdff1aSopenharmony_ci            "lwc1    %[temp5],    0(%[p_x_low])                          \n\t"
634cabdff1aSopenharmony_ci            "lwc1    %[temp6],    4(%[p_x_low])                          \n\t"
635cabdff1aSopenharmony_ci            "lwc1    %[temp7],    0(%[alpha])                            \n\t"
636cabdff1aSopenharmony_ci            "lwc1    %[temp8],    4(%[alpha])                            \n\t"
637cabdff1aSopenharmony_ci            "lwc1    %[temp9],    8(%[alpha])                            \n\t"
638cabdff1aSopenharmony_ci            "lwc1    %[temp10],   12(%[alpha])                           \n\t"
639cabdff1aSopenharmony_ci            PTR_ADDIU "%[p_x_high], %[p_x_high],   8                     \n\t"
640cabdff1aSopenharmony_ci            PTR_ADDIU "%[p_x_low],  %[p_x_low],    8                     \n\t"
641cabdff1aSopenharmony_ci            "mul.s   %[temp11],   %[temp1],        %[temp8]              \n\t"
642cabdff1aSopenharmony_ci            "msub.s  %[temp11],   %[temp11],       %[temp0],  %[temp7]   \n\t"
643cabdff1aSopenharmony_ci            "madd.s  %[temp11],   %[temp11],       %[temp2],  %[temp9]   \n\t"
644cabdff1aSopenharmony_ci            "nmsub.s %[temp11],   %[temp11],       %[temp3],  %[temp10]  \n\t"
645cabdff1aSopenharmony_ci            "add.s   %[temp11],   %[temp11],       %[temp5]              \n\t"
646cabdff1aSopenharmony_ci            "swc1    %[temp11],   -8(%[p_x_high])                        \n\t"
647cabdff1aSopenharmony_ci            "mul.s   %[temp12],   %[temp1],        %[temp7]              \n\t"
648cabdff1aSopenharmony_ci            "madd.s  %[temp12],   %[temp12],       %[temp0],  %[temp8]   \n\t"
649cabdff1aSopenharmony_ci            "madd.s  %[temp12],   %[temp12],       %[temp3],  %[temp9]   \n\t"
650cabdff1aSopenharmony_ci            "madd.s  %[temp12],   %[temp12],       %[temp2],  %[temp10]  \n\t"
651cabdff1aSopenharmony_ci            "add.s   %[temp12],   %[temp12],       %[temp6]              \n\t"
652cabdff1aSopenharmony_ci            "swc1    %[temp12],   -4(%[p_x_high])                        \n\t"
653cabdff1aSopenharmony_ci
654cabdff1aSopenharmony_ci            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
655cabdff1aSopenharmony_ci              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
656cabdff1aSopenharmony_ci              [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
657cabdff1aSopenharmony_ci              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
658cabdff1aSopenharmony_ci              [temp12]"=&f"(temp12), [p_x_high]"+r"(p_x_high),
659cabdff1aSopenharmony_ci              [p_x_low]"+r"(p_x_low)
660cabdff1aSopenharmony_ci            : [alpha]"r"(alpha)
661cabdff1aSopenharmony_ci            : "memory"
662cabdff1aSopenharmony_ci        );
663cabdff1aSopenharmony_ci    }
664cabdff1aSopenharmony_ci}
665cabdff1aSopenharmony_ci
666cabdff1aSopenharmony_cistatic void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2],
667cabdff1aSopenharmony_ci                            const float *g_filt, int m_max, intptr_t ixh)
668cabdff1aSopenharmony_ci{
669cabdff1aSopenharmony_ci    const float *p_x, *p_g, *loop_end;
670cabdff1aSopenharmony_ci    float *p_y;
671cabdff1aSopenharmony_ci    float temp0, temp1, temp2;
672cabdff1aSopenharmony_ci
673cabdff1aSopenharmony_ci    p_g = &g_filt[0];
674cabdff1aSopenharmony_ci    p_y = &Y[0][0];
675cabdff1aSopenharmony_ci    p_x = &X_high[0][ixh][0];
676cabdff1aSopenharmony_ci    loop_end = p_g + m_max;
677cabdff1aSopenharmony_ci
678cabdff1aSopenharmony_ci    __asm__ volatile(
679cabdff1aSopenharmony_ci        ".set    push                                \n\t"
680cabdff1aSopenharmony_ci        ".set    noreorder                           \n\t"
681cabdff1aSopenharmony_ci    "1:                                              \n\t"
682cabdff1aSopenharmony_ci        "lwc1    %[temp0],   0(%[p_g])               \n\t"
683cabdff1aSopenharmony_ci        "lwc1    %[temp1],   0(%[p_x])               \n\t"
684cabdff1aSopenharmony_ci        "lwc1    %[temp2],   4(%[p_x])               \n\t"
685cabdff1aSopenharmony_ci        "mul.s   %[temp1],   %[temp1],     %[temp0]  \n\t"
686cabdff1aSopenharmony_ci        "mul.s   %[temp2],   %[temp2],     %[temp0]  \n\t"
687cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_g],   %[p_g],       4         \n\t"
688cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_x],   %[p_x],       320       \n\t"
689cabdff1aSopenharmony_ci        "swc1    %[temp1],   0(%[p_y])               \n\t"
690cabdff1aSopenharmony_ci        "swc1    %[temp2],   4(%[p_y])               \n\t"
691cabdff1aSopenharmony_ci        "bne     %[p_g],     %[loop_end],  1b        \n\t"
692cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_y],   %[p_y],       8         \n\t"
693cabdff1aSopenharmony_ci        ".set    pop                                 \n\t"
694cabdff1aSopenharmony_ci
695cabdff1aSopenharmony_ci        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
696cabdff1aSopenharmony_ci          [temp2]"=&f"(temp2), [p_x]"+r"(p_x),
697cabdff1aSopenharmony_ci          [p_y]"+r"(p_y), [p_g]"+r"(p_g)
698cabdff1aSopenharmony_ci        : [loop_end]"r"(loop_end)
699cabdff1aSopenharmony_ci        : "memory"
700cabdff1aSopenharmony_ci    );
701cabdff1aSopenharmony_ci}
702cabdff1aSopenharmony_ci
703cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m,
704cabdff1aSopenharmony_ci                                 const float *q_filt, int noise,
705cabdff1aSopenharmony_ci                                 int kx, int m_max)
706cabdff1aSopenharmony_ci{
707cabdff1aSopenharmony_ci    int m;
708cabdff1aSopenharmony_ci
709cabdff1aSopenharmony_ci    for (m = 0; m < m_max; m++){
710cabdff1aSopenharmony_ci
711cabdff1aSopenharmony_ci        float *Y1=&Y[m][0];
712cabdff1aSopenharmony_ci        float *ff_table;
713cabdff1aSopenharmony_ci        float y0,y1, temp1, temp2, temp4, temp5;
714cabdff1aSopenharmony_ci        int temp0, temp3;
715cabdff1aSopenharmony_ci        const float *s_m1=&s_m[m];
716cabdff1aSopenharmony_ci        const float *q_filt1= &q_filt[m];
717cabdff1aSopenharmony_ci
718cabdff1aSopenharmony_ci        __asm__ volatile(
719cabdff1aSopenharmony_ci            "lwc1    %[y0],       0(%[Y1])                                    \n\t"
720cabdff1aSopenharmony_ci            "lwc1    %[temp1],    0(%[s_m1])                                  \n\t"
721cabdff1aSopenharmony_ci            "addiu   %[noise],    %[noise],              1                    \n\t"
722cabdff1aSopenharmony_ci            "andi    %[noise],    %[noise],              0x1ff                \n\t"
723cabdff1aSopenharmony_ci            "sll     %[temp0],    %[noise], 3                                 \n\t"
724cabdff1aSopenharmony_ci            PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0]             \n\t"
725cabdff1aSopenharmony_ci            "add.s   %[y0],       %[y0],                 %[temp1]             \n\t"
726cabdff1aSopenharmony_ci            "mfc1    %[temp3],    %[temp1]                                    \n\t"
727cabdff1aSopenharmony_ci            "bne     %[temp3],    $0,                    1f                   \n\t"
728cabdff1aSopenharmony_ci            "lwc1    %[y1],       4(%[Y1])                                    \n\t"
729cabdff1aSopenharmony_ci            "lwc1    %[temp2],    0(%[q_filt1])                               \n\t"
730cabdff1aSopenharmony_ci            "lwc1    %[temp4],    0(%[ff_table])                              \n\t"
731cabdff1aSopenharmony_ci            "lwc1    %[temp5],    4(%[ff_table])                              \n\t"
732cabdff1aSopenharmony_ci            "madd.s  %[y0],       %[y0],                 %[temp2],  %[temp4]  \n\t"
733cabdff1aSopenharmony_ci            "madd.s  %[y1],       %[y1],                 %[temp2],  %[temp5]  \n\t"
734cabdff1aSopenharmony_ci            "swc1    %[y1],       4(%[Y1])                                    \n\t"
735cabdff1aSopenharmony_ci        "1:                                                                   \n\t"
736cabdff1aSopenharmony_ci            "swc1    %[y0],       0(%[Y1])                                    \n\t"
737cabdff1aSopenharmony_ci
738cabdff1aSopenharmony_ci            : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
739cabdff1aSopenharmony_ci              [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
740cabdff1aSopenharmony_ci              [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
741cabdff1aSopenharmony_ci            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
742cabdff1aSopenharmony_ci              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
743cabdff1aSopenharmony_ci            : "memory"
744cabdff1aSopenharmony_ci        );
745cabdff1aSopenharmony_ci    }
746cabdff1aSopenharmony_ci}
747cabdff1aSopenharmony_ci
748cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m,
749cabdff1aSopenharmony_ci                                 const float *q_filt, int noise,
750cabdff1aSopenharmony_ci                                 int kx, int m_max)
751cabdff1aSopenharmony_ci{
752cabdff1aSopenharmony_ci    float y0,y1,temp1, temp2, temp4, temp5;
753cabdff1aSopenharmony_ci    int temp0, temp3, m;
754cabdff1aSopenharmony_ci    float phi_sign = 1 - 2 * (kx & 1);
755cabdff1aSopenharmony_ci
756cabdff1aSopenharmony_ci    for (m = 0; m < m_max; m++) {
757cabdff1aSopenharmony_ci
758cabdff1aSopenharmony_ci        float *ff_table;
759cabdff1aSopenharmony_ci        float *Y1=&Y[m][0];
760cabdff1aSopenharmony_ci        const float *s_m1=&s_m[m];
761cabdff1aSopenharmony_ci        const float *q_filt1= &q_filt[m];
762cabdff1aSopenharmony_ci
763cabdff1aSopenharmony_ci        __asm__ volatile(
764cabdff1aSopenharmony_ci            "lwc1   %[y1],       4(%[Y1])                                     \n\t"
765cabdff1aSopenharmony_ci            "lwc1   %[temp1],    0(%[s_m1])                                   \n\t"
766cabdff1aSopenharmony_ci            "lw     %[temp3],    0(%[s_m1])                                   \n\t"
767cabdff1aSopenharmony_ci            "addiu  %[noise],    %[noise],               1                    \n\t"
768cabdff1aSopenharmony_ci            "andi   %[noise],    %[noise],               0x1ff                \n\t"
769cabdff1aSopenharmony_ci            "sll    %[temp0],    %[noise],               3                    \n\t"
770cabdff1aSopenharmony_ci            PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0]              \n\t"
771cabdff1aSopenharmony_ci            "madd.s %[y1],       %[y1],                 %[temp1], %[phi_sign] \n\t"
772cabdff1aSopenharmony_ci            "bne    %[temp3],    $0,                    1f                    \n\t"
773cabdff1aSopenharmony_ci            "lwc1   %[y0],       0(%[Y1])                                     \n\t"
774cabdff1aSopenharmony_ci            "lwc1   %[temp2],    0(%[q_filt1])                                \n\t"
775cabdff1aSopenharmony_ci            "lwc1   %[temp4],    0(%[ff_table])                               \n\t"
776cabdff1aSopenharmony_ci            "lwc1   %[temp5],    4(%[ff_table])                               \n\t"
777cabdff1aSopenharmony_ci            "madd.s %[y0],       %[y0],                 %[temp2], %[temp4]    \n\t"
778cabdff1aSopenharmony_ci            "madd.s %[y1],       %[y1],                 %[temp2], %[temp5]    \n\t"
779cabdff1aSopenharmony_ci            "swc1   %[y0],       0(%[Y1])                                     \n\t"
780cabdff1aSopenharmony_ci        "1:                                                                   \n\t"
781cabdff1aSopenharmony_ci            "swc1   %[y1],       4(%[Y1])                                     \n\t"
782cabdff1aSopenharmony_ci
783cabdff1aSopenharmony_ci            : [ff_table] "=&r" (ff_table), [y0] "=&f" (y0), [y1] "=&f" (y1),
784cabdff1aSopenharmony_ci              [temp0] "=&r" (temp0), [temp1] "=&f" (temp1), [temp2] "=&f" (temp2),
785cabdff1aSopenharmony_ci              [temp3] "=&r" (temp3), [temp4] "=&f" (temp4), [temp5] "=&f" (temp5)
786cabdff1aSopenharmony_ci            : [ff_sbr_noise_table] "r" (ff_sbr_noise_table), [noise] "r" (noise),
787cabdff1aSopenharmony_ci              [Y1] "r" (Y1), [s_m1] "r" (s_m1), [q_filt1] "r" (q_filt1),
788cabdff1aSopenharmony_ci              [phi_sign] "f" (phi_sign)
789cabdff1aSopenharmony_ci            : "memory"
790cabdff1aSopenharmony_ci        );
791cabdff1aSopenharmony_ci        phi_sign = -phi_sign;
792cabdff1aSopenharmony_ci    }
793cabdff1aSopenharmony_ci}
794cabdff1aSopenharmony_ci
795cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m,
796cabdff1aSopenharmony_ci                                 const float *q_filt, int noise,
797cabdff1aSopenharmony_ci                                 int kx, int m_max)
798cabdff1aSopenharmony_ci{
799cabdff1aSopenharmony_ci    int m, temp0, temp1;
800cabdff1aSopenharmony_ci    float *ff_table;
801cabdff1aSopenharmony_ci    float y0, y1, temp2, temp3, temp4, temp5;
802cabdff1aSopenharmony_ci
803cabdff1aSopenharmony_ci    for (m = 0; m < m_max; m++) {
804cabdff1aSopenharmony_ci
805cabdff1aSopenharmony_ci        float *Y1=&Y[m][0];
806cabdff1aSopenharmony_ci        const float *s_m1=&s_m[m];
807cabdff1aSopenharmony_ci        const float *q_filt1= &q_filt[m];
808cabdff1aSopenharmony_ci
809cabdff1aSopenharmony_ci        __asm__ volatile(
810cabdff1aSopenharmony_ci            "lwc1   %[y0],       0(%[Y1])                                  \n\t"
811cabdff1aSopenharmony_ci            "lwc1   %[temp3],    0(%[s_m1])                                \n\t"
812cabdff1aSopenharmony_ci            "addiu  %[noise],    %[noise],              1                  \n\t"
813cabdff1aSopenharmony_ci            "andi   %[noise],    %[noise],              0x1ff              \n\t"
814cabdff1aSopenharmony_ci            "sll    %[temp0],    %[noise],              3                  \n\t"
815cabdff1aSopenharmony_ci            PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0]           \n\t"
816cabdff1aSopenharmony_ci            "sub.s  %[y0],       %[y0],                 %[temp3]           \n\t"
817cabdff1aSopenharmony_ci            "mfc1   %[temp1],    %[temp3]                                  \n\t"
818cabdff1aSopenharmony_ci            "bne    %[temp1],    $0,                    1f                 \n\t"
819cabdff1aSopenharmony_ci            "lwc1   %[y1],       4(%[Y1])                                  \n\t"
820cabdff1aSopenharmony_ci            "lwc1   %[temp2],    0(%[q_filt1])                             \n\t"
821cabdff1aSopenharmony_ci            "lwc1   %[temp4],    0(%[ff_table])                            \n\t"
822cabdff1aSopenharmony_ci            "lwc1   %[temp5],    4(%[ff_table])                            \n\t"
823cabdff1aSopenharmony_ci            "madd.s %[y0],       %[y0],                 %[temp2], %[temp4] \n\t"
824cabdff1aSopenharmony_ci            "madd.s %[y1],       %[y1],                 %[temp2], %[temp5] \n\t"
825cabdff1aSopenharmony_ci            "swc1   %[y1],       4(%[Y1])                                  \n\t"
826cabdff1aSopenharmony_ci        "1:                                                                \n\t"
827cabdff1aSopenharmony_ci            "swc1   %[y0],       0(%[Y1])                                  \n\t"
828cabdff1aSopenharmony_ci
829cabdff1aSopenharmony_ci            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [y0]"=&f"(y0),
830cabdff1aSopenharmony_ci              [y1]"=&f"(y1), [ff_table]"=&r"(ff_table),
831cabdff1aSopenharmony_ci              [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
832cabdff1aSopenharmony_ci              [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
833cabdff1aSopenharmony_ci            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
834cabdff1aSopenharmony_ci              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
835cabdff1aSopenharmony_ci            : "memory"
836cabdff1aSopenharmony_ci        );
837cabdff1aSopenharmony_ci    }
838cabdff1aSopenharmony_ci}
839cabdff1aSopenharmony_ci
840cabdff1aSopenharmony_cistatic void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
841cabdff1aSopenharmony_ci                                 const float *q_filt, int noise,
842cabdff1aSopenharmony_ci                                 int kx, int m_max)
843cabdff1aSopenharmony_ci{
844cabdff1aSopenharmony_ci    float phi_sign = 1 - 2 * (kx & 1);
845cabdff1aSopenharmony_ci    int m;
846cabdff1aSopenharmony_ci
847cabdff1aSopenharmony_ci    for (m = 0; m < m_max; m++) {
848cabdff1aSopenharmony_ci
849cabdff1aSopenharmony_ci        float *Y1=&Y[m][0];
850cabdff1aSopenharmony_ci        float *ff_table;
851cabdff1aSopenharmony_ci        float y0,y1, temp1, temp2, temp4, temp5;
852cabdff1aSopenharmony_ci        int temp0, temp3;
853cabdff1aSopenharmony_ci        const float *s_m1=&s_m[m];
854cabdff1aSopenharmony_ci        const float *q_filt1= &q_filt[m];
855cabdff1aSopenharmony_ci
856cabdff1aSopenharmony_ci        __asm__ volatile(
857cabdff1aSopenharmony_ci            "lwc1    %[y1],       4(%[Y1])                                     \n\t"
858cabdff1aSopenharmony_ci            "lwc1    %[temp1],    0(%[s_m1])                                   \n\t"
859cabdff1aSopenharmony_ci            "addiu   %[noise],    %[noise],              1                     \n\t"
860cabdff1aSopenharmony_ci            "andi    %[noise],    %[noise],              0x1ff                 \n\t"
861cabdff1aSopenharmony_ci            "sll     %[temp0],    %[noise],              3                     \n\t"
862cabdff1aSopenharmony_ci            PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0]              \n\t"
863cabdff1aSopenharmony_ci            "nmsub.s %[y1],       %[y1],                 %[temp1], %[phi_sign] \n\t"
864cabdff1aSopenharmony_ci            "mfc1    %[temp3],    %[temp1]                                     \n\t"
865cabdff1aSopenharmony_ci            "bne     %[temp3],    $0,                    1f                    \n\t"
866cabdff1aSopenharmony_ci            "lwc1    %[y0],       0(%[Y1])                                     \n\t"
867cabdff1aSopenharmony_ci            "lwc1    %[temp2],    0(%[q_filt1])                                \n\t"
868cabdff1aSopenharmony_ci            "lwc1    %[temp4],    0(%[ff_table])                               \n\t"
869cabdff1aSopenharmony_ci            "lwc1    %[temp5],    4(%[ff_table])                               \n\t"
870cabdff1aSopenharmony_ci            "madd.s  %[y0],       %[y0],                 %[temp2], %[temp4]    \n\t"
871cabdff1aSopenharmony_ci            "madd.s  %[y1],       %[y1],                 %[temp2], %[temp5]    \n\t"
872cabdff1aSopenharmony_ci            "swc1    %[y0],       0(%[Y1])                                     \n\t"
873cabdff1aSopenharmony_ci            "1:                                                                \n\t"
874cabdff1aSopenharmony_ci            "swc1    %[y1],       4(%[Y1])                                     \n\t"
875cabdff1aSopenharmony_ci
876cabdff1aSopenharmony_ci            : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
877cabdff1aSopenharmony_ci              [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
878cabdff1aSopenharmony_ci              [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
879cabdff1aSopenharmony_ci            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
880cabdff1aSopenharmony_ci              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1),
881cabdff1aSopenharmony_ci              [phi_sign]"f"(phi_sign)
882cabdff1aSopenharmony_ci            : "memory"
883cabdff1aSopenharmony_ci        );
884cabdff1aSopenharmony_ci       phi_sign = -phi_sign;
885cabdff1aSopenharmony_ci    }
886cabdff1aSopenharmony_ci}
887cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
888cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
889cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
890cabdff1aSopenharmony_ci
891cabdff1aSopenharmony_civoid ff_sbrdsp_init_mips(SBRDSPContext *s)
892cabdff1aSopenharmony_ci{
893cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
894cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
895cabdff1aSopenharmony_ci    s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_mips;
896cabdff1aSopenharmony_ci    s->qmf_post_shuffle = sbr_qmf_post_shuffle_mips;
897cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
898cabdff1aSopenharmony_ci    s->sum64x5 = sbr_sum64x5_mips;
899cabdff1aSopenharmony_ci    s->sum_square = sbr_sum_square_mips;
900cabdff1aSopenharmony_ci    s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips;
901cabdff1aSopenharmony_ci    s->autocorrelate = sbr_autocorrelate_mips;
902cabdff1aSopenharmony_ci    s->hf_gen = sbr_hf_gen_mips;
903cabdff1aSopenharmony_ci    s->hf_g_filt = sbr_hf_g_filt_mips;
904cabdff1aSopenharmony_ci
905cabdff1aSopenharmony_ci    s->hf_apply_noise[0] = sbr_hf_apply_noise_0_mips;
906cabdff1aSopenharmony_ci    s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips;
907cabdff1aSopenharmony_ci    s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips;
908cabdff1aSopenharmony_ci    s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips;
909cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
910cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
911cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
912cabdff1aSopenharmony_ci}
913