1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2012
3cabdff1aSopenharmony_ci *      MIPS Technologies, Inc., California.
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * Redistribution and use in source and binary forms, with or without
6cabdff1aSopenharmony_ci * modification, are permitted provided that the following conditions
7cabdff1aSopenharmony_ci * are met:
8cabdff1aSopenharmony_ci * 1. Redistributions of source code must retain the above copyright
9cabdff1aSopenharmony_ci *    notice, this list of conditions and the following disclaimer.
10cabdff1aSopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright
11cabdff1aSopenharmony_ci *    notice, this list of conditions and the following disclaimer in the
12cabdff1aSopenharmony_ci *    documentation and/or other materials provided with the distribution.
13cabdff1aSopenharmony_ci * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14cabdff1aSopenharmony_ci *    contributors may be used to endorse or promote products derived from
15cabdff1aSopenharmony_ci *    this software without specific prior written permission.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18cabdff1aSopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19cabdff1aSopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20cabdff1aSopenharmony_ci * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21cabdff1aSopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22cabdff1aSopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23cabdff1aSopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24cabdff1aSopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25cabdff1aSopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26cabdff1aSopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27cabdff1aSopenharmony_ci * SUCH DAMAGE.
28cabdff1aSopenharmony_ci *
29cabdff1aSopenharmony_ci * Authors:  Djordje Pesut   (djordje@mips.com)
30cabdff1aSopenharmony_ci *           Mirjana Vulin   (mvulin@mips.com)
31cabdff1aSopenharmony_ci *
32cabdff1aSopenharmony_ci * This file is part of FFmpeg.
33cabdff1aSopenharmony_ci *
34cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
35cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
36cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
37cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
38cabdff1aSopenharmony_ci *
39cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
40cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
41cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
42cabdff1aSopenharmony_ci * Lesser General Public License for more details.
43cabdff1aSopenharmony_ci *
44cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
45cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
46cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47cabdff1aSopenharmony_ci */
48cabdff1aSopenharmony_ci
49cabdff1aSopenharmony_ci/**
50cabdff1aSopenharmony_ci * @file
51cabdff1aSopenharmony_ci * Reference: libavcodec/aacsbr.c
52cabdff1aSopenharmony_ci */
53cabdff1aSopenharmony_ci
54cabdff1aSopenharmony_ci#include "libavcodec/aac.h"
55cabdff1aSopenharmony_ci#include "libavcodec/aacsbr.h"
56cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h"
57cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h"
58cabdff1aSopenharmony_ci
59cabdff1aSopenharmony_ci#define ENVELOPE_ADJUSTMENT_OFFSET 2
60cabdff1aSopenharmony_ci
61cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
62cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
63cabdff1aSopenharmony_cistatic int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
64cabdff1aSopenharmony_ci                      float X_low[32][40][2], const float W[2][32][32][2],
65cabdff1aSopenharmony_ci                      int buf_idx)
66cabdff1aSopenharmony_ci{
67cabdff1aSopenharmony_ci    int i, k;
68cabdff1aSopenharmony_ci    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
69cabdff1aSopenharmony_ci    float *p_x_low = &X_low[0][8][0];
70cabdff1aSopenharmony_ci    float *p_w = (float*)&W[buf_idx][0][0][0];
71cabdff1aSopenharmony_ci    float *p_x1_low = &X_low[0][0][0];
72cabdff1aSopenharmony_ci    float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
73cabdff1aSopenharmony_ci
74cabdff1aSopenharmony_ci    float *loop_end=p_x1_low + 2560;
75cabdff1aSopenharmony_ci
76cabdff1aSopenharmony_ci    /* loop unrolled 8 times */
77cabdff1aSopenharmony_ci    __asm__ volatile (
78cabdff1aSopenharmony_ci    "1:                                                 \n\t"
79cabdff1aSopenharmony_ci        "sw     $0,            0(%[p_x1_low])           \n\t"
80cabdff1aSopenharmony_ci        "sw     $0,            4(%[p_x1_low])           \n\t"
81cabdff1aSopenharmony_ci        "sw     $0,            8(%[p_x1_low])           \n\t"
82cabdff1aSopenharmony_ci        "sw     $0,            12(%[p_x1_low])          \n\t"
83cabdff1aSopenharmony_ci        "sw     $0,            16(%[p_x1_low])          \n\t"
84cabdff1aSopenharmony_ci        "sw     $0,            20(%[p_x1_low])          \n\t"
85cabdff1aSopenharmony_ci        "sw     $0,            24(%[p_x1_low])          \n\t"
86cabdff1aSopenharmony_ci        "sw     $0,            28(%[p_x1_low])          \n\t"
87cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_x1_low],%[p_x1_low],      32     \n\t"
88cabdff1aSopenharmony_ci        "bne    %[p_x1_low],   %[loop_end],      1b     \n\t"
89cabdff1aSopenharmony_ci        PTR_ADDIU "%[p_x1_low],%[p_x1_low],      -10240 \n\t"
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci        : [p_x1_low]"+r"(p_x1_low)
92cabdff1aSopenharmony_ci        : [loop_end]"r"(loop_end)
93cabdff1aSopenharmony_ci        : "memory"
94cabdff1aSopenharmony_ci    );
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_ci    for (k = 0; k < sbr->kx[1]; k++) {
97cabdff1aSopenharmony_ci        for (i = 0; i < 32; i+=4) {
98cabdff1aSopenharmony_ci            /* loop unrolled 4 times */
99cabdff1aSopenharmony_ci            __asm__ volatile (
100cabdff1aSopenharmony_ci                "lw     %[temp0],   0(%[p_w])               \n\t"
101cabdff1aSopenharmony_ci                "lw     %[temp1],   4(%[p_w])               \n\t"
102cabdff1aSopenharmony_ci                "lw     %[temp2],   256(%[p_w])             \n\t"
103cabdff1aSopenharmony_ci                "lw     %[temp3],   260(%[p_w])             \n\t"
104cabdff1aSopenharmony_ci                "lw     %[temp4],   512(%[p_w])             \n\t"
105cabdff1aSopenharmony_ci                "lw     %[temp5],   516(%[p_w])             \n\t"
106cabdff1aSopenharmony_ci                "lw     %[temp6],   768(%[p_w])             \n\t"
107cabdff1aSopenharmony_ci                "lw     %[temp7],   772(%[p_w])             \n\t"
108cabdff1aSopenharmony_ci                "sw     %[temp0],   0(%[p_x_low])           \n\t"
109cabdff1aSopenharmony_ci                "sw     %[temp1],   4(%[p_x_low])           \n\t"
110cabdff1aSopenharmony_ci                "sw     %[temp2],   8(%[p_x_low])           \n\t"
111cabdff1aSopenharmony_ci                "sw     %[temp3],   12(%[p_x_low])          \n\t"
112cabdff1aSopenharmony_ci                "sw     %[temp4],   16(%[p_x_low])          \n\t"
113cabdff1aSopenharmony_ci                "sw     %[temp5],   20(%[p_x_low])          \n\t"
114cabdff1aSopenharmony_ci                "sw     %[temp6],   24(%[p_x_low])          \n\t"
115cabdff1aSopenharmony_ci                "sw     %[temp7],   28(%[p_x_low])          \n\t"
116cabdff1aSopenharmony_ci                PTR_ADDIU "%[p_x_low], %[p_x_low],  32      \n\t"
117cabdff1aSopenharmony_ci                PTR_ADDIU "%[p_w],     %[p_w],      1024    \n\t"
118cabdff1aSopenharmony_ci
119cabdff1aSopenharmony_ci                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
120cabdff1aSopenharmony_ci                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
121cabdff1aSopenharmony_ci                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
122cabdff1aSopenharmony_ci                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
123cabdff1aSopenharmony_ci                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
124cabdff1aSopenharmony_ci                :
125cabdff1aSopenharmony_ci                : "memory"
126cabdff1aSopenharmony_ci            );
127cabdff1aSopenharmony_ci        }
128cabdff1aSopenharmony_ci        p_x_low += 16;
129cabdff1aSopenharmony_ci        p_w -= 2046;
130cabdff1aSopenharmony_ci    }
131cabdff1aSopenharmony_ci
132cabdff1aSopenharmony_ci    for (k = 0; k < sbr->kx[0]; k++) {
133cabdff1aSopenharmony_ci        for (i = 0; i < 2; i++) {
134cabdff1aSopenharmony_ci
135cabdff1aSopenharmony_ci            /* loop unrolled 4 times */
136cabdff1aSopenharmony_ci            __asm__ volatile (
137cabdff1aSopenharmony_ci                "lw     %[temp0],    0(%[p_w1])             \n\t"
138cabdff1aSopenharmony_ci                "lw     %[temp1],    4(%[p_w1])             \n\t"
139cabdff1aSopenharmony_ci                "lw     %[temp2],    256(%[p_w1])           \n\t"
140cabdff1aSopenharmony_ci                "lw     %[temp3],    260(%[p_w1])           \n\t"
141cabdff1aSopenharmony_ci                "lw     %[temp4],    512(%[p_w1])           \n\t"
142cabdff1aSopenharmony_ci                "lw     %[temp5],    516(%[p_w1])           \n\t"
143cabdff1aSopenharmony_ci                "lw     %[temp6],    768(%[p_w1])           \n\t"
144cabdff1aSopenharmony_ci                "lw     %[temp7],    772(%[p_w1])           \n\t"
145cabdff1aSopenharmony_ci                "sw     %[temp0],    0(%[p_x1_low])         \n\t"
146cabdff1aSopenharmony_ci                "sw     %[temp1],    4(%[p_x1_low])         \n\t"
147cabdff1aSopenharmony_ci                "sw     %[temp2],    8(%[p_x1_low])         \n\t"
148cabdff1aSopenharmony_ci                "sw     %[temp3],    12(%[p_x1_low])        \n\t"
149cabdff1aSopenharmony_ci                "sw     %[temp4],    16(%[p_x1_low])        \n\t"
150cabdff1aSopenharmony_ci                "sw     %[temp5],    20(%[p_x1_low])        \n\t"
151cabdff1aSopenharmony_ci                "sw     %[temp6],    24(%[p_x1_low])        \n\t"
152cabdff1aSopenharmony_ci                "sw     %[temp7],    28(%[p_x1_low])        \n\t"
153cabdff1aSopenharmony_ci                PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32     \n\t"
154cabdff1aSopenharmony_ci                PTR_ADDIU "%[p_w1],     %[p_w1],     1024   \n\t"
155cabdff1aSopenharmony_ci
156cabdff1aSopenharmony_ci                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
157cabdff1aSopenharmony_ci                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
158cabdff1aSopenharmony_ci                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
159cabdff1aSopenharmony_ci                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
160cabdff1aSopenharmony_ci                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
161cabdff1aSopenharmony_ci                :
162cabdff1aSopenharmony_ci                : "memory"
163cabdff1aSopenharmony_ci            );
164cabdff1aSopenharmony_ci        }
165cabdff1aSopenharmony_ci        p_x1_low += 64;
166cabdff1aSopenharmony_ci        p_w1 -= 510;
167cabdff1aSopenharmony_ci    }
168cabdff1aSopenharmony_ci    return 0;
169cabdff1aSopenharmony_ci}
170cabdff1aSopenharmony_ci
171cabdff1aSopenharmony_cistatic int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
172cabdff1aSopenharmony_ci                     const float Y0[38][64][2], const float Y1[38][64][2],
173cabdff1aSopenharmony_ci                     const float X_low[32][40][2], int ch)
174cabdff1aSopenharmony_ci{
175cabdff1aSopenharmony_ci    int k, i;
176cabdff1aSopenharmony_ci    const int i_f = 32;
177cabdff1aSopenharmony_ci    int temp0, temp1, temp2, temp3;
178cabdff1aSopenharmony_ci    const float *X_low1, *Y01, *Y11;
179cabdff1aSopenharmony_ci    float *x1=&X[0][0][0];
180cabdff1aSopenharmony_ci    float *j=x1+4864;
181cabdff1aSopenharmony_ci    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ci    /* loop unrolled 8 times */
184cabdff1aSopenharmony_ci    __asm__ volatile (
185cabdff1aSopenharmony_ci    "1:                                       \n\t"
186cabdff1aSopenharmony_ci        "sw     $0,      0(%[x1])             \n\t"
187cabdff1aSopenharmony_ci        "sw     $0,      4(%[x1])             \n\t"
188cabdff1aSopenharmony_ci        "sw     $0,      8(%[x1])             \n\t"
189cabdff1aSopenharmony_ci        "sw     $0,      12(%[x1])            \n\t"
190cabdff1aSopenharmony_ci        "sw     $0,      16(%[x1])            \n\t"
191cabdff1aSopenharmony_ci        "sw     $0,      20(%[x1])            \n\t"
192cabdff1aSopenharmony_ci        "sw     $0,      24(%[x1])            \n\t"
193cabdff1aSopenharmony_ci        "sw     $0,      28(%[x1])            \n\t"
194cabdff1aSopenharmony_ci        PTR_ADDIU "%[x1],%[x1],      32       \n\t"
195cabdff1aSopenharmony_ci        "bne    %[x1],   %[j],       1b       \n\t"
196cabdff1aSopenharmony_ci        PTR_ADDIU "%[x1],%[x1],      -19456   \n\t"
197cabdff1aSopenharmony_ci
198cabdff1aSopenharmony_ci        : [x1]"+r"(x1)
199cabdff1aSopenharmony_ci        : [j]"r"(j)
200cabdff1aSopenharmony_ci        : "memory"
201cabdff1aSopenharmony_ci    );
202cabdff1aSopenharmony_ci
203cabdff1aSopenharmony_ci    if (i_Temp != 0) {
204cabdff1aSopenharmony_ci
205cabdff1aSopenharmony_ci        X_low1=&X_low[0][2][0];
206cabdff1aSopenharmony_ci
207cabdff1aSopenharmony_ci        for (k = 0; k < sbr->kx[0]; k++) {
208cabdff1aSopenharmony_ci
209cabdff1aSopenharmony_ci            __asm__ volatile (
210cabdff1aSopenharmony_ci                "move    %[i],        $zero                  \n\t"
211cabdff1aSopenharmony_ci            "2:                                              \n\t"
212cabdff1aSopenharmony_ci                "lw      %[temp0],    0(%[X_low1])           \n\t"
213cabdff1aSopenharmony_ci                "lw      %[temp1],    4(%[X_low1])           \n\t"
214cabdff1aSopenharmony_ci                "sw      %[temp0],    0(%[x1])               \n\t"
215cabdff1aSopenharmony_ci                "sw      %[temp1],    9728(%[x1])            \n\t"
216cabdff1aSopenharmony_ci                PTR_ADDIU "%[x1],     %[x1],         256     \n\t"
217cabdff1aSopenharmony_ci                PTR_ADDIU "%[X_low1], %[X_low1],     8       \n\t"
218cabdff1aSopenharmony_ci                "addiu   %[i],        %[i],          1       \n\t"
219cabdff1aSopenharmony_ci                "bne     %[i],        %[i_Temp],     2b      \n\t"
220cabdff1aSopenharmony_ci
221cabdff1aSopenharmony_ci                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
222cabdff1aSopenharmony_ci                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
223cabdff1aSopenharmony_ci                : [i_Temp]"r"(i_Temp)
224cabdff1aSopenharmony_ci                : "memory"
225cabdff1aSopenharmony_ci            );
226cabdff1aSopenharmony_ci            x1-=(i_Temp<<6)-1;
227cabdff1aSopenharmony_ci            X_low1-=(i_Temp<<1)-80;
228cabdff1aSopenharmony_ci        }
229cabdff1aSopenharmony_ci
230cabdff1aSopenharmony_ci        x1=&X[0][0][k];
231cabdff1aSopenharmony_ci        Y01=(float*)&Y0[32][k][0];
232cabdff1aSopenharmony_ci
233cabdff1aSopenharmony_ci        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
234cabdff1aSopenharmony_ci            __asm__ volatile (
235cabdff1aSopenharmony_ci                "move    %[i],       $zero               \n\t"
236cabdff1aSopenharmony_ci            "3:                                          \n\t"
237cabdff1aSopenharmony_ci                "lw      %[temp0],   0(%[Y01])           \n\t"
238cabdff1aSopenharmony_ci                "lw      %[temp1],   4(%[Y01])           \n\t"
239cabdff1aSopenharmony_ci                "sw      %[temp0],   0(%[x1])            \n\t"
240cabdff1aSopenharmony_ci                "sw      %[temp1],   9728(%[x1])         \n\t"
241cabdff1aSopenharmony_ci                PTR_ADDIU "%[x1],    %[x1],      256     \n\t"
242cabdff1aSopenharmony_ci                PTR_ADDIU "%[Y01],   %[Y01],     512     \n\t"
243cabdff1aSopenharmony_ci                "addiu   %[i],       %[i],       1       \n\t"
244cabdff1aSopenharmony_ci                "bne     %[i],       %[i_Temp],  3b      \n\t"
245cabdff1aSopenharmony_ci
246cabdff1aSopenharmony_ci                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
247cabdff1aSopenharmony_ci                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
248cabdff1aSopenharmony_ci                : [i_Temp]"r"(i_Temp)
249cabdff1aSopenharmony_ci                : "memory"
250cabdff1aSopenharmony_ci            );
251cabdff1aSopenharmony_ci            x1 -=(i_Temp<<6)-1;
252cabdff1aSopenharmony_ci            Y01 -=(i_Temp<<7)-2;
253cabdff1aSopenharmony_ci        }
254cabdff1aSopenharmony_ci    }
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci    x1=&X[0][i_Temp][0];
257cabdff1aSopenharmony_ci    X_low1=&X_low[0][i_Temp+2][0];
258cabdff1aSopenharmony_ci    temp3=38;
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci    for (k = 0; k < sbr->kx[1]; k++) {
261cabdff1aSopenharmony_ci
262cabdff1aSopenharmony_ci        __asm__ volatile (
263cabdff1aSopenharmony_ci            "move    %[i],       %[i_Temp]              \n\t"
264cabdff1aSopenharmony_ci        "4:                                             \n\t"
265cabdff1aSopenharmony_ci            "lw      %[temp0],   0(%[X_low1])           \n\t"
266cabdff1aSopenharmony_ci            "lw      %[temp1],   4(%[X_low1])           \n\t"
267cabdff1aSopenharmony_ci            "sw      %[temp0],   0(%[x1])               \n\t"
268cabdff1aSopenharmony_ci            "sw      %[temp1],   9728(%[x1])            \n\t"
269cabdff1aSopenharmony_ci            PTR_ADDIU "%[x1],    %[x1],         256     \n\t"
270cabdff1aSopenharmony_ci            PTR_ADDIU "%[X_low1],%[X_low1],     8       \n\t"
271cabdff1aSopenharmony_ci            "addiu   %[i],       %[i],          1       \n\t"
272cabdff1aSopenharmony_ci            "bne     %[i],       %[temp3],      4b      \n\t"
273cabdff1aSopenharmony_ci
274cabdff1aSopenharmony_ci            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
275cabdff1aSopenharmony_ci              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
276cabdff1aSopenharmony_ci              [temp2]"=&r"(temp2)
277cabdff1aSopenharmony_ci            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
278cabdff1aSopenharmony_ci            : "memory"
279cabdff1aSopenharmony_ci        );
280cabdff1aSopenharmony_ci        x1 -= ((38-i_Temp)<<6)-1;
281cabdff1aSopenharmony_ci        X_low1 -= ((38-i_Temp)<<1)- 80;
282cabdff1aSopenharmony_ci    }
283cabdff1aSopenharmony_ci
284cabdff1aSopenharmony_ci    x1=&X[0][i_Temp][k];
285cabdff1aSopenharmony_ci    Y11=&Y1[i_Temp][k][0];
286cabdff1aSopenharmony_ci    temp2=32;
287cabdff1aSopenharmony_ci
288cabdff1aSopenharmony_ci    for (; k < sbr->kx[1] + sbr->m[1]; k++) {
289cabdff1aSopenharmony_ci
290cabdff1aSopenharmony_ci        __asm__ volatile (
291cabdff1aSopenharmony_ci           "move    %[i],       %[i_Temp]               \n\t"
292cabdff1aSopenharmony_ci        "5:                                             \n\t"
293cabdff1aSopenharmony_ci           "lw      %[temp0],   0(%[Y11])               \n\t"
294cabdff1aSopenharmony_ci           "lw      %[temp1],   4(%[Y11])               \n\t"
295cabdff1aSopenharmony_ci           "sw      %[temp0],   0(%[x1])                \n\t"
296cabdff1aSopenharmony_ci           "sw      %[temp1],   9728(%[x1])             \n\t"
297cabdff1aSopenharmony_ci           PTR_ADDIU "%[x1],    %[x1],          256     \n\t"
298cabdff1aSopenharmony_ci           PTR_ADDIU "%[Y11],   %[Y11],         512     \n\t"
299cabdff1aSopenharmony_ci           "addiu   %[i],       %[i],           1       \n\t"
300cabdff1aSopenharmony_ci           "bne     %[i],       %[temp2],       5b      \n\t"
301cabdff1aSopenharmony_ci
302cabdff1aSopenharmony_ci           : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
303cabdff1aSopenharmony_ci             [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
304cabdff1aSopenharmony_ci           : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
305cabdff1aSopenharmony_ci             [temp2]"r"(temp2)
306cabdff1aSopenharmony_ci           : "memory"
307cabdff1aSopenharmony_ci        );
308cabdff1aSopenharmony_ci
309cabdff1aSopenharmony_ci        x1 -= ((32-i_Temp)<<6)-1;
310cabdff1aSopenharmony_ci        Y11 -= ((32-i_Temp)<<7)-2;
311cabdff1aSopenharmony_ci   }
312cabdff1aSopenharmony_ci      return 0;
313cabdff1aSopenharmony_ci}
314cabdff1aSopenharmony_ci
315cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
316cabdff1aSopenharmony_cistatic void sbr_hf_assemble_mips(float Y1[38][64][2],
317cabdff1aSopenharmony_ci                            const float X_high[64][40][2],
318cabdff1aSopenharmony_ci                            SpectralBandReplication *sbr, SBRData *ch_data,
319cabdff1aSopenharmony_ci                            const int e_a[2])
320cabdff1aSopenharmony_ci{
321cabdff1aSopenharmony_ci    int e, i, j, m;
322cabdff1aSopenharmony_ci    const int h_SL = 4 * !sbr->bs_smoothing_mode;
323cabdff1aSopenharmony_ci    const int kx = sbr->kx[1];
324cabdff1aSopenharmony_ci    const int m_max = sbr->m[1];
325cabdff1aSopenharmony_ci    static const float h_smooth[5] = {
326cabdff1aSopenharmony_ci        0.33333333333333,
327cabdff1aSopenharmony_ci        0.30150283239582,
328cabdff1aSopenharmony_ci        0.21816949906249,
329cabdff1aSopenharmony_ci        0.11516383427084,
330cabdff1aSopenharmony_ci        0.03183050093751,
331cabdff1aSopenharmony_ci    };
332cabdff1aSopenharmony_ci
333cabdff1aSopenharmony_ci    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
334cabdff1aSopenharmony_ci    int indexnoise = ch_data->f_indexnoise;
335cabdff1aSopenharmony_ci    int indexsine  = ch_data->f_indexsine;
336cabdff1aSopenharmony_ci    float *g_temp1, *q_temp1, *pok, *pok1;
337cabdff1aSopenharmony_ci    uint32_t temp1, temp2, temp3, temp4;
338cabdff1aSopenharmony_ci    int size = m_max;
339cabdff1aSopenharmony_ci
340cabdff1aSopenharmony_ci    if (sbr->reset) {
341cabdff1aSopenharmony_ci        for (i = 0; i < h_SL; i++) {
342cabdff1aSopenharmony_ci            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
343cabdff1aSopenharmony_ci            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
344cabdff1aSopenharmony_ci        }
345cabdff1aSopenharmony_ci    } else if (h_SL) {
346cabdff1aSopenharmony_ci        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
347cabdff1aSopenharmony_ci        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
348cabdff1aSopenharmony_ci    }
349cabdff1aSopenharmony_ci
350cabdff1aSopenharmony_ci    for (e = 0; e < ch_data->bs_num_env; e++) {
351cabdff1aSopenharmony_ci        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
352cabdff1aSopenharmony_ci            g_temp1 = g_temp[h_SL + i];
353cabdff1aSopenharmony_ci            pok = sbr->gain[e];
354cabdff1aSopenharmony_ci            q_temp1 = q_temp[h_SL + i];
355cabdff1aSopenharmony_ci            pok1 = sbr->q_m[e];
356cabdff1aSopenharmony_ci
357cabdff1aSopenharmony_ci            /* loop unrolled 4 times */
358cabdff1aSopenharmony_ci            for (j=0; j<(size>>2); j++) {
359cabdff1aSopenharmony_ci                __asm__ volatile (
360cabdff1aSopenharmony_ci                    "lw      %[temp1],   0(%[pok])               \n\t"
361cabdff1aSopenharmony_ci                    "lw      %[temp2],   4(%[pok])               \n\t"
362cabdff1aSopenharmony_ci                    "lw      %[temp3],   8(%[pok])               \n\t"
363cabdff1aSopenharmony_ci                    "lw      %[temp4],   12(%[pok])              \n\t"
364cabdff1aSopenharmony_ci                    "sw      %[temp1],   0(%[g_temp1])           \n\t"
365cabdff1aSopenharmony_ci                    "sw      %[temp2],   4(%[g_temp1])           \n\t"
366cabdff1aSopenharmony_ci                    "sw      %[temp3],   8(%[g_temp1])           \n\t"
367cabdff1aSopenharmony_ci                    "sw      %[temp4],   12(%[g_temp1])          \n\t"
368cabdff1aSopenharmony_ci                    "lw      %[temp1],   0(%[pok1])              \n\t"
369cabdff1aSopenharmony_ci                    "lw      %[temp2],   4(%[pok1])              \n\t"
370cabdff1aSopenharmony_ci                    "lw      %[temp3],   8(%[pok1])              \n\t"
371cabdff1aSopenharmony_ci                    "lw      %[temp4],   12(%[pok1])             \n\t"
372cabdff1aSopenharmony_ci                    "sw      %[temp1],   0(%[q_temp1])           \n\t"
373cabdff1aSopenharmony_ci                    "sw      %[temp2],   4(%[q_temp1])           \n\t"
374cabdff1aSopenharmony_ci                    "sw      %[temp3],   8(%[q_temp1])           \n\t"
375cabdff1aSopenharmony_ci                    "sw      %[temp4],   12(%[q_temp1])          \n\t"
376cabdff1aSopenharmony_ci                    PTR_ADDIU "%[pok],     %[pok],         16    \n\t"
377cabdff1aSopenharmony_ci                    PTR_ADDIU "%[g_temp1], %[g_temp1],     16    \n\t"
378cabdff1aSopenharmony_ci                    PTR_ADDIU "%[pok1],    %[pok1],        16    \n\t"
379cabdff1aSopenharmony_ci                    PTR_ADDIU "%[q_temp1], %[q_temp1],     16    \n\t"
380cabdff1aSopenharmony_ci
381cabdff1aSopenharmony_ci                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
382cabdff1aSopenharmony_ci                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
383cabdff1aSopenharmony_ci                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
384cabdff1aSopenharmony_ci                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
385cabdff1aSopenharmony_ci                    :
386cabdff1aSopenharmony_ci                    : "memory"
387cabdff1aSopenharmony_ci                );
388cabdff1aSopenharmony_ci            }
389cabdff1aSopenharmony_ci
390cabdff1aSopenharmony_ci            for (j=0; j<(size&3); j++) {
391cabdff1aSopenharmony_ci                __asm__ volatile (
392cabdff1aSopenharmony_ci                    "lw      %[temp1],   0(%[pok])              \n\t"
393cabdff1aSopenharmony_ci                    "lw      %[temp2],   0(%[pok1])             \n\t"
394cabdff1aSopenharmony_ci                    "sw      %[temp1],   0(%[g_temp1])          \n\t"
395cabdff1aSopenharmony_ci                    "sw      %[temp2],   0(%[q_temp1])          \n\t"
396cabdff1aSopenharmony_ci                    PTR_ADDIU "%[pok],     %[pok],        4     \n\t"
397cabdff1aSopenharmony_ci                    PTR_ADDIU "%[g_temp1], %[g_temp1],    4     \n\t"
398cabdff1aSopenharmony_ci                    PTR_ADDIU "%[pok1],    %[pok1],       4     \n\t"
399cabdff1aSopenharmony_ci                    PTR_ADDIU "%[q_temp1], %[q_temp1],    4     \n\t"
400cabdff1aSopenharmony_ci
401cabdff1aSopenharmony_ci                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
402cabdff1aSopenharmony_ci                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
403cabdff1aSopenharmony_ci                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
404cabdff1aSopenharmony_ci                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
405cabdff1aSopenharmony_ci                    :
406cabdff1aSopenharmony_ci                    : "memory"
407cabdff1aSopenharmony_ci                );
408cabdff1aSopenharmony_ci            }
409cabdff1aSopenharmony_ci        }
410cabdff1aSopenharmony_ci    }
411cabdff1aSopenharmony_ci
412cabdff1aSopenharmony_ci    for (e = 0; e < ch_data->bs_num_env; e++) {
413cabdff1aSopenharmony_ci        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
414cabdff1aSopenharmony_ci            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
415cabdff1aSopenharmony_ci            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
416cabdff1aSopenharmony_ci            float *g_filt, *q_filt;
417cabdff1aSopenharmony_ci
418cabdff1aSopenharmony_ci            if (h_SL && e != e_a[0] && e != e_a[1]) {
419cabdff1aSopenharmony_ci                g_filt = g_filt_tab;
420cabdff1aSopenharmony_ci                q_filt = q_filt_tab;
421cabdff1aSopenharmony_ci
422cabdff1aSopenharmony_ci                for (m = 0; m < m_max; m++) {
423cabdff1aSopenharmony_ci                    const int idx1 = i + h_SL;
424cabdff1aSopenharmony_ci                    g_filt[m] = 0.0f;
425cabdff1aSopenharmony_ci                    q_filt[m] = 0.0f;
426cabdff1aSopenharmony_ci
427cabdff1aSopenharmony_ci                    for (j = 0; j <= h_SL; j++) {
428cabdff1aSopenharmony_ci                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
429cabdff1aSopenharmony_ci                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
430cabdff1aSopenharmony_ci                    }
431cabdff1aSopenharmony_ci                }
432cabdff1aSopenharmony_ci            } else {
433cabdff1aSopenharmony_ci                g_filt = g_temp[i + h_SL];
434cabdff1aSopenharmony_ci                q_filt = q_temp[i];
435cabdff1aSopenharmony_ci            }
436cabdff1aSopenharmony_ci
437cabdff1aSopenharmony_ci            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
438cabdff1aSopenharmony_ci                               i + ENVELOPE_ADJUSTMENT_OFFSET);
439cabdff1aSopenharmony_ci
440cabdff1aSopenharmony_ci            if (e != e_a[0] && e != e_a[1]) {
441cabdff1aSopenharmony_ci                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
442cabdff1aSopenharmony_ci                                                   q_filt, indexnoise,
443cabdff1aSopenharmony_ci                                                   kx, m_max);
444cabdff1aSopenharmony_ci            } else {
445cabdff1aSopenharmony_ci                int idx = indexsine&1;
446cabdff1aSopenharmony_ci                int A = (1-((indexsine+(kx & 1))&2));
447cabdff1aSopenharmony_ci                int B = (A^(-idx)) + idx;
448cabdff1aSopenharmony_ci                float *out = &Y1[i][kx][idx];
449cabdff1aSopenharmony_ci                float *in  = sbr->s_m[e];
450cabdff1aSopenharmony_ci                float temp0, temp1, temp2, temp3, temp4, temp5;
451cabdff1aSopenharmony_ci                float A_f = (float)A;
452cabdff1aSopenharmony_ci                float B_f = (float)B;
453cabdff1aSopenharmony_ci
454cabdff1aSopenharmony_ci                for (m = 0; m+1 < m_max; m+=2) {
455cabdff1aSopenharmony_ci
456cabdff1aSopenharmony_ci                    temp2 = out[0];
457cabdff1aSopenharmony_ci                    temp3 = out[2];
458cabdff1aSopenharmony_ci
459cabdff1aSopenharmony_ci                    __asm__ volatile(
460cabdff1aSopenharmony_ci                        "lwc1    %[temp0],  0(%[in])                     \n\t"
461cabdff1aSopenharmony_ci                        "lwc1    %[temp1],  4(%[in])                     \n\t"
462cabdff1aSopenharmony_ci                        "madd.s  %[temp4],  %[temp2],  %[temp0], %[A_f]  \n\t"
463cabdff1aSopenharmony_ci                        "madd.s  %[temp5],  %[temp3],  %[temp1], %[B_f]  \n\t"
464cabdff1aSopenharmony_ci                        "swc1    %[temp4],  0(%[out])                    \n\t"
465cabdff1aSopenharmony_ci                        "swc1    %[temp5],  8(%[out])                    \n\t"
466cabdff1aSopenharmony_ci                        PTR_ADDIU "%[in],   %[in],     8                 \n\t"
467cabdff1aSopenharmony_ci                        PTR_ADDIU "%[out],  %[out],    16                \n\t"
468cabdff1aSopenharmony_ci
469cabdff1aSopenharmony_ci                        : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
470cabdff1aSopenharmony_ci                          [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
471cabdff1aSopenharmony_ci                          [in]"+r"(in), [out]"+r"(out)
472cabdff1aSopenharmony_ci                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
473cabdff1aSopenharmony_ci                          [temp3]"f"(temp3)
474cabdff1aSopenharmony_ci                        : "memory"
475cabdff1aSopenharmony_ci                    );
476cabdff1aSopenharmony_ci                }
477cabdff1aSopenharmony_ci                if(m_max&1)
478cabdff1aSopenharmony_ci                    out[2*m  ] += in[m  ] * A;
479cabdff1aSopenharmony_ci            }
480cabdff1aSopenharmony_ci            indexnoise = (indexnoise + m_max) & 0x1ff;
481cabdff1aSopenharmony_ci            indexsine = (indexsine + 1) & 3;
482cabdff1aSopenharmony_ci        }
483cabdff1aSopenharmony_ci    }
484cabdff1aSopenharmony_ci    ch_data->f_indexnoise = indexnoise;
485cabdff1aSopenharmony_ci    ch_data->f_indexsine  = indexsine;
486cabdff1aSopenharmony_ci}
487cabdff1aSopenharmony_ci
488cabdff1aSopenharmony_cistatic void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
489cabdff1aSopenharmony_ci                                  float (*alpha0)[2], float (*alpha1)[2],
490cabdff1aSopenharmony_ci                                  const float X_low[32][40][2], int k0)
491cabdff1aSopenharmony_ci{
492cabdff1aSopenharmony_ci    int k;
493cabdff1aSopenharmony_ci    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
494cabdff1aSopenharmony_ci    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
495cabdff1aSopenharmony_ci
496cabdff1aSopenharmony_ci    c = 1.000001f;
497cabdff1aSopenharmony_ci
498cabdff1aSopenharmony_ci    for (k = 0; k < k0; k++) {
499cabdff1aSopenharmony_ci        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
500cabdff1aSopenharmony_ci        float dk;
501cabdff1aSopenharmony_ci        phi1 = &phi[0][0][0];
502cabdff1aSopenharmony_ci        alpha_1 = &alpha1[k][0];
503cabdff1aSopenharmony_ci        alpha_0 = &alpha0[k][0];
504cabdff1aSopenharmony_ci        dsp->autocorrelate(X_low[k], phi);
505cabdff1aSopenharmony_ci
506cabdff1aSopenharmony_ci        __asm__ volatile (
507cabdff1aSopenharmony_ci            "lwc1    %[temp0],  40(%[phi1])                       \n\t"
508cabdff1aSopenharmony_ci            "lwc1    %[temp1],  16(%[phi1])                       \n\t"
509cabdff1aSopenharmony_ci            "lwc1    %[temp2],  24(%[phi1])                       \n\t"
510cabdff1aSopenharmony_ci            "lwc1    %[temp3],  28(%[phi1])                       \n\t"
511cabdff1aSopenharmony_ci            "mul.s   %[dk],     %[temp0],    %[temp1]             \n\t"
512cabdff1aSopenharmony_ci            "lwc1    %[temp4],  0(%[phi1])                        \n\t"
513cabdff1aSopenharmony_ci            "mul.s   %[res2],   %[temp2],    %[temp2]             \n\t"
514cabdff1aSopenharmony_ci            "lwc1    %[temp5],  4(%[phi1])                        \n\t"
515cabdff1aSopenharmony_ci            "madd.s  %[res2],   %[res2],     %[temp3],  %[temp3]  \n\t"
516cabdff1aSopenharmony_ci            "lwc1    %[temp6],  8(%[phi1])                        \n\t"
517cabdff1aSopenharmony_ci            "div.s   %[res2],   %[res2],     %[c]                 \n\t"
518cabdff1aSopenharmony_ci            "lwc1    %[temp0],  12(%[phi1])                       \n\t"
519cabdff1aSopenharmony_ci            "sub.s   %[dk],     %[dk],       %[res2]              \n\t"
520cabdff1aSopenharmony_ci
521cabdff1aSopenharmony_ci            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
522cabdff1aSopenharmony_ci              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
523cabdff1aSopenharmony_ci              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
524cabdff1aSopenharmony_ci            : [phi1]"r"(phi1), [c]"f"(c)
525cabdff1aSopenharmony_ci            : "memory"
526cabdff1aSopenharmony_ci        );
527cabdff1aSopenharmony_ci
528cabdff1aSopenharmony_ci        if (!dk) {
529cabdff1aSopenharmony_ci            alpha_1[0] = 0;
530cabdff1aSopenharmony_ci            alpha_1[1] = 0;
531cabdff1aSopenharmony_ci        } else {
532cabdff1aSopenharmony_ci            __asm__ volatile (
533cabdff1aSopenharmony_ci                "mul.s   %[temp_real], %[temp4],     %[temp2]            \n\t"
534cabdff1aSopenharmony_ci                "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3]  \n\t"
535cabdff1aSopenharmony_ci                "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1]  \n\t"
536cabdff1aSopenharmony_ci                "mul.s   %[temp_im],   %[temp4],     %[temp3]            \n\t"
537cabdff1aSopenharmony_ci                "madd.s  %[temp_im],   %[temp_im],   %[temp5], %[temp2]  \n\t"
538cabdff1aSopenharmony_ci                "nmsub.s %[temp_im],   %[temp_im],   %[temp0], %[temp1]  \n\t"
539cabdff1aSopenharmony_ci                "div.s   %[temp_real], %[temp_real], %[dk]               \n\t"
540cabdff1aSopenharmony_ci                "div.s   %[temp_im],   %[temp_im],   %[dk]               \n\t"
541cabdff1aSopenharmony_ci                "swc1    %[temp_real], 0(%[alpha_1])                     \n\t"
542cabdff1aSopenharmony_ci                "swc1    %[temp_im],   4(%[alpha_1])                     \n\t"
543cabdff1aSopenharmony_ci
544cabdff1aSopenharmony_ci                : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
545cabdff1aSopenharmony_ci                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
546cabdff1aSopenharmony_ci                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
547cabdff1aSopenharmony_ci                  [temp5]"f"(temp5), [temp6]"f"(temp6),
548cabdff1aSopenharmony_ci                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
549cabdff1aSopenharmony_ci                : "memory"
550cabdff1aSopenharmony_ci            );
551cabdff1aSopenharmony_ci        }
552cabdff1aSopenharmony_ci
553cabdff1aSopenharmony_ci        if (!phi1[4]) {
554cabdff1aSopenharmony_ci            alpha_0[0] = 0;
555cabdff1aSopenharmony_ci            alpha_0[1] = 0;
556cabdff1aSopenharmony_ci        } else {
557cabdff1aSopenharmony_ci            __asm__ volatile (
558cabdff1aSopenharmony_ci                "lwc1    %[temp6],     0(%[alpha_1])                     \n\t"
559cabdff1aSopenharmony_ci                "lwc1    %[temp7],     4(%[alpha_1])                     \n\t"
560cabdff1aSopenharmony_ci                "mul.s   %[temp_real], %[temp6],     %[temp2]            \n\t"
561cabdff1aSopenharmony_ci                "add.s   %[temp_real], %[temp_real], %[temp4]            \n\t"
562cabdff1aSopenharmony_ci                "madd.s  %[temp_real], %[temp_real], %[temp7], %[temp3]  \n\t"
563cabdff1aSopenharmony_ci                "mul.s   %[temp_im],   %[temp7],     %[temp2]            \n\t"
564cabdff1aSopenharmony_ci                "add.s   %[temp_im],   %[temp_im],   %[temp5]            \n\t"
565cabdff1aSopenharmony_ci                "nmsub.s %[temp_im],   %[temp_im],   %[temp6], %[temp3]  \n\t"
566cabdff1aSopenharmony_ci                "div.s   %[temp_real], %[temp_real], %[temp1]            \n\t"
567cabdff1aSopenharmony_ci                "div.s   %[temp_im],   %[temp_im],   %[temp1]            \n\t"
568cabdff1aSopenharmony_ci                "neg.s   %[temp_real], %[temp_real]                      \n\t"
569cabdff1aSopenharmony_ci                "neg.s   %[temp_im],   %[temp_im]                        \n\t"
570cabdff1aSopenharmony_ci                "swc1    %[temp_real], 0(%[alpha_0])                     \n\t"
571cabdff1aSopenharmony_ci                "swc1    %[temp_im],   4(%[alpha_0])                     \n\t"
572cabdff1aSopenharmony_ci
573cabdff1aSopenharmony_ci                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
574cabdff1aSopenharmony_ci                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
575cabdff1aSopenharmony_ci                  [res1]"=&f"(res1), [res2]"=&f"(res2)
576cabdff1aSopenharmony_ci                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
577cabdff1aSopenharmony_ci                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
578cabdff1aSopenharmony_ci                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
579cabdff1aSopenharmony_ci                : "memory"
580cabdff1aSopenharmony_ci            );
581cabdff1aSopenharmony_ci        }
582cabdff1aSopenharmony_ci
583cabdff1aSopenharmony_ci        __asm__ volatile (
584cabdff1aSopenharmony_ci            "lwc1    %[temp1],      0(%[alpha_1])                           \n\t"
585cabdff1aSopenharmony_ci            "lwc1    %[temp2],      4(%[alpha_1])                           \n\t"
586cabdff1aSopenharmony_ci            "lwc1    %[temp_real],  0(%[alpha_0])                           \n\t"
587cabdff1aSopenharmony_ci            "lwc1    %[temp_im],    4(%[alpha_0])                           \n\t"
588cabdff1aSopenharmony_ci            "mul.s   %[res1],       %[temp1],      %[temp1]                 \n\t"
589cabdff1aSopenharmony_ci            "madd.s  %[res1],       %[res1],       %[temp2],    %[temp2]    \n\t"
590cabdff1aSopenharmony_ci            "mul.s   %[res2],       %[temp_real],  %[temp_real]             \n\t"
591cabdff1aSopenharmony_ci            "madd.s  %[res2],       %[res2],       %[temp_im],  %[temp_im]  \n\t"
592cabdff1aSopenharmony_ci
593cabdff1aSopenharmony_ci            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
594cabdff1aSopenharmony_ci              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
595cabdff1aSopenharmony_ci              [res1]"=&f"(res1), [res2]"=&f"(res2)
596cabdff1aSopenharmony_ci            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
597cabdff1aSopenharmony_ci            : "memory"
598cabdff1aSopenharmony_ci        );
599cabdff1aSopenharmony_ci
600cabdff1aSopenharmony_ci        if (res1 >= 16.0f || res2 >= 16.0f) {
601cabdff1aSopenharmony_ci            alpha_1[0] = 0;
602cabdff1aSopenharmony_ci            alpha_1[1] = 0;
603cabdff1aSopenharmony_ci            alpha_0[0] = 0;
604cabdff1aSopenharmony_ci            alpha_0[1] = 0;
605cabdff1aSopenharmony_ci        }
606cabdff1aSopenharmony_ci    }
607cabdff1aSopenharmony_ci}
608cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
609cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
610cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
611cabdff1aSopenharmony_ci
612cabdff1aSopenharmony_civoid ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
613cabdff1aSopenharmony_ci{
614cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
615cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
616cabdff1aSopenharmony_ci    c->sbr_lf_gen            = sbr_lf_gen_mips;
617cabdff1aSopenharmony_ci    c->sbr_x_gen             = sbr_x_gen_mips;
618cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
619cabdff1aSopenharmony_ci    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
620cabdff1aSopenharmony_ci    c->sbr_hf_assemble       = sbr_hf_assemble_mips;
621cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
622cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
623cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
624cabdff1aSopenharmony_ci}
625