1/*
2 * Copyright (c) 2012
3 *      MIPS Technologies, Inc., California.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 *    contributors may be used to endorse or promote products derived from
15 *    this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Authors:  Djordje Pesut   (djordje@mips.com)
30 *           Mirjana Vulin   (mvulin@mips.com)
31 *
32 * This file is part of FFmpeg.
33 *
34 * FFmpeg is free software; you can redistribute it and/or
35 * modify it under the terms of the GNU Lesser General Public
36 * License as published by the Free Software Foundation; either
37 * version 2.1 of the License, or (at your option) any later version.
38 *
39 * FFmpeg is distributed in the hope that it will be useful,
40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
42 * Lesser General Public License for more details.
43 *
44 * You should have received a copy of the GNU Lesser General Public
45 * License along with FFmpeg; if not, write to the Free Software
46 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47 */
48
49/**
50 * @file
51 * Reference: libavcodec/aacsbr.c
52 */
53
54#include "libavcodec/aac.h"
55#include "libavcodec/aacsbr.h"
56#include "libavutil/mem_internal.h"
57#include "libavutil/mips/asmdefs.h"
58
59#define ENVELOPE_ADJUSTMENT_OFFSET 2
60
61#if HAVE_INLINE_ASM
62#if HAVE_MIPSFPU
63static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
64                      float X_low[32][40][2], const float W[2][32][32][2],
65                      int buf_idx)
66{
67    int i, k;
68    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
69    float *p_x_low = &X_low[0][8][0];
70    float *p_w = (float*)&W[buf_idx][0][0][0];
71    float *p_x1_low = &X_low[0][0][0];
72    float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
73
74    float *loop_end=p_x1_low + 2560;
75
76    /* loop unrolled 8 times */
77    __asm__ volatile (
78    "1:                                                 \n\t"
79        "sw     $0,            0(%[p_x1_low])           \n\t"
80        "sw     $0,            4(%[p_x1_low])           \n\t"
81        "sw     $0,            8(%[p_x1_low])           \n\t"
82        "sw     $0,            12(%[p_x1_low])          \n\t"
83        "sw     $0,            16(%[p_x1_low])          \n\t"
84        "sw     $0,            20(%[p_x1_low])          \n\t"
85        "sw     $0,            24(%[p_x1_low])          \n\t"
86        "sw     $0,            28(%[p_x1_low])          \n\t"
87        PTR_ADDIU "%[p_x1_low],%[p_x1_low],      32     \n\t"
88        "bne    %[p_x1_low],   %[loop_end],      1b     \n\t"
89        PTR_ADDIU "%[p_x1_low],%[p_x1_low],      -10240 \n\t"
90
91        : [p_x1_low]"+r"(p_x1_low)
92        : [loop_end]"r"(loop_end)
93        : "memory"
94    );
95
96    for (k = 0; k < sbr->kx[1]; k++) {
97        for (i = 0; i < 32; i+=4) {
98            /* loop unrolled 4 times */
99            __asm__ volatile (
100                "lw     %[temp0],   0(%[p_w])               \n\t"
101                "lw     %[temp1],   4(%[p_w])               \n\t"
102                "lw     %[temp2],   256(%[p_w])             \n\t"
103                "lw     %[temp3],   260(%[p_w])             \n\t"
104                "lw     %[temp4],   512(%[p_w])             \n\t"
105                "lw     %[temp5],   516(%[p_w])             \n\t"
106                "lw     %[temp6],   768(%[p_w])             \n\t"
107                "lw     %[temp7],   772(%[p_w])             \n\t"
108                "sw     %[temp0],   0(%[p_x_low])           \n\t"
109                "sw     %[temp1],   4(%[p_x_low])           \n\t"
110                "sw     %[temp2],   8(%[p_x_low])           \n\t"
111                "sw     %[temp3],   12(%[p_x_low])          \n\t"
112                "sw     %[temp4],   16(%[p_x_low])          \n\t"
113                "sw     %[temp5],   20(%[p_x_low])          \n\t"
114                "sw     %[temp6],   24(%[p_x_low])          \n\t"
115                "sw     %[temp7],   28(%[p_x_low])          \n\t"
116                PTR_ADDIU "%[p_x_low], %[p_x_low],  32      \n\t"
117                PTR_ADDIU "%[p_w],     %[p_w],      1024    \n\t"
118
119                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
120                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
121                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
122                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
123                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
124                :
125                : "memory"
126            );
127        }
128        p_x_low += 16;
129        p_w -= 2046;
130    }
131
132    for (k = 0; k < sbr->kx[0]; k++) {
133        for (i = 0; i < 2; i++) {
134
135            /* loop unrolled 4 times */
136            __asm__ volatile (
137                "lw     %[temp0],    0(%[p_w1])             \n\t"
138                "lw     %[temp1],    4(%[p_w1])             \n\t"
139                "lw     %[temp2],    256(%[p_w1])           \n\t"
140                "lw     %[temp3],    260(%[p_w1])           \n\t"
141                "lw     %[temp4],    512(%[p_w1])           \n\t"
142                "lw     %[temp5],    516(%[p_w1])           \n\t"
143                "lw     %[temp6],    768(%[p_w1])           \n\t"
144                "lw     %[temp7],    772(%[p_w1])           \n\t"
145                "sw     %[temp0],    0(%[p_x1_low])         \n\t"
146                "sw     %[temp1],    4(%[p_x1_low])         \n\t"
147                "sw     %[temp2],    8(%[p_x1_low])         \n\t"
148                "sw     %[temp3],    12(%[p_x1_low])        \n\t"
149                "sw     %[temp4],    16(%[p_x1_low])        \n\t"
150                "sw     %[temp5],    20(%[p_x1_low])        \n\t"
151                "sw     %[temp6],    24(%[p_x1_low])        \n\t"
152                "sw     %[temp7],    28(%[p_x1_low])        \n\t"
153                PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32     \n\t"
154                PTR_ADDIU "%[p_w1],     %[p_w1],     1024   \n\t"
155
156                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
157                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
158                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
159                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
160                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
161                :
162                : "memory"
163            );
164        }
165        p_x1_low += 64;
166        p_w1 -= 510;
167    }
168    return 0;
169}
170
171static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
172                     const float Y0[38][64][2], const float Y1[38][64][2],
173                     const float X_low[32][40][2], int ch)
174{
175    int k, i;
176    const int i_f = 32;
177    int temp0, temp1, temp2, temp3;
178    const float *X_low1, *Y01, *Y11;
179    float *x1=&X[0][0][0];
180    float *j=x1+4864;
181    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
182
183    /* loop unrolled 8 times */
184    __asm__ volatile (
185    "1:                                       \n\t"
186        "sw     $0,      0(%[x1])             \n\t"
187        "sw     $0,      4(%[x1])             \n\t"
188        "sw     $0,      8(%[x1])             \n\t"
189        "sw     $0,      12(%[x1])            \n\t"
190        "sw     $0,      16(%[x1])            \n\t"
191        "sw     $0,      20(%[x1])            \n\t"
192        "sw     $0,      24(%[x1])            \n\t"
193        "sw     $0,      28(%[x1])            \n\t"
194        PTR_ADDIU "%[x1],%[x1],      32       \n\t"
195        "bne    %[x1],   %[j],       1b       \n\t"
196        PTR_ADDIU "%[x1],%[x1],      -19456   \n\t"
197
198        : [x1]"+r"(x1)
199        : [j]"r"(j)
200        : "memory"
201    );
202
203    if (i_Temp != 0) {
204
205        X_low1=&X_low[0][2][0];
206
207        for (k = 0; k < sbr->kx[0]; k++) {
208
209            __asm__ volatile (
210                "move    %[i],        $zero                  \n\t"
211            "2:                                              \n\t"
212                "lw      %[temp0],    0(%[X_low1])           \n\t"
213                "lw      %[temp1],    4(%[X_low1])           \n\t"
214                "sw      %[temp0],    0(%[x1])               \n\t"
215                "sw      %[temp1],    9728(%[x1])            \n\t"
216                PTR_ADDIU "%[x1],     %[x1],         256     \n\t"
217                PTR_ADDIU "%[X_low1], %[X_low1],     8       \n\t"
218                "addiu   %[i],        %[i],          1       \n\t"
219                "bne     %[i],        %[i_Temp],     2b      \n\t"
220
221                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
222                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
223                : [i_Temp]"r"(i_Temp)
224                : "memory"
225            );
226            x1-=(i_Temp<<6)-1;
227            X_low1-=(i_Temp<<1)-80;
228        }
229
230        x1=&X[0][0][k];
231        Y01=(float*)&Y0[32][k][0];
232
233        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
234            __asm__ volatile (
235                "move    %[i],       $zero               \n\t"
236            "3:                                          \n\t"
237                "lw      %[temp0],   0(%[Y01])           \n\t"
238                "lw      %[temp1],   4(%[Y01])           \n\t"
239                "sw      %[temp0],   0(%[x1])            \n\t"
240                "sw      %[temp1],   9728(%[x1])         \n\t"
241                PTR_ADDIU "%[x1],    %[x1],      256     \n\t"
242                PTR_ADDIU "%[Y01],   %[Y01],     512     \n\t"
243                "addiu   %[i],       %[i],       1       \n\t"
244                "bne     %[i],       %[i_Temp],  3b      \n\t"
245
246                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
247                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
248                : [i_Temp]"r"(i_Temp)
249                : "memory"
250            );
251            x1 -=(i_Temp<<6)-1;
252            Y01 -=(i_Temp<<7)-2;
253        }
254    }
255
256    x1=&X[0][i_Temp][0];
257    X_low1=&X_low[0][i_Temp+2][0];
258    temp3=38;
259
260    for (k = 0; k < sbr->kx[1]; k++) {
261
262        __asm__ volatile (
263            "move    %[i],       %[i_Temp]              \n\t"
264        "4:                                             \n\t"
265            "lw      %[temp0],   0(%[X_low1])           \n\t"
266            "lw      %[temp1],   4(%[X_low1])           \n\t"
267            "sw      %[temp0],   0(%[x1])               \n\t"
268            "sw      %[temp1],   9728(%[x1])            \n\t"
269            PTR_ADDIU "%[x1],    %[x1],         256     \n\t"
270            PTR_ADDIU "%[X_low1],%[X_low1],     8       \n\t"
271            "addiu   %[i],       %[i],          1       \n\t"
272            "bne     %[i],       %[temp3],      4b      \n\t"
273
274            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
275              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
276              [temp2]"=&r"(temp2)
277            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
278            : "memory"
279        );
280        x1 -= ((38-i_Temp)<<6)-1;
281        X_low1 -= ((38-i_Temp)<<1)- 80;
282    }
283
284    x1=&X[0][i_Temp][k];
285    Y11=&Y1[i_Temp][k][0];
286    temp2=32;
287
288    for (; k < sbr->kx[1] + sbr->m[1]; k++) {
289
290        __asm__ volatile (
291           "move    %[i],       %[i_Temp]               \n\t"
292        "5:                                             \n\t"
293           "lw      %[temp0],   0(%[Y11])               \n\t"
294           "lw      %[temp1],   4(%[Y11])               \n\t"
295           "sw      %[temp0],   0(%[x1])                \n\t"
296           "sw      %[temp1],   9728(%[x1])             \n\t"
297           PTR_ADDIU "%[x1],    %[x1],          256     \n\t"
298           PTR_ADDIU "%[Y11],   %[Y11],         512     \n\t"
299           "addiu   %[i],       %[i],           1       \n\t"
300           "bne     %[i],       %[temp2],       5b      \n\t"
301
302           : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
303             [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
304           : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
305             [temp2]"r"(temp2)
306           : "memory"
307        );
308
309        x1 -= ((32-i_Temp)<<6)-1;
310        Y11 -= ((32-i_Temp)<<7)-2;
311   }
312      return 0;
313}
314
315#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
316static void sbr_hf_assemble_mips(float Y1[38][64][2],
317                            const float X_high[64][40][2],
318                            SpectralBandReplication *sbr, SBRData *ch_data,
319                            const int e_a[2])
320{
321    int e, i, j, m;
322    const int h_SL = 4 * !sbr->bs_smoothing_mode;
323    const int kx = sbr->kx[1];
324    const int m_max = sbr->m[1];
325    static const float h_smooth[5] = {
326        0.33333333333333,
327        0.30150283239582,
328        0.21816949906249,
329        0.11516383427084,
330        0.03183050093751,
331    };
332
333    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
334    int indexnoise = ch_data->f_indexnoise;
335    int indexsine  = ch_data->f_indexsine;
336    float *g_temp1, *q_temp1, *pok, *pok1;
337    uint32_t temp1, temp2, temp3, temp4;
338    int size = m_max;
339
340    if (sbr->reset) {
341        for (i = 0; i < h_SL; i++) {
342            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
343            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
344        }
345    } else if (h_SL) {
346        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
347        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
348    }
349
350    for (e = 0; e < ch_data->bs_num_env; e++) {
351        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
352            g_temp1 = g_temp[h_SL + i];
353            pok = sbr->gain[e];
354            q_temp1 = q_temp[h_SL + i];
355            pok1 = sbr->q_m[e];
356
357            /* loop unrolled 4 times */
358            for (j=0; j<(size>>2); j++) {
359                __asm__ volatile (
360                    "lw      %[temp1],   0(%[pok])               \n\t"
361                    "lw      %[temp2],   4(%[pok])               \n\t"
362                    "lw      %[temp3],   8(%[pok])               \n\t"
363                    "lw      %[temp4],   12(%[pok])              \n\t"
364                    "sw      %[temp1],   0(%[g_temp1])           \n\t"
365                    "sw      %[temp2],   4(%[g_temp1])           \n\t"
366                    "sw      %[temp3],   8(%[g_temp1])           \n\t"
367                    "sw      %[temp4],   12(%[g_temp1])          \n\t"
368                    "lw      %[temp1],   0(%[pok1])              \n\t"
369                    "lw      %[temp2],   4(%[pok1])              \n\t"
370                    "lw      %[temp3],   8(%[pok1])              \n\t"
371                    "lw      %[temp4],   12(%[pok1])             \n\t"
372                    "sw      %[temp1],   0(%[q_temp1])           \n\t"
373                    "sw      %[temp2],   4(%[q_temp1])           \n\t"
374                    "sw      %[temp3],   8(%[q_temp1])           \n\t"
375                    "sw      %[temp4],   12(%[q_temp1])          \n\t"
376                    PTR_ADDIU "%[pok],     %[pok],         16    \n\t"
377                    PTR_ADDIU "%[g_temp1], %[g_temp1],     16    \n\t"
378                    PTR_ADDIU "%[pok1],    %[pok1],        16    \n\t"
379                    PTR_ADDIU "%[q_temp1], %[q_temp1],     16    \n\t"
380
381                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
382                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
383                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
384                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
385                    :
386                    : "memory"
387                );
388            }
389
390            for (j=0; j<(size&3); j++) {
391                __asm__ volatile (
392                    "lw      %[temp1],   0(%[pok])              \n\t"
393                    "lw      %[temp2],   0(%[pok1])             \n\t"
394                    "sw      %[temp1],   0(%[g_temp1])          \n\t"
395                    "sw      %[temp2],   0(%[q_temp1])          \n\t"
396                    PTR_ADDIU "%[pok],     %[pok],        4     \n\t"
397                    PTR_ADDIU "%[g_temp1], %[g_temp1],    4     \n\t"
398                    PTR_ADDIU "%[pok1],    %[pok1],       4     \n\t"
399                    PTR_ADDIU "%[q_temp1], %[q_temp1],    4     \n\t"
400
401                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
402                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
403                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
404                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
405                    :
406                    : "memory"
407                );
408            }
409        }
410    }
411
412    for (e = 0; e < ch_data->bs_num_env; e++) {
413        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
414            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
415            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
416            float *g_filt, *q_filt;
417
418            if (h_SL && e != e_a[0] && e != e_a[1]) {
419                g_filt = g_filt_tab;
420                q_filt = q_filt_tab;
421
422                for (m = 0; m < m_max; m++) {
423                    const int idx1 = i + h_SL;
424                    g_filt[m] = 0.0f;
425                    q_filt[m] = 0.0f;
426
427                    for (j = 0; j <= h_SL; j++) {
428                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
429                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
430                    }
431                }
432            } else {
433                g_filt = g_temp[i + h_SL];
434                q_filt = q_temp[i];
435            }
436
437            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
438                               i + ENVELOPE_ADJUSTMENT_OFFSET);
439
440            if (e != e_a[0] && e != e_a[1]) {
441                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
442                                                   q_filt, indexnoise,
443                                                   kx, m_max);
444            } else {
445                int idx = indexsine&1;
446                int A = (1-((indexsine+(kx & 1))&2));
447                int B = (A^(-idx)) + idx;
448                float *out = &Y1[i][kx][idx];
449                float *in  = sbr->s_m[e];
450                float temp0, temp1, temp2, temp3, temp4, temp5;
451                float A_f = (float)A;
452                float B_f = (float)B;
453
454                for (m = 0; m+1 < m_max; m+=2) {
455
456                    temp2 = out[0];
457                    temp3 = out[2];
458
459                    __asm__ volatile(
460                        "lwc1    %[temp0],  0(%[in])                     \n\t"
461                        "lwc1    %[temp1],  4(%[in])                     \n\t"
462                        "madd.s  %[temp4],  %[temp2],  %[temp0], %[A_f]  \n\t"
463                        "madd.s  %[temp5],  %[temp3],  %[temp1], %[B_f]  \n\t"
464                        "swc1    %[temp4],  0(%[out])                    \n\t"
465                        "swc1    %[temp5],  8(%[out])                    \n\t"
466                        PTR_ADDIU "%[in],   %[in],     8                 \n\t"
467                        PTR_ADDIU "%[out],  %[out],    16                \n\t"
468
469                        : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
470                          [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
471                          [in]"+r"(in), [out]"+r"(out)
472                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
473                          [temp3]"f"(temp3)
474                        : "memory"
475                    );
476                }
477                if(m_max&1)
478                    out[2*m  ] += in[m  ] * A;
479            }
480            indexnoise = (indexnoise + m_max) & 0x1ff;
481            indexsine = (indexsine + 1) & 3;
482        }
483    }
484    ch_data->f_indexnoise = indexnoise;
485    ch_data->f_indexsine  = indexsine;
486}
487
488static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
489                                  float (*alpha0)[2], float (*alpha1)[2],
490                                  const float X_low[32][40][2], int k0)
491{
492    int k;
493    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
494    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
495
496    c = 1.000001f;
497
498    for (k = 0; k < k0; k++) {
499        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
500        float dk;
501        phi1 = &phi[0][0][0];
502        alpha_1 = &alpha1[k][0];
503        alpha_0 = &alpha0[k][0];
504        dsp->autocorrelate(X_low[k], phi);
505
506        __asm__ volatile (
507            "lwc1    %[temp0],  40(%[phi1])                       \n\t"
508            "lwc1    %[temp1],  16(%[phi1])                       \n\t"
509            "lwc1    %[temp2],  24(%[phi1])                       \n\t"
510            "lwc1    %[temp3],  28(%[phi1])                       \n\t"
511            "mul.s   %[dk],     %[temp0],    %[temp1]             \n\t"
512            "lwc1    %[temp4],  0(%[phi1])                        \n\t"
513            "mul.s   %[res2],   %[temp2],    %[temp2]             \n\t"
514            "lwc1    %[temp5],  4(%[phi1])                        \n\t"
515            "madd.s  %[res2],   %[res2],     %[temp3],  %[temp3]  \n\t"
516            "lwc1    %[temp6],  8(%[phi1])                        \n\t"
517            "div.s   %[res2],   %[res2],     %[c]                 \n\t"
518            "lwc1    %[temp0],  12(%[phi1])                       \n\t"
519            "sub.s   %[dk],     %[dk],       %[res2]              \n\t"
520
521            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
522              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
523              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
524            : [phi1]"r"(phi1), [c]"f"(c)
525            : "memory"
526        );
527
528        if (!dk) {
529            alpha_1[0] = 0;
530            alpha_1[1] = 0;
531        } else {
532            __asm__ volatile (
533                "mul.s   %[temp_real], %[temp4],     %[temp2]            \n\t"
534                "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3]  \n\t"
535                "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1]  \n\t"
536                "mul.s   %[temp_im],   %[temp4],     %[temp3]            \n\t"
537                "madd.s  %[temp_im],   %[temp_im],   %[temp5], %[temp2]  \n\t"
538                "nmsub.s %[temp_im],   %[temp_im],   %[temp0], %[temp1]  \n\t"
539                "div.s   %[temp_real], %[temp_real], %[dk]               \n\t"
540                "div.s   %[temp_im],   %[temp_im],   %[dk]               \n\t"
541                "swc1    %[temp_real], 0(%[alpha_1])                     \n\t"
542                "swc1    %[temp_im],   4(%[alpha_1])                     \n\t"
543
544                : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
545                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
546                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
547                  [temp5]"f"(temp5), [temp6]"f"(temp6),
548                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
549                : "memory"
550            );
551        }
552
553        if (!phi1[4]) {
554            alpha_0[0] = 0;
555            alpha_0[1] = 0;
556        } else {
557            __asm__ volatile (
558                "lwc1    %[temp6],     0(%[alpha_1])                     \n\t"
559                "lwc1    %[temp7],     4(%[alpha_1])                     \n\t"
560                "mul.s   %[temp_real], %[temp6],     %[temp2]            \n\t"
561                "add.s   %[temp_real], %[temp_real], %[temp4]            \n\t"
562                "madd.s  %[temp_real], %[temp_real], %[temp7], %[temp3]  \n\t"
563                "mul.s   %[temp_im],   %[temp7],     %[temp2]            \n\t"
564                "add.s   %[temp_im],   %[temp_im],   %[temp5]            \n\t"
565                "nmsub.s %[temp_im],   %[temp_im],   %[temp6], %[temp3]  \n\t"
566                "div.s   %[temp_real], %[temp_real], %[temp1]            \n\t"
567                "div.s   %[temp_im],   %[temp_im],   %[temp1]            \n\t"
568                "neg.s   %[temp_real], %[temp_real]                      \n\t"
569                "neg.s   %[temp_im],   %[temp_im]                        \n\t"
570                "swc1    %[temp_real], 0(%[alpha_0])                     \n\t"
571                "swc1    %[temp_im],   4(%[alpha_0])                     \n\t"
572
573                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
574                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
575                  [res1]"=&f"(res1), [res2]"=&f"(res2)
576                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
577                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
578                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
579                : "memory"
580            );
581        }
582
583        __asm__ volatile (
584            "lwc1    %[temp1],      0(%[alpha_1])                           \n\t"
585            "lwc1    %[temp2],      4(%[alpha_1])                           \n\t"
586            "lwc1    %[temp_real],  0(%[alpha_0])                           \n\t"
587            "lwc1    %[temp_im],    4(%[alpha_0])                           \n\t"
588            "mul.s   %[res1],       %[temp1],      %[temp1]                 \n\t"
589            "madd.s  %[res1],       %[res1],       %[temp2],    %[temp2]    \n\t"
590            "mul.s   %[res2],       %[temp_real],  %[temp_real]             \n\t"
591            "madd.s  %[res2],       %[res2],       %[temp_im],  %[temp_im]  \n\t"
592
593            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
594              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
595              [res1]"=&f"(res1), [res2]"=&f"(res2)
596            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
597            : "memory"
598        );
599
600        if (res1 >= 16.0f || res2 >= 16.0f) {
601            alpha_1[0] = 0;
602            alpha_1[1] = 0;
603            alpha_0[0] = 0;
604            alpha_0[1] = 0;
605        }
606    }
607}
608#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
609#endif /* HAVE_MIPSFPU */
610#endif /* HAVE_INLINE_ASM */
611
612void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
613{
614#if HAVE_INLINE_ASM
615#if HAVE_MIPSFPU
616    c->sbr_lf_gen            = sbr_lf_gen_mips;
617    c->sbr_x_gen             = sbr_x_gen_mips;
618#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
619    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
620    c->sbr_hf_assemble       = sbr_hf_assemble_mips;
621#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
622#endif /* HAVE_MIPSFPU */
623#endif /* HAVE_INLINE_ASM */
624}
625