1cabdff1aSopenharmony_ci    /*
2cabdff1aSopenharmony_ci * Copyright (c) 2012
3cabdff1aSopenharmony_ci *      MIPS Technologies, Inc., California.
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * Redistribution and use in source and binary forms, with or without
6cabdff1aSopenharmony_ci * modification, are permitted provided that the following conditions
7cabdff1aSopenharmony_ci * are met:
8cabdff1aSopenharmony_ci * 1. Redistributions of source code must retain the above copyright
9cabdff1aSopenharmony_ci *    notice, this list of conditions and the following disclaimer.
10cabdff1aSopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright
11cabdff1aSopenharmony_ci *    notice, this list of conditions and the following disclaimer in the
12cabdff1aSopenharmony_ci *    documentation and/or other materials provided with the distribution.
13cabdff1aSopenharmony_ci * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14cabdff1aSopenharmony_ci *    contributors may be used to endorse or promote products derived from
15cabdff1aSopenharmony_ci *    this software without specific prior written permission.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18cabdff1aSopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19cabdff1aSopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20cabdff1aSopenharmony_ci * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21cabdff1aSopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22cabdff1aSopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23cabdff1aSopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24cabdff1aSopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25cabdff1aSopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26cabdff1aSopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27cabdff1aSopenharmony_ci * SUCH DAMAGE.
28cabdff1aSopenharmony_ci *
29cabdff1aSopenharmony_ci * Author:  Bojan Zivkovic (bojan@mips.com)
30cabdff1aSopenharmony_ci *
31cabdff1aSopenharmony_ci * MPEG Audio decoder optimized for MIPS fixed-point architecture
32cabdff1aSopenharmony_ci *
33cabdff1aSopenharmony_ci * This file is part of FFmpeg.
34cabdff1aSopenharmony_ci *
35cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
36cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
37cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
38cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
39cabdff1aSopenharmony_ci *
40cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
41cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
42cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43cabdff1aSopenharmony_ci * Lesser General Public License for more details.
44cabdff1aSopenharmony_ci *
45cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
46cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
47cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48cabdff1aSopenharmony_ci */
49cabdff1aSopenharmony_ci
50cabdff1aSopenharmony_ci/**
51cabdff1aSopenharmony_ci * @file
52cabdff1aSopenharmony_ci * Reference: libavcodec/mpegaudiodsp_template.c
53cabdff1aSopenharmony_ci */
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_ci#include <string.h>
56cabdff1aSopenharmony_ci
57cabdff1aSopenharmony_ci#include "config.h"
58cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h"
59cabdff1aSopenharmony_ci#include "libavcodec/mpegaudiodsp.h"
60cabdff1aSopenharmony_ci
61cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
62cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
63cabdff1aSopenharmony_ci
/**
 * Windowing stage of the fixed-point MPEG audio synthesis filter, written
 * with MIPS multiply-accumulate (madd/msub) and accumulator-extract (extr.w)
 * instructions. See libavcodec/mpegaudiodsp_template.c for the C reference.
 *
 * Writes 32 clamped 16-bit samples: samples[0], samples[incr], ...,
 * samples[31 * incr]. Sample 0 and sample 16 are each produced by their own
 * asm block; samples j and 32 - j (j = 1..15) are produced pairwise in the
 * loop in between.
 *
 * @param synth_buf    synthesis buffer; elements [0, 32) are copied to
 *                     [512, 544) on entry, so at least 544 elements must be
 *                     writable
 * @param window       window coefficients; elements up to index 511 are read
 * @param dither_state in/out: seeds the accumulator for the first sample and
 *                     receives the low 24 bits of the accumulator left over
 *                     from the last sample computed
 * @param samples      destination for the 16-bit output samples
 * @param incr         distance, in int16_t elements, between two consecutive
 *                     output samples
 *
 * NOTE(review): extr.w and the $ac1 accumulator look like MIPS DSP ASE
 * features, but the guards visible around this function only test
 * HAVE_INLINE_ASM and !R6 - confirm that DSP availability is checked
 * where this function is selected.
 */
64cabdff1aSopenharmony_cistatic void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
65cabdff1aSopenharmony_ci                               int *dither_state, int16_t *samples, ptrdiff_t incr)
66cabdff1aSopenharmony_ci{
67cabdff1aSopenharmony_ci    register const int32_t *w, *w2, *p;
68cabdff1aSopenharmony_ci    int j;
69cabdff1aSopenharmony_ci    int16_t *samples2;
70cabdff1aSopenharmony_ci    int w_asm, p_asm, w_asm1, p_asm1, w_asm2, p_asm2;
71cabdff1aSopenharmony_ci    int w2_asm, w2_asm1, *p_temp1, *p_temp2;
72cabdff1aSopenharmony_ci    int sum1 = 0;
    /* saturation bounds of a signed 16-bit sample */
73cabdff1aSopenharmony_ci    int const min_asm = -32768, max_asm = 32767;
74cabdff1aSopenharmony_ci    int temp1, temp2 = 0, temp3 = 0;
75cabdff1aSopenharmony_ci    int64_t sum;
76cabdff1aSopenharmony_ci
77cabdff1aSopenharmony_ci    /* copy to avoid wrap: mirror synth_buf[0..31] at synth_buf[512..543] */
78cabdff1aSopenharmony_ci    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
    /* samples2 starts at the last output slot and walks backwards in the loop */
79cabdff1aSopenharmony_ci    samples2 = samples + 31 * incr;
80cabdff1aSopenharmony_ci    w = window;
81cabdff1aSopenharmony_ci    w2 = window + 31;
    /* sum is only used to seed temp1, the value loaded into LO below */
82cabdff1aSopenharmony_ci    sum = *dither_state;
83cabdff1aSopenharmony_ci    p = synth_buf + 16;
84cabdff1aSopenharmony_ci    p_temp1 = synth_buf + 16;
85cabdff1aSopenharmony_ci    p_temp2 = synth_buf + 48;
86cabdff1aSopenharmony_ci    temp1 = sum;
87cabdff1aSopenharmony_ci
88cabdff1aSopenharmony_ci    /**
89cabdff1aSopenharmony_ci    * The round_sample() helper of the reference code is not called here;
90cabdff1aSopenharmony_ci    * it is replaced by the extr.w / and / slt / movn sequence in each block.
91cabdff1aSopenharmony_ci    */
    /*
     * First output sample: HI:LO (ac0) starts as 0:temp1, then accumulates
     * +w[64*k] * p[64*k] and -w[32 + 64*k] * p[32 + 64*k] for k = 0..7,
     * with p = synth_buf + 16.
     * NOTE(review): HI is cleared rather than sign-extended from temp1, so
     * the incoming dither state is treated as non-negative - confirm callers
     * only pass back the masked value written at the end of this function.
     */
92cabdff1aSopenharmony_ci    __asm__ volatile (
93cabdff1aSopenharmony_ci         "mthi   $zero                                                    \n\t"
94cabdff1aSopenharmony_ci         "mtlo   %[temp1]                                                 \n\t"
95cabdff1aSopenharmony_ci         "lw     %[w_asm],  0(%[w])                                       \n\t"
96cabdff1aSopenharmony_ci         "lw     %[p_asm],  0(%[p])                                       \n\t"
97cabdff1aSopenharmony_ci         "lw     %[w_asm1], 64*4(%[w])                                    \n\t"
98cabdff1aSopenharmony_ci         "lw     %[p_asm1], 64*4(%[p])                                    \n\t"
99cabdff1aSopenharmony_ci         "lw     %[w_asm2], 128*4(%[w])                                   \n\t"
100cabdff1aSopenharmony_ci         "lw     %[p_asm2], 128*4(%[p])                                   \n\t"
101cabdff1aSopenharmony_ci         "madd   %[w_asm],  %[p_asm]                                      \n\t"
102cabdff1aSopenharmony_ci         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
103cabdff1aSopenharmony_ci         "madd   %[w_asm2], %[p_asm2]                                     \n\t"
104cabdff1aSopenharmony_ci         "lw     %[w_asm],  192*4(%[w])                                   \n\t"
105cabdff1aSopenharmony_ci         "lw     %[p_asm],  192*4(%[p])                                   \n\t"
106cabdff1aSopenharmony_ci         "lw     %[w_asm1], 256*4(%[w])                                   \n\t"
107cabdff1aSopenharmony_ci         "lw     %[p_asm1], 256*4(%[p])                                   \n\t"
108cabdff1aSopenharmony_ci         "lw     %[w_asm2], 320*4(%[w])                                   \n\t"
109cabdff1aSopenharmony_ci         "lw     %[p_asm2], 320*4(%[p])                                   \n\t"
110cabdff1aSopenharmony_ci         "madd   %[w_asm],  %[p_asm]                                      \n\t"
111cabdff1aSopenharmony_ci         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
112cabdff1aSopenharmony_ci         "madd   %[w_asm2], %[p_asm2]                                     \n\t"
113cabdff1aSopenharmony_ci         "lw     %[w_asm],  384*4(%[w])                                   \n\t"
114cabdff1aSopenharmony_ci         "lw     %[p_asm],  384*4(%[p])                                   \n\t"
115cabdff1aSopenharmony_ci         "lw     %[w_asm1], 448*4(%[w])                                   \n\t"
116cabdff1aSopenharmony_ci         "lw     %[p_asm1], 448*4(%[p])                                   \n\t"
117cabdff1aSopenharmony_ci         "lw     %[w_asm2], 32*4(%[w])                                    \n\t"
118cabdff1aSopenharmony_ci         "lw     %[p_asm2], 32*4(%[p])                                    \n\t"
119cabdff1aSopenharmony_ci         "madd   %[w_asm],  %[p_asm]                                      \n\t"
120cabdff1aSopenharmony_ci         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
         /* from here on the products are subtracted (offsets 32, 96, ..., 480) */
121cabdff1aSopenharmony_ci         "msub   %[w_asm2], %[p_asm2]                                     \n\t"
122cabdff1aSopenharmony_ci         "lw     %[w_asm],  96*4(%[w])                                    \n\t"
123cabdff1aSopenharmony_ci         "lw     %[p_asm],  96*4(%[p])                                    \n\t"
124cabdff1aSopenharmony_ci         "lw     %[w_asm1], 160*4(%[w])                                   \n\t"
125cabdff1aSopenharmony_ci         "lw     %[p_asm1], 160*4(%[p])                                   \n\t"
126cabdff1aSopenharmony_ci         "lw     %[w_asm2], 224*4(%[w])                                   \n\t"
127cabdff1aSopenharmony_ci         "lw     %[p_asm2], 224*4(%[p])                                   \n\t"
128cabdff1aSopenharmony_ci         "msub   %[w_asm],  %[p_asm]                                      \n\t"
129cabdff1aSopenharmony_ci         "msub   %[w_asm1], %[p_asm1]                                     \n\t"
130cabdff1aSopenharmony_ci         "msub   %[w_asm2], %[p_asm2]                                     \n\t"
131cabdff1aSopenharmony_ci         "lw     %[w_asm],  288*4(%[w])                                   \n\t"
132cabdff1aSopenharmony_ci         "lw     %[p_asm],  288*4(%[p])                                   \n\t"
         /* the 352 pair is loaded here but only consumed by the msub on
          * w_asm1/p_asm1 after the 480 and 416 loads */
133cabdff1aSopenharmony_ci         "lw     %[w_asm1], 352*4(%[w])                                   \n\t"
134cabdff1aSopenharmony_ci         "lw     %[p_asm1], 352*4(%[p])                                   \n\t"
135cabdff1aSopenharmony_ci         "msub   %[w_asm],  %[p_asm]                                      \n\t"
136cabdff1aSopenharmony_ci         "lw     %[w_asm],  480*4(%[w])                                   \n\t"
137cabdff1aSopenharmony_ci         "lw     %[p_asm],  480*4(%[p])                                   \n\t"
138cabdff1aSopenharmony_ci         "lw     %[w_asm2], 416*4(%[w])                                   \n\t"
139cabdff1aSopenharmony_ci         "lw     %[p_asm2], 416*4(%[p])                                   \n\t"
140cabdff1aSopenharmony_ci         "msub   %[w_asm],  %[p_asm]                                      \n\t"
141cabdff1aSopenharmony_ci         "msub   %[w_asm1], %[p_asm1]                                     \n\t"
142cabdff1aSopenharmony_ci         "msub   %[w_asm2], %[p_asm2]                                     \n\t"
143cabdff1aSopenharmony_ci
144cabdff1aSopenharmony_ci         /*round_sample function from the original code is eliminated,
145cabdff1aSopenharmony_ci          * changed with appropriate assembly instructions
146cabdff1aSopenharmony_ci          * code example:
147cabdff1aSopenharmony_ci
148cabdff1aSopenharmony_ci         "extr.w  %[sum1],$ac0,24                                       \n\t"
149cabdff1aSopenharmony_ci         "mflo %[temp3],  $ac0                                          \n\t"
150cabdff1aSopenharmony_ci         "and  %[temp1],  %[temp3],  0x00ffffff                         \n\t"
151cabdff1aSopenharmony_ci         "slt  %[temp2],  %[sum1],   %[min_asm]                         \n\t"
152cabdff1aSopenharmony_ci         "movn %[sum1],   %[min_asm],%[temp2]                           \n\t"
153cabdff1aSopenharmony_ci         "slt  %[temp2],  %[max_asm],%[sum1]                            \n\t"
154cabdff1aSopenharmony_ci         "movn %[sum1],   %[max_asm],%[temp2]                           \n\t"
155cabdff1aSopenharmony_ci         "sh   %[sum1],   0(%[samples])                                 \n\t"
156cabdff1aSopenharmony_ci         */
157cabdff1aSopenharmony_ci
         /* sum1 = accumulator >> 24; temp1 = low 24 bits of LO, kept as the
          * remainder that seeds the next accumulation; sum1 is then clamped
          * to [min_asm, max_asm] and stored as a halfword.
          * PTR_ADDIU comes from libavutil/mips/asmdefs.h (not visible here);
          * presumably a pointer-width add-immediate, advancing w by one
          * 4-byte coefficient - confirm. */
158cabdff1aSopenharmony_ci         "extr.w %[sum1],   $ac0,       24                                \n\t"
159cabdff1aSopenharmony_ci         "mflo   %[temp3]                                                 \n\t"
160cabdff1aSopenharmony_ci         PTR_ADDIU "%[w],   %[w],       4                                 \n\t"
161cabdff1aSopenharmony_ci         "and    %[temp1],  %[temp3],   0x00ffffff                        \n\t"
162cabdff1aSopenharmony_ci         "slt    %[temp2],  %[sum1],    %[min_asm]                        \n\t"
163cabdff1aSopenharmony_ci         "movn   %[sum1],   %[min_asm], %[temp2]                          \n\t"
164cabdff1aSopenharmony_ci         "slt    %[temp2],  %[max_asm], %[sum1]                           \n\t"
165cabdff1aSopenharmony_ci         "movn   %[sum1],   %[max_asm], %[temp2]                          \n\t"
166cabdff1aSopenharmony_ci         "sh     %[sum1],   0(%[samples])                                 \n\t"
167cabdff1aSopenharmony_ci
168cabdff1aSopenharmony_ci        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
169cabdff1aSopenharmony_ci          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
170cabdff1aSopenharmony_ci          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2),
171cabdff1aSopenharmony_ci          [sum1] "+r" (sum1), [w] "+r" (w), [temp3] "+r" (temp3)
172cabdff1aSopenharmony_ci        : [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
173cabdff1aSopenharmony_ci          [max_asm] "r" (max_asm)
174cabdff1aSopenharmony_ci        : "memory", "hi","lo"
175cabdff1aSopenharmony_ci     );
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci     samples += incr;
178cabdff1aSopenharmony_ci
179cabdff1aSopenharmony_ci    /* Two output samples are computed per iteration so that each value
180cabdff1aSopenharmony_ci       loaded from synth_buf is used for both of them. */
181cabdff1aSopenharmony_ci
    /*
     * Per iteration: ac0 (HI:LO) accumulates the sample stored through
     * samples, ac1 the sample stored through samples2. ac0 is seeded with
     * the remainder in temp1, ac1 with zero. Both accumulators share the
     * p_asm/p_asm1 loads and differ only in the coefficient (w vs. w2).
     */
182cabdff1aSopenharmony_ci    for(j = 1; j < 16; j++) {
183cabdff1aSopenharmony_ci        __asm__ volatile (
184cabdff1aSopenharmony_ci             "mthi   $0,         $ac1                                      \n\t"
185cabdff1aSopenharmony_ci             "mtlo   $0,         $ac1                                      \n\t"
186cabdff1aSopenharmony_ci             "mthi   $0                                                    \n\t"
187cabdff1aSopenharmony_ci             "mtlo   %[temp1]                                              \n\t"
             /* first half: p_temp1 is advanced before use; ac0 adds
              * w * p, ac1 subtracts w2 * p, at offsets 0, 64, ..., 448 */
188cabdff1aSopenharmony_ci             PTR_ADDIU "%[p_temp1], %[p_temp1],    4                       \n\t"
189cabdff1aSopenharmony_ci             "lw     %[w_asm],   0(%[w])                                   \n\t"
190cabdff1aSopenharmony_ci             "lw     %[p_asm],   0(%[p_temp1])                             \n\t"
191cabdff1aSopenharmony_ci             "lw     %[w2_asm],  0(%[w2])                                  \n\t"
192cabdff1aSopenharmony_ci             "lw     %[w_asm1],  64*4(%[w])                                \n\t"
193cabdff1aSopenharmony_ci             "lw     %[p_asm1],  64*4(%[p_temp1])                          \n\t"
194cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 64*4(%[w2])                               \n\t"
195cabdff1aSopenharmony_ci             "madd   %[w_asm],   %[p_asm]                                  \n\t"
196cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
197cabdff1aSopenharmony_ci             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
198cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
199cabdff1aSopenharmony_ci             "lw     %[w_asm],   128*4(%[w])                               \n\t"
200cabdff1aSopenharmony_ci             "lw     %[p_asm],   128*4(%[p_temp1])                         \n\t"
201cabdff1aSopenharmony_ci             "lw     %[w2_asm],  128*4(%[w2])                              \n\t"
202cabdff1aSopenharmony_ci             "lw     %[w_asm1],  192*4(%[w])                               \n\t"
203cabdff1aSopenharmony_ci             "lw     %[p_asm1],  192*4(%[p_temp1])                         \n\t"
204cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 192*4(%[w2])                              \n\t"
205cabdff1aSopenharmony_ci             "madd   %[w_asm],   %[p_asm]                                  \n\t"
206cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
207cabdff1aSopenharmony_ci             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
208cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
209cabdff1aSopenharmony_ci             "lw     %[w_asm],   256*4(%[w])                               \n\t"
210cabdff1aSopenharmony_ci             "lw     %[p_asm],   256*4(%[p_temp1])                         \n\t"
211cabdff1aSopenharmony_ci             "lw     %[w2_asm],  256*4(%[w2])                              \n\t"
212cabdff1aSopenharmony_ci             "lw     %[w_asm1],  320*4(%[w])                               \n\t"
213cabdff1aSopenharmony_ci             "lw     %[p_asm1],  320*4(%[p_temp1])                         \n\t"
214cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 320*4(%[w2])                              \n\t"
215cabdff1aSopenharmony_ci             "madd   %[w_asm],   %[p_asm]                                  \n\t"
216cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
217cabdff1aSopenharmony_ci             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
218cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
219cabdff1aSopenharmony_ci             "lw     %[w_asm],   384*4(%[w])                               \n\t"
220cabdff1aSopenharmony_ci             "lw     %[p_asm],   384*4(%[p_temp1])                         \n\t"
221cabdff1aSopenharmony_ci             "lw     %[w2_asm],  384*4(%[w2])                              \n\t"
222cabdff1aSopenharmony_ci             "lw     %[w_asm1],  448*4(%[w])                               \n\t"
223cabdff1aSopenharmony_ci             "lw     %[p_asm1],  448*4(%[p_temp1])                         \n\t"
224cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 448*4(%[w2])                              \n\t"
225cabdff1aSopenharmony_ci             "madd   %[w_asm],   %[p_asm]                                  \n\t"
226cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
227cabdff1aSopenharmony_ci             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
228cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
             /* second half: p_temp2 is stepped back before use; both
              * accumulators subtract. Coefficient offsets are 32, 96, ...,
              * 480 while the p_temp2 offsets are 0, 64, ..., 448 */
229cabdff1aSopenharmony_ci             PTR_ADDIU "%[p_temp2], %[p_temp2],   -4                      \n\t"
230cabdff1aSopenharmony_ci             "lw     %[w_asm],   32*4(%[w])                                \n\t"
231cabdff1aSopenharmony_ci             "lw     %[p_asm],   0(%[p_temp2])                             \n\t"
232cabdff1aSopenharmony_ci             "lw     %[w2_asm],  32*4(%[w2])                               \n\t"
233cabdff1aSopenharmony_ci             "lw     %[w_asm1],  96*4(%[w])                                \n\t"
234cabdff1aSopenharmony_ci             "lw     %[p_asm1],  64*4(%[p_temp2])                          \n\t"
235cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 96*4(%[w2])                               \n\t"
236cabdff1aSopenharmony_ci             "msub   %[w_asm],   %[p_asm]                                  \n\t"
237cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
238cabdff1aSopenharmony_ci             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
239cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
240cabdff1aSopenharmony_ci             "lw     %[w_asm],   160*4(%[w])                               \n\t"
241cabdff1aSopenharmony_ci             "lw     %[p_asm],   128*4(%[p_temp2])                         \n\t"
242cabdff1aSopenharmony_ci             "lw     %[w2_asm],  160*4(%[w2])                              \n\t"
243cabdff1aSopenharmony_ci             "lw     %[w_asm1],  224*4(%[w])                               \n\t"
244cabdff1aSopenharmony_ci             "lw     %[p_asm1],  192*4(%[p_temp2])                         \n\t"
245cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 224*4(%[w2])                              \n\t"
246cabdff1aSopenharmony_ci             "msub   %[w_asm],   %[p_asm]                                  \n\t"
247cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
248cabdff1aSopenharmony_ci             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
249cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
250cabdff1aSopenharmony_ci             "lw     %[w_asm],   288*4(%[w])                               \n\t"
251cabdff1aSopenharmony_ci             "lw     %[p_asm],   256*4(%[p_temp2])                         \n\t"
252cabdff1aSopenharmony_ci             "lw     %[w2_asm],  288*4(%[w2])                              \n\t"
253cabdff1aSopenharmony_ci             "lw     %[w_asm1],  352*4(%[w])                               \n\t"
254cabdff1aSopenharmony_ci             "lw     %[p_asm1],  320*4(%[p_temp2])                         \n\t"
255cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 352*4(%[w2])                              \n\t"
256cabdff1aSopenharmony_ci             "msub   %[w_asm],   %[p_asm]                                  \n\t"
257cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
258cabdff1aSopenharmony_ci             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
259cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
260cabdff1aSopenharmony_ci             "lw     %[w_asm],   416*4(%[w])                               \n\t"
261cabdff1aSopenharmony_ci             "lw     %[p_asm],   384*4(%[p_temp2])                         \n\t"
262cabdff1aSopenharmony_ci             "lw     %[w2_asm],  416*4(%[w2])                              \n\t"
263cabdff1aSopenharmony_ci             "lw     %[w_asm1],  480*4(%[w])                               \n\t"
264cabdff1aSopenharmony_ci             "lw     %[p_asm1],  448*4(%[p_temp2])                         \n\t"
265cabdff1aSopenharmony_ci             "lw     %[w2_asm1], 480*4(%[w2])                              \n\t"
266cabdff1aSopenharmony_ci             "msub   %[w_asm],   %[p_asm]                                  \n\t"
267cabdff1aSopenharmony_ci             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
268cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
269cabdff1aSopenharmony_ci             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
             /* w moves forward and w2 backward for the next iteration */
270cabdff1aSopenharmony_ci             PTR_ADDIU "%[w],    %[w],             4                       \n\t"
271cabdff1aSopenharmony_ci             PTR_ADDIU "%[w2],   %[w2],            -4                      \n\t"
             /* round ac0: sum1 = ac0 >> 24, temp1 = low 24 bits of LO.
              * The madd by temp3 = 1 adds that remainder into ac1, so the
              * second sample continues from the first one's leftover. */
272cabdff1aSopenharmony_ci             "mflo   %[temp2]                                              \n\t"
273cabdff1aSopenharmony_ci             "extr.w %[sum1],    $ac0,             24                      \n\t"
274cabdff1aSopenharmony_ci             "li     %[temp3],   1                                         \n\t"
275cabdff1aSopenharmony_ci             "and    %[temp1],   %[temp2],         0x00ffffff              \n\t"
276cabdff1aSopenharmony_ci             "madd   $ac1,       %[temp1],         %[temp3]                \n\t"
277cabdff1aSopenharmony_ci             "slt    %[temp2],   %[sum1],          %[min_asm]              \n\t"
278cabdff1aSopenharmony_ci             "movn   %[sum1],    %[min_asm],       %[temp2]                \n\t"
279cabdff1aSopenharmony_ci             "slt    %[temp2],   %[max_asm],       %[sum1]                 \n\t"
280cabdff1aSopenharmony_ci             "movn   %[sum1],    %[max_asm],       %[temp2]                \n\t"
281cabdff1aSopenharmony_ci             "sh     %[sum1],    0(%[samples])                             \n\t"
             /* round ac1 the same way; its low 24 bits become the temp1
              * that seeds ac0 in the next iteration (or the final block) */
282cabdff1aSopenharmony_ci             "mflo   %[temp3],   $ac1                                      \n\t"
283cabdff1aSopenharmony_ci             "extr.w %[sum1],    $ac1,             24                      \n\t"
284cabdff1aSopenharmony_ci             "and    %[temp1],   %[temp3],         0x00ffffff              \n\t"
285cabdff1aSopenharmony_ci             "slt    %[temp2],   %[sum1],          %[min_asm]              \n\t"
286cabdff1aSopenharmony_ci             "movn   %[sum1],    %[min_asm],       %[temp2]                \n\t"
287cabdff1aSopenharmony_ci             "slt    %[temp2],   %[max_asm],       %[sum1]                 \n\t"
288cabdff1aSopenharmony_ci             "movn   %[sum1],    %[max_asm],       %[temp2]                \n\t"
289cabdff1aSopenharmony_ci             "sh     %[sum1],    0(%[samples2])                            \n\t"
290cabdff1aSopenharmony_ci
291cabdff1aSopenharmony_ci            : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
292cabdff1aSopenharmony_ci              [p_asm1] "=&r" (p_asm1), [w2_asm1] "=&r" (w2_asm1),
293cabdff1aSopenharmony_ci              [w2_asm] "=&r" (w2_asm), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
294cabdff1aSopenharmony_ci              [p_temp1] "+r" (p_temp1), [p_temp2] "+r" (p_temp2), [sum1] "+r" (sum1),
295cabdff1aSopenharmony_ci              [w] "+r" (w), [w2] "+r" (w2), [samples] "+r" (samples),
296cabdff1aSopenharmony_ci              [samples2] "+r" (samples2), [temp3] "+r" (temp3)
297cabdff1aSopenharmony_ci            : [min_asm] "r" (min_asm), [max_asm] "r" (max_asm)
298cabdff1aSopenharmony_ci            : "memory", "hi", "lo", "$ac1hi", "$ac1lo"
299cabdff1aSopenharmony_ci        );
300cabdff1aSopenharmony_ci
        /* the two output cursors move towards each other */
301cabdff1aSopenharmony_ci        samples += incr;
302cabdff1aSopenharmony_ci        samples2 -= incr;
303cabdff1aSopenharmony_ci    }
304cabdff1aSopenharmony_ci
305cabdff1aSopenharmony_ci    p = synth_buf + 32;
306cabdff1aSopenharmony_ci
    /*
     * Last remaining sample (samples has been advanced 16 times by now):
     * only the subtracted half exists here, i.e. the accumulator seeded with
     * temp1 minus w[32 + 64*k] * p[64*k] for k = 0..7.
     * NOTE(review): this block only touches ac0, yet its clobber list also
     * names $ac1hi/$ac1lo - looks harmless but unnecessary; confirm.
     */
307cabdff1aSopenharmony_ci    __asm__ volatile (
308cabdff1aSopenharmony_ci        "mthi   $0                                                        \n\t"
309cabdff1aSopenharmony_ci        "mtlo   %[temp1]                                                  \n\t"
310cabdff1aSopenharmony_ci        "lw     %[w_asm],  32*4(%[w])                                     \n\t"
311cabdff1aSopenharmony_ci        "lw     %[p_asm],  0(%[p])                                        \n\t"
312cabdff1aSopenharmony_ci        "lw     %[w_asm1], 96*4(%[w])                                     \n\t"
313cabdff1aSopenharmony_ci        "lw     %[p_asm1], 64*4(%[p])                                     \n\t"
314cabdff1aSopenharmony_ci        "lw     %[w_asm2], 160*4(%[w])                                    \n\t"
315cabdff1aSopenharmony_ci        "lw     %[p_asm2], 128*4(%[p])                                    \n\t"
316cabdff1aSopenharmony_ci        "msub   %[w_asm],  %[p_asm]                                       \n\t"
317cabdff1aSopenharmony_ci        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
318cabdff1aSopenharmony_ci        "msub   %[w_asm2], %[p_asm2]                                      \n\t"
319cabdff1aSopenharmony_ci        "lw     %[w_asm],  224*4(%[w])                                    \n\t"
320cabdff1aSopenharmony_ci        "lw     %[p_asm],  192*4(%[p])                                    \n\t"
321cabdff1aSopenharmony_ci        "lw     %[w_asm1], 288*4(%[w])                                    \n\t"
322cabdff1aSopenharmony_ci        "lw     %[p_asm1], 256*4(%[p])                                    \n\t"
323cabdff1aSopenharmony_ci        "lw     %[w_asm2], 352*4(%[w])                                    \n\t"
324cabdff1aSopenharmony_ci        "lw     %[p_asm2], 320*4(%[p])                                    \n\t"
325cabdff1aSopenharmony_ci        "msub   %[w_asm],  %[p_asm]                                       \n\t"
326cabdff1aSopenharmony_ci        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
327cabdff1aSopenharmony_ci        "msub   %[w_asm2], %[p_asm2]                                      \n\t"
328cabdff1aSopenharmony_ci        "lw     %[w_asm],  416*4(%[w])                                    \n\t"
329cabdff1aSopenharmony_ci        "lw     %[p_asm],  384*4(%[p])                                    \n\t"
330cabdff1aSopenharmony_ci        "lw     %[w_asm1], 480*4(%[w])                                    \n\t"
331cabdff1aSopenharmony_ci        "lw     %[p_asm1], 448*4(%[p])                                    \n\t"
332cabdff1aSopenharmony_ci        "msub   %[w_asm],  %[p_asm]                                       \n\t"
333cabdff1aSopenharmony_ci        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
        /* shift, keep the low 24 bits in temp1, clamp and store, as in
         * the blocks above */
334cabdff1aSopenharmony_ci        "extr.w %[sum1],   $ac0,       24                                 \n\t"
335cabdff1aSopenharmony_ci        "mflo   %[temp2]                                                  \n\t"
336cabdff1aSopenharmony_ci        "and    %[temp1],  %[temp2],   0x00ffffff                         \n\t"
337cabdff1aSopenharmony_ci        "slt    %[temp2],  %[sum1],    %[min_asm]                         \n\t"
338cabdff1aSopenharmony_ci        "movn   %[sum1],   %[min_asm], %[temp2]                           \n\t"
339cabdff1aSopenharmony_ci        "slt    %[temp2],  %[max_asm], %[sum1]                            \n\t"
340cabdff1aSopenharmony_ci        "movn   %[sum1],   %[max_asm], %[temp2]                           \n\t"
341cabdff1aSopenharmony_ci        "sh     %[sum1],   0(%[samples])                                  \n\t"
342cabdff1aSopenharmony_ci
343cabdff1aSopenharmony_ci        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
344cabdff1aSopenharmony_ci          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
345cabdff1aSopenharmony_ci          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2), [sum1] "+r" (sum1)
346cabdff1aSopenharmony_ci        : [w] "r" (w), [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
347cabdff1aSopenharmony_ci          [max_asm] "r" (max_asm)
348cabdff1aSopenharmony_ci        : "memory", "hi", "lo", "$ac1hi", "$ac1lo"
349cabdff1aSopenharmony_ci     );
350cabdff1aSopenharmony_ci
    /* hand the 24-bit remainder of the last sample back to the caller */
351cabdff1aSopenharmony_ci    *dither_state= temp1;
352cabdff1aSopenharmony_ci}
353cabdff1aSopenharmony_ci
354cabdff1aSopenharmony_cistatic void imdct36_mips_fixed(int *out, int *buf, int *in, int *win)
355cabdff1aSopenharmony_ci{
356cabdff1aSopenharmony_ci    int j;
357cabdff1aSopenharmony_ci    int t0, t1, t2, t3, s0, s1, s2, s3;
358cabdff1aSopenharmony_ci    int tmp[18], *tmp1, *in1;
359cabdff1aSopenharmony_ci    /* temporary variables */
360cabdff1aSopenharmony_ci    int temp_reg1, temp_reg2, temp_reg3, temp_reg4, temp_reg5, temp_reg6;
361cabdff1aSopenharmony_ci    int t4, t5, t6, t8, t7;
362cabdff1aSopenharmony_ci
363cabdff1aSopenharmony_ci   /* values defined in macros and tables are
364cabdff1aSopenharmony_ci    * eliminated - they are directly loaded in appropriate variables
365cabdff1aSopenharmony_ci    */
366cabdff1aSopenharmony_ci    int const C_1  =  4229717092; /* cos(pi*1/18)*2  */
367cabdff1aSopenharmony_ci    int const C_2  =  4035949074; /* cos(pi*2/18)*2  */
368cabdff1aSopenharmony_ci    int const C_3  =  575416510;  /* -cos(pi*3/18)*2 */
369cabdff1aSopenharmony_ci    int const C_3A =  3719550786; /* cos(pi*3/18)*2  */
370cabdff1aSopenharmony_ci    int const C_4  =  1004831466; /* -cos(pi*4/18)*2 */
371cabdff1aSopenharmony_ci    int const C_5  =  1534215534; /* -cos(pi*5/18)*2 */
372cabdff1aSopenharmony_ci    int const C_7  = -1468965330; /* -cos(pi*7/18)*2 */
373cabdff1aSopenharmony_ci    int const C_8  = -745813244;  /* -cos(pi*8/18)*2 */
374cabdff1aSopenharmony_ci
375cabdff1aSopenharmony_ci   /*
376cabdff1aSopenharmony_ci    * instructions of the first two loops are reorganized and loops are unrolled,
377cabdff1aSopenharmony_ci    * in order to eliminate unnecessary readings and writings in array
378cabdff1aSopenharmony_ci    */
379cabdff1aSopenharmony_ci
380cabdff1aSopenharmony_ci    __asm__ volatile (
381cabdff1aSopenharmony_ci        "lw   %[t1], 17*4(%[in])                                         \n\t"
382cabdff1aSopenharmony_ci        "lw   %[t2], 16*4(%[in])                                         \n\t"
383cabdff1aSopenharmony_ci        "lw   %[t3], 15*4(%[in])                                         \n\t"
384cabdff1aSopenharmony_ci        "lw   %[t4], 14*4(%[in])                                         \n\t"
385cabdff1aSopenharmony_ci        "addu %[t1], %[t1],      %[t2]                                   \n\t"
386cabdff1aSopenharmony_ci        "addu %[t2], %[t2],      %[t3]                                   \n\t"
387cabdff1aSopenharmony_ci        "addu %[t3], %[t3],      %[t4]                                   \n\t"
388cabdff1aSopenharmony_ci        "lw   %[t5], 13*4(%[in])                                         \n\t"
389cabdff1aSopenharmony_ci        "addu %[t1], %[t1],      %[t3]                                   \n\t"
390cabdff1aSopenharmony_ci        "sw   %[t2], 16*4(%[in])                                         \n\t"
391cabdff1aSopenharmony_ci        "lw   %[t6], 12*4(%[in])                                         \n\t"
392cabdff1aSopenharmony_ci        "sw   %[t1], 17*4(%[in])                                         \n\t"
393cabdff1aSopenharmony_ci        "addu %[t4], %[t4],      %[t5]                                   \n\t"
394cabdff1aSopenharmony_ci        "addu %[t5], %[t5],      %[t6]                                   \n\t"
395cabdff1aSopenharmony_ci        "lw   %[t7], 11*4(%[in])                                         \n\t"
396cabdff1aSopenharmony_ci        "addu %[t3], %[t3],      %[t5]                                   \n\t"
397cabdff1aSopenharmony_ci        "sw   %[t4], 14*4(%[in])                                         \n\t"
398cabdff1aSopenharmony_ci        "lw   %[t8], 10*4(%[in])                                         \n\t"
399cabdff1aSopenharmony_ci        "sw   %[t3], 15*4(%[in])                                         \n\t"
400cabdff1aSopenharmony_ci        "addu %[t6], %[t6],      %[t7]                                   \n\t"
401cabdff1aSopenharmony_ci        "addu %[t7], %[t7],      %[t8]                                   \n\t"
402cabdff1aSopenharmony_ci        "sw   %[t6], 12*4(%[in])                                         \n\t"
403cabdff1aSopenharmony_ci        "addu %[t5], %[t5],      %[t7]                                   \n\t"
404cabdff1aSopenharmony_ci        "lw   %[t1], 9*4(%[in])                                          \n\t"
405cabdff1aSopenharmony_ci        "lw   %[t2], 8*4(%[in])                                          \n\t"
406cabdff1aSopenharmony_ci        "sw   %[t5], 13*4(%[in])                                         \n\t"
407cabdff1aSopenharmony_ci        "addu %[t8], %[t8],      %[t1]                                   \n\t"
408cabdff1aSopenharmony_ci        "addu %[t1], %[t1],      %[t2]                                   \n\t"
409cabdff1aSopenharmony_ci        "sw   %[t8], 10*4(%[in])                                         \n\t"
410cabdff1aSopenharmony_ci        "addu %[t7], %[t7],      %[t1]                                   \n\t"
411cabdff1aSopenharmony_ci        "lw   %[t3], 7*4(%[in])                                          \n\t"
412cabdff1aSopenharmony_ci        "lw   %[t4], 6*4(%[in])                                          \n\t"
413cabdff1aSopenharmony_ci        "sw   %[t7], 11*4(%[in])                                         \n\t"
414cabdff1aSopenharmony_ci        "addu %[t2], %[t2],      %[t3]                                   \n\t"
415cabdff1aSopenharmony_ci        "addu %[t3], %[t3],      %[t4]                                   \n\t"
416cabdff1aSopenharmony_ci        "sw   %[t2], 8*4(%[in])                                          \n\t"
417cabdff1aSopenharmony_ci        "addu %[t1], %[t1],      %[t3]                                   \n\t"
418cabdff1aSopenharmony_ci        "lw   %[t5], 5*4(%[in])                                          \n\t"
419cabdff1aSopenharmony_ci        "lw   %[t6], 4*4(%[in])                                          \n\t"
420cabdff1aSopenharmony_ci        "sw   %[t1], 9*4(%[in])                                          \n\t"
421cabdff1aSopenharmony_ci        "addu %[t4], %[t4],      %[t5]                                   \n\t"
422cabdff1aSopenharmony_ci        "addu %[t5], %[t5],      %[t6]                                   \n\t"
423cabdff1aSopenharmony_ci        "sw   %[t4], 6*4(%[in])                                          \n\t"
424cabdff1aSopenharmony_ci        "addu %[t3], %[t3],      %[t5]                                   \n\t"
425cabdff1aSopenharmony_ci        "lw   %[t7], 3*4(%[in])                                          \n\t"
426cabdff1aSopenharmony_ci        "lw   %[t8], 2*4(%[in])                                          \n\t"
427cabdff1aSopenharmony_ci        "sw   %[t3], 7*4(%[in])                                          \n\t"
428cabdff1aSopenharmony_ci        "addu %[t6], %[t6],      %[t7]                                   \n\t"
429cabdff1aSopenharmony_ci        "addu %[t7], %[t7],      %[t8]                                   \n\t"
430cabdff1aSopenharmony_ci        "sw   %[t6], 4*4(%[in])                                          \n\t"
431cabdff1aSopenharmony_ci        "addu %[t5], %[t5],      %[t7]                                   \n\t"
432cabdff1aSopenharmony_ci        "lw   %[t1], 1*4(%[in])                                          \n\t"
433cabdff1aSopenharmony_ci        "lw   %[t2], 0*4(%[in])                                          \n\t"
434cabdff1aSopenharmony_ci        "sw   %[t5], 5*4(%[in])                                          \n\t"
435cabdff1aSopenharmony_ci        "addu %[t8], %[t8],      %[t1]                                   \n\t"
436cabdff1aSopenharmony_ci        "addu %[t1], %[t1],      %[t2]                                   \n\t"
437cabdff1aSopenharmony_ci        "sw   %[t8], 2*4(%[in])                                          \n\t"
438cabdff1aSopenharmony_ci        "addu %[t7], %[t7],      %[t1]                                   \n\t"
439cabdff1aSopenharmony_ci        "sw   %[t7], 3*4(%[in])                                          \n\t"
440cabdff1aSopenharmony_ci        "sw   %[t1], 1*4(%[in])                                          \n\t"
441cabdff1aSopenharmony_ci
442cabdff1aSopenharmony_ci        : [in] "+r" (in), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
443cabdff1aSopenharmony_ci          [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r" (t6),
444cabdff1aSopenharmony_ci          [t7] "=&r" (t7), [t8] "=&r" (t8)
445cabdff1aSopenharmony_ci        :
446cabdff1aSopenharmony_ci        : "memory"
447cabdff1aSopenharmony_ci    );
448cabdff1aSopenharmony_ci
449cabdff1aSopenharmony_ci    for(j = 0; j < 2; j++) {
450cabdff1aSopenharmony_ci
451cabdff1aSopenharmony_ci        tmp1 = tmp + j;
452cabdff1aSopenharmony_ci        in1 = in + j;
453cabdff1aSopenharmony_ci
454cabdff1aSopenharmony_ci         /**
455cabdff1aSopenharmony_ci         *  Original constants are multiplied by two in advance
456cabdff1aSopenharmony_ci         *  for assembly optimization (e.g. C_2 = 2 * C2).
457cabdff1aSopenharmony_ci         *  That can lead to overflow in operations where they are used.
458cabdff1aSopenharmony_ci         *
459cabdff1aSopenharmony_ci         *  Example of the solution:
460cabdff1aSopenharmony_ci         *
461cabdff1aSopenharmony_ci         *  in original code:
462cabdff1aSopenharmony_ci         *  t0 = ((int64_t)(in1[2*2] + in1[2*4]) * (int64_t)(2*C2))>>32
463cabdff1aSopenharmony_ci         *
464cabdff1aSopenharmony_ci         *  in assembly:
465cabdff1aSopenharmony_ci         *  C_2 = 2 * C2;
466cabdff1aSopenharmony_ci         *   .
467cabdff1aSopenharmony_ci         *   .
468cabdff1aSopenharmony_ci         *  "lw   %[t7],       4*4(%[in1])                               \n\t"
469cabdff1aSopenharmony_ci         *  "lw   %[t8],       8*4(%[in1])                               \n\t"
470cabdff1aSopenharmony_ci         *  "addu %[temp_reg2],%[t7],       %[t8]                        \n\t"
471cabdff1aSopenharmony_ci         *  "multu %[C_2],     %[temp_reg2]                              \n\t"
472cabdff1aSopenharmony_ci         *  "mfhi %[temp_reg1]                                           \n\t"
473cabdff1aSopenharmony_ci         *  "sra  %[temp_reg2],%[temp_reg2],31                           \n\t"
474cabdff1aSopenharmony_ci         *  "move %[t0],       $0                                        \n\t"
475cabdff1aSopenharmony_ci         *  "movn %[t0],       %[C_2],      %[temp_reg2]                 \n\t"
476cabdff1aSopenharmony_ci         *  "sub  %[t0],       %[temp_reg1],%[t0]                        \n\t"
477cabdff1aSopenharmony_ci         */
478cabdff1aSopenharmony_ci
479cabdff1aSopenharmony_ci        __asm__ volatile (
480cabdff1aSopenharmony_ci            "lw    %[t7],        4*4(%[in1])                               \n\t"
481cabdff1aSopenharmony_ci            "lw    %[t8],        8*4(%[in1])                               \n\t"
482cabdff1aSopenharmony_ci            "lw    %[t6],        16*4(%[in1])                              \n\t"
483cabdff1aSopenharmony_ci            "lw    %[t4],        0*4(%[in1])                               \n\t"
484cabdff1aSopenharmony_ci            "addu  %[temp_reg2], %[t7],        %[t8]                       \n\t"
485cabdff1aSopenharmony_ci            "addu  %[t2],        %[t6],        %[t8]                       \n\t"
486cabdff1aSopenharmony_ci            "multu %[C_2],       %[temp_reg2]                              \n\t"
487cabdff1aSopenharmony_ci            "lw    %[t5],        12*4(%[in1])                              \n\t"
488cabdff1aSopenharmony_ci            "sub   %[t2],        %[t2],        %[t7]                       \n\t"
489cabdff1aSopenharmony_ci            "sub   %[t1],        %[t4],        %[t5]                       \n\t"
490cabdff1aSopenharmony_ci            "sra   %[t3],        %[t5],        1                           \n\t"
491cabdff1aSopenharmony_ci            "sra   %[temp_reg1], %[t2],        1                           \n\t"
492cabdff1aSopenharmony_ci            "addu  %[t3],        %[t3],        %[t4]                       \n\t"
493cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[t1],        %[temp_reg1]                \n\t"
494cabdff1aSopenharmony_ci            "sra   %[temp_reg2], %[temp_reg2], 31                          \n\t"
495cabdff1aSopenharmony_ci            "sw    %[temp_reg1], 6*4(%[tmp1])                              \n\t"
496cabdff1aSopenharmony_ci            "move  %[t0],        $0                                        \n\t"
497cabdff1aSopenharmony_ci            "movn  %[t0],        %[C_2],       %[temp_reg2]                \n\t"
498cabdff1aSopenharmony_ci            "mfhi  %[temp_reg1]                                            \n\t"
499cabdff1aSopenharmony_ci            "addu  %[t1],        %[t1],        %[t2]                       \n\t"
500cabdff1aSopenharmony_ci            "sw    %[t1],        16*4(%[tmp1])                             \n\t"
501cabdff1aSopenharmony_ci            "sub   %[temp_reg4], %[t8],        %[t6]                       \n\t"
502cabdff1aSopenharmony_ci            "add   %[temp_reg2], %[t7],        %[t6]                       \n\t"
503cabdff1aSopenharmony_ci            "mult  $ac1,         %[C_8],       %[temp_reg4]                \n\t"
504cabdff1aSopenharmony_ci            "multu $ac2,         %[C_4],       %[temp_reg2]                \n\t"
505cabdff1aSopenharmony_ci            "sub   %[t0],        %[temp_reg1], %[t0]                       \n\t"
506cabdff1aSopenharmony_ci            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
507cabdff1aSopenharmony_ci            "move  %[t2],        $0                                        \n\t"
508cabdff1aSopenharmony_ci            "movn  %[t2],        %[C_4],       %[temp_reg1]                \n\t"
509cabdff1aSopenharmony_ci            "mfhi  %[t1],        $ac1                                      \n\t"
510cabdff1aSopenharmony_ci            "mfhi  %[temp_reg1], $ac2                                      \n\t"
511cabdff1aSopenharmony_ci            "lw    %[t6],        10*4(%[in1])                              \n\t"
512cabdff1aSopenharmony_ci            "lw    %[t8],        14*4(%[in1])                              \n\t"
513cabdff1aSopenharmony_ci            "lw    %[t7],        2*4(%[in1])                               \n\t"
514cabdff1aSopenharmony_ci            "lw    %[t4],        6*4(%[in1])                               \n\t"
515cabdff1aSopenharmony_ci            "sub   %[temp_reg3], %[t3],        %[t0]                       \n\t"
516cabdff1aSopenharmony_ci            "add   %[temp_reg4], %[t3],        %[t0]                       \n\t"
517cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
518cabdff1aSopenharmony_ci            "add   %[temp_reg4], %[temp_reg4], %[t1]                       \n\t"
519cabdff1aSopenharmony_ci            "sub   %[t2],        %[temp_reg1], %[t2]                       \n\t"
520cabdff1aSopenharmony_ci            "sw    %[temp_reg4], 2*4(%[tmp1])                              \n\t"
521cabdff1aSopenharmony_ci            "sub   %[temp_reg3], %[temp_reg3], %[t2]                       \n\t"
522cabdff1aSopenharmony_ci            "add   %[temp_reg1], %[t3],        %[t2]                       \n\t"
523cabdff1aSopenharmony_ci            "sw    %[temp_reg3], 10*4(%[tmp1])                             \n\t"
524cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[temp_reg1], %[t1]                       \n\t"
525cabdff1aSopenharmony_ci            "addu  %[temp_reg2], %[t6],        %[t8]                       \n\t"
526cabdff1aSopenharmony_ci            "sw    %[temp_reg1], 14*4(%[tmp1])                             \n\t"
527cabdff1aSopenharmony_ci            "sub   %[temp_reg2], %[temp_reg2], %[t7]                       \n\t"
528cabdff1aSopenharmony_ci            "addu  %[temp_reg3], %[t7],        %[t6]                       \n\t"
529cabdff1aSopenharmony_ci            "multu $ac3,         %[C_3],       %[temp_reg2]                \n\t"
530cabdff1aSopenharmony_ci            "multu %[C_1],       %[temp_reg3]                              \n\t"
531cabdff1aSopenharmony_ci            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
532cabdff1aSopenharmony_ci            "move  %[t1],        $0                                        \n\t"
533cabdff1aSopenharmony_ci            "sra   %[temp_reg3], %[temp_reg3], 31                          \n\t"
534cabdff1aSopenharmony_ci            "movn  %[t1],        %[C_3],       %[temp_reg1]                \n\t"
535cabdff1aSopenharmony_ci            "mfhi  %[temp_reg1], $ac3                                      \n\t"
536cabdff1aSopenharmony_ci            "mfhi  %[temp_reg4]                                            \n\t"
537cabdff1aSopenharmony_ci            "move  %[t2],        $0                                        \n\t"
538cabdff1aSopenharmony_ci            "movn  %[t2],        %[C_1],       %[temp_reg3]                \n\t"
539cabdff1aSopenharmony_ci            "sub   %[temp_reg3], %[t6],        %[t8]                       \n\t"
540cabdff1aSopenharmony_ci            "sub   %[t2],        %[temp_reg4], %[t2]                       \n\t"
541cabdff1aSopenharmony_ci            "multu $ac1,         %[C_7],       %[temp_reg3]                \n\t"
542cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
543cabdff1aSopenharmony_ci            "sra   %[temp_reg4], %[temp_reg3], 31                          \n\t"
544cabdff1aSopenharmony_ci            "sub   %[t1],        %[temp_reg1], %[t1]                       \n\t"
545cabdff1aSopenharmony_ci            "move  %[t3],        $0                                        \n\t"
546cabdff1aSopenharmony_ci            "sw    %[t1],        4*4(%[tmp1])                              \n\t"
547cabdff1aSopenharmony_ci            "movn  %[t3],        %[C_7],       %[temp_reg4]                \n\t"
548cabdff1aSopenharmony_ci            "multu $ac2,         %[C_3A],      %[t4]                       \n\t"
549cabdff1aSopenharmony_ci            "add   %[temp_reg2], %[t7],        %[t8]                       \n\t"
550cabdff1aSopenharmony_ci            "move  %[t1],        $0                                        \n\t"
551cabdff1aSopenharmony_ci            "mfhi  %[temp_reg4], $ac1                                      \n\t"
552cabdff1aSopenharmony_ci            "multu $ac3,%[C_5],  %[temp_reg2]                              \n\t"
553cabdff1aSopenharmony_ci            "move  %[t0],        $0                                        \n\t"
554cabdff1aSopenharmony_ci            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
555cabdff1aSopenharmony_ci            "movn  %[t1],%[C_5], %[temp_reg1]                              \n\t"
556cabdff1aSopenharmony_ci            "sub   %[temp_reg4], %[temp_reg4], %[temp_reg3]                \n\t"
557cabdff1aSopenharmony_ci            "mfhi  %[temp_reg1], $ac3                                      \n\t"
558cabdff1aSopenharmony_ci            "sra   %[temp_reg3], %[t4],        31                          \n\t"
559cabdff1aSopenharmony_ci            "movn  %[t0],        %[C_3A],      %[temp_reg3]                \n\t"
560cabdff1aSopenharmony_ci            "mfhi  %[temp_reg3], $ac2                                      \n\t"
561cabdff1aSopenharmony_ci            "sub   %[t3],        %[temp_reg4], %[t3]                       \n\t"
562cabdff1aSopenharmony_ci            "add   %[temp_reg4], %[t3],        %[t2]                       \n\t"
563cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
564cabdff1aSopenharmony_ci            "sub   %[t1],        %[temp_reg1], %[t1]                       \n\t"
565cabdff1aSopenharmony_ci            "sub   %[t0],        %[temp_reg3], %[t0]                       \n\t"
566cabdff1aSopenharmony_ci            "add   %[temp_reg1], %[t2],        %[t1]                       \n\t"
567cabdff1aSopenharmony_ci            "add   %[temp_reg4], %[temp_reg4], %[t0]                       \n\t"
568cabdff1aSopenharmony_ci            "sub   %[temp_reg2], %[t3],        %[t1]                       \n\t"
569cabdff1aSopenharmony_ci            "sw    %[temp_reg4], 0*4(%[tmp1])                              \n\t"
570cabdff1aSopenharmony_ci            "sub   %[temp_reg1], %[temp_reg1], %[t0]                       \n\t"
571cabdff1aSopenharmony_ci            "sub   %[temp_reg2], %[temp_reg2], %[t0]                       \n\t"
572cabdff1aSopenharmony_ci            "sw    %[temp_reg1], 12*4(%[tmp1])                             \n\t"
573cabdff1aSopenharmony_ci            "sw    %[temp_reg2], 8*4(%[tmp1])                              \n\t"
574cabdff1aSopenharmony_ci
575cabdff1aSopenharmony_ci            : [t7] "=&r" (t7), [temp_reg1] "=&r" (temp_reg1),
576cabdff1aSopenharmony_ci              [temp_reg2] "=&r" (temp_reg2), [temp_reg4] "=&r" (temp_reg4),
577cabdff1aSopenharmony_ci              [temp_reg3] "=&r" (temp_reg3), [t8] "=&r" (t8), [t0] "=&r" (t0),
578cabdff1aSopenharmony_ci              [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r"(t6), [t2] "=&r" (t2),
579cabdff1aSopenharmony_ci              [t3] "=&r" (t3), [t1] "=&r" (t1)
580cabdff1aSopenharmony_ci            : [C_2] "r" (C_2), [in1] "r" (in1), [tmp1] "r" (tmp1), [C_8] "r" (C_8),
581cabdff1aSopenharmony_ci              [C_4] "r" (C_4), [C_3] "r" (C_3), [C_1] "r" (C_1), [C_7] "r" (C_7),
582cabdff1aSopenharmony_ci              [C_3A] "r" (C_3A), [C_5] "r" (C_5)
583cabdff1aSopenharmony_ci            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
584cabdff1aSopenharmony_ci              "$ac3hi", "$ac3lo"
585cabdff1aSopenharmony_ci         );
586cabdff1aSopenharmony_ci    }
587cabdff1aSopenharmony_ci
588cabdff1aSopenharmony_ci    /**
589cabdff1aSopenharmony_ci    * loop is unrolled four times
590cabdff1aSopenharmony_ci    *
591cabdff1aSopenharmony_ci    * values defined in tables (icos36[] and icos36h[]) are not loaded from
592cabdff1aSopenharmony_ci    * these tables - they are directly loaded in appropriate registers
593cabdff1aSopenharmony_ci    *
594cabdff1aSopenharmony_ci    */
595cabdff1aSopenharmony_ci
596cabdff1aSopenharmony_ci    __asm__ volatile (
597cabdff1aSopenharmony_ci        "lw     %[t2],        1*4(%[tmp])                                  \n\t"
598cabdff1aSopenharmony_ci        "lw     %[t3],        3*4(%[tmp])                                  \n\t"
599cabdff1aSopenharmony_ci        "lw     %[t0],        0*4(%[tmp])                                  \n\t"
600cabdff1aSopenharmony_ci        "lw     %[t1],        2*4(%[tmp])                                  \n\t"
601cabdff1aSopenharmony_ci        "addu   %[temp_reg1], %[t3],        %[t2]                          \n\t"
602cabdff1aSopenharmony_ci        "li     %[temp_reg2], 0x807D2B1E                                   \n\t"
603cabdff1aSopenharmony_ci        "move   %[s1],        $0                                           \n\t"
604cabdff1aSopenharmony_ci        "multu  %[temp_reg2], %[temp_reg1]                                 \n\t"
605cabdff1aSopenharmony_ci        "sra    %[temp_reg1], %[temp_reg1], 31                             \n\t"
606cabdff1aSopenharmony_ci        "movn   %[s1],        %[temp_reg2], %[temp_reg1]                   \n\t"
607cabdff1aSopenharmony_ci        "sub    %[temp_reg3], %[t3],        %[t2]                          \n\t"
608cabdff1aSopenharmony_ci        "li     %[temp_reg4], 0x2de5151                                    \n\t"
609cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2]                                               \n\t"
610cabdff1aSopenharmony_ci        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
611cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 9*4(%[win])                                  \n\t"
612cabdff1aSopenharmony_ci        "mult   $ac1,         %[temp_reg4], %[temp_reg3]                   \n\t"
613cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 4*9*4(%[buf])                                \n\t"
614cabdff1aSopenharmony_ci        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
615cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 29*4(%[win])                                 \n\t"
616cabdff1aSopenharmony_ci        "subu   %[s1],        %[temp_reg2], %[s1]                          \n\t"
617cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 28*4(%[win])                                 \n\t"
618cabdff1aSopenharmony_ci        "add    %[t0],        %[s0],        %[s1]                          \n\t"
619cabdff1aSopenharmony_ci        "extr.w %[s3],        $ac1,23                                      \n\t"
620cabdff1aSopenharmony_ci        "mult   $ac2,         %[t0],        %[temp_reg3]                   \n\t"
621cabdff1aSopenharmony_ci        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
622cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*8*4(%[buf])                                \n\t"
623cabdff1aSopenharmony_ci        "mult   %[t1],        %[temp_reg5]                                 \n\t"
624cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 8*4(%[win])                                  \n\t"
625cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac2                                         \n\t"
626cabdff1aSopenharmony_ci        "mult   $ac3,         %[t0],        %[temp_reg4]                   \n\t"
627cabdff1aSopenharmony_ci        "add    %[t0],        %[s2],        %[s3]                          \n\t"
628cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5]                                               \n\t"
629cabdff1aSopenharmony_ci        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
630cabdff1aSopenharmony_ci        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
631cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 4*9*4(%[buf])                                \n\t"
632cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4], $ac3                                         \n\t"
633cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 37*4(%[win])                                 \n\t"
634cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac1                                         \n\t"
635cabdff1aSopenharmony_ci        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
636cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 17*4(%[win])                                 \n\t"
637cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 32*9*4(%[out])                               \n\t"
638cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 4*8*4(%[buf])                                \n\t"
639cabdff1aSopenharmony_ci        "mult   %[t1],        %[temp_reg6]                                 \n\t"
640cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
641cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 0*4(%[win])                                  \n\t"
642cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 4*17*4(%[buf])                               \n\t"
643cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 8*32*4(%[out])                               \n\t"
644cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6]                                               \n\t"
645cabdff1aSopenharmony_ci        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
646cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 20*4(%[win])                                 \n\t"
647cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 0(%[buf])                                    \n\t"
648cabdff1aSopenharmony_ci        "mult   $ac2,         %[t0],        %[temp_reg3]                   \n\t"
649cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg4]                                 \n\t"
650cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac1                                         \n\t"
651cabdff1aSopenharmony_ci        "lw     %[t0],        4*4(%[tmp])                                  \n\t"
652cabdff1aSopenharmony_ci        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
653cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac2                                         \n\t"
654cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4]                                               \n\t"
655cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 17*32*4(%[out])                              \n\t"
656cabdff1aSopenharmony_ci        "lw     %[t1],        6*4(%[tmp])                                  \n\t"
657cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
658cabdff1aSopenharmony_ci        "lw     %[t2],        5*4(%[tmp])                                  \n\t"
659cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 0*32*4(%[out])                               \n\t"
660cabdff1aSopenharmony_ci        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
661cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 4*17*4(%[buf])                               \n\t"
662cabdff1aSopenharmony_ci        "lw     %[t3],        7*4(%[tmp])                                  \n\t"
663cabdff1aSopenharmony_ci        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
664cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 0(%[buf])                                    \n\t"
665cabdff1aSopenharmony_ci        "addu   %[temp_reg5], %[t3],        %[t2]                          \n\t"
666cabdff1aSopenharmony_ci        "li     %[temp_reg6], 0x8483EE0C                                   \n\t"
667cabdff1aSopenharmony_ci        "move   %[s1],        $0                                           \n\t"
668cabdff1aSopenharmony_ci        "multu  %[temp_reg6], %[temp_reg5]                                 \n\t"
669cabdff1aSopenharmony_ci        "sub    %[temp_reg1], %[t3],        %[t2]                          \n\t"
670cabdff1aSopenharmony_ci        "li     %[temp_reg2], 0xf746ea                                     \n\t"
671cabdff1aSopenharmony_ci        "sra    %[temp_reg5], %[temp_reg5], 31                             \n\t"
672cabdff1aSopenharmony_ci        "mult   $ac1,         %[temp_reg2], %[temp_reg1]                   \n\t"
673cabdff1aSopenharmony_ci        "movn   %[s1],        %[temp_reg6], %[temp_reg5]                   \n\t"
674cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5]                                               \n\t"
675cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 10*4(%[win])                                 \n\t"
676cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 4*10*4(%[buf])                               \n\t"
677cabdff1aSopenharmony_ci        "extr.w %[s3],        $ac1,         23                             \n\t"
678cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*7*4(%[buf])                                \n\t"
679cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 7*4(%[win])                                  \n\t"
680cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 30*4(%[win])                                 \n\t"
681cabdff1aSopenharmony_ci        "subu   %[s1],        %[temp_reg5], %[s1]                          \n\t"
682cabdff1aSopenharmony_ci        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
683cabdff1aSopenharmony_ci        "add    %[t0],        %[s0],        %[s1]                          \n\t"
684cabdff1aSopenharmony_ci        "mult   $ac2,         %[t1],        %[temp_reg3]                   \n\t"
685cabdff1aSopenharmony_ci        "mult   $ac3,         %[t1],        %[temp_reg2]                   \n\t"
686cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg6]                                 \n\t"
687cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 27*4(%[win])                                 \n\t"
688cabdff1aSopenharmony_ci        "mult   $ac1,         %[t0],        %[temp_reg5]                   \n\t"
689cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac2                                         \n\t"
690cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac3                                         \n\t"
691cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6]                                               \n\t"
692cabdff1aSopenharmony_ci        "add    %[t0],        %[s2],        %[s3]                          \n\t"
693cabdff1aSopenharmony_ci        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
694cabdff1aSopenharmony_ci        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
695cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 16*4(%[win])                                 \n\t"
696cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5], $ac1                                         \n\t"
697cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 32*10*4(%[out])                              \n\t"
698cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
699cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 4*16*4(%[buf])                               \n\t"
700cabdff1aSopenharmony_ci        "sw     %[temp_reg6], 4*10*4(%[buf])                               \n\t"
701cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 7*32*4(%[out])                               \n\t"
702cabdff1aSopenharmony_ci        "mult   $ac2,         %[t1],        %[temp_reg4]                   \n\t"
703cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 4*7*4(%[buf])                                \n\t"
704cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 1*4(%[win])                                  \n\t"
705cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 4*1*4(%[buf])                                \n\t"
706cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 36*4(%[win])                                 \n\t"
707cabdff1aSopenharmony_ci        "mult   $ac3,         %[t1],        %[temp_reg6]                   \n\t"
708cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 21*4(%[win])                                 \n\t"
709cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4], $ac2                                         \n\t"
710cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg1]                                 \n\t"
711cabdff1aSopenharmony_ci        "mult   $ac1,         %[t0],%[temp_reg2]                           \n\t"
712cabdff1aSopenharmony_ci        "lw     %[t0],        8*4(%[tmp])                                  \n\t"
713cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6], $ac3                                         \n\t"
714cabdff1aSopenharmony_ci        "lw     %[t1],        10*4(%[tmp])                                 \n\t"
715cabdff1aSopenharmony_ci        "lw     %[t3],        11*4(%[tmp])                                 \n\t"
716cabdff1aSopenharmony_ci        "mfhi   %[temp_reg1]                                               \n\t"
717cabdff1aSopenharmony_ci        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
718cabdff1aSopenharmony_ci        "lw     %[t2],        9*4(%[tmp])                                  \n\t"
719cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac1                                         \n\t"
720cabdff1aSopenharmony_ci        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
721cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 16*32*4(%[out])                              \n\t"
722cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 1*32*4(%[out])                               \n\t"
723cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 4*16*4(%[buf])                               \n\t"
724cabdff1aSopenharmony_ci        "addu   %[temp_reg3], %[t3],        %[t2]                          \n\t"
725cabdff1aSopenharmony_ci        "li     %[temp_reg4], 0x8D3B7CD6                                   \n\t"
726cabdff1aSopenharmony_ci        "sw     %[temp_reg2], 4*1*4(%[buf])                                \n\t"
727cabdff1aSopenharmony_ci        "multu  %[temp_reg4],%[temp_reg3]                                  \n\t"
728cabdff1aSopenharmony_ci        "sra    %[temp_reg3], %[temp_reg3], 31                             \n\t"
729cabdff1aSopenharmony_ci        "move   %[s1],        $0                                           \n\t"
730cabdff1aSopenharmony_ci        "movn   %[s1],        %[temp_reg4], %[temp_reg3]                   \n\t"
731cabdff1aSopenharmony_ci        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
732cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3]                                               \n\t"
733cabdff1aSopenharmony_ci        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
734cabdff1aSopenharmony_ci        "sub    %[temp_reg5], %[t3],        %[t2]                          \n\t"
735cabdff1aSopenharmony_ci        "li     %[temp_reg6], 0x976fd9                                     \n\t"
736cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 11*4(%[win])                                 \n\t"
737cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*11*4(%[buf])                               \n\t"
738cabdff1aSopenharmony_ci        "mult   $ac1,         %[temp_reg6], %[temp_reg5]                   \n\t"
739cabdff1aSopenharmony_ci        "subu   %[s1],        %[temp_reg3], %[s1]                          \n\t"
740cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 31*4(%[win])                                 \n\t"
741cabdff1aSopenharmony_ci        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
742cabdff1aSopenharmony_ci        "add    %[t0],        %[s0],        %[s1]                          \n\t"
743cabdff1aSopenharmony_ci        "mult   $ac2,         %[t1],        %[temp_reg2]                   \n\t"
744cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg5]                                 \n\t"
745cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 6*4(%[win])                                  \n\t"
746cabdff1aSopenharmony_ci        "extr.w %[s3],        $ac1,         23                             \n\t"
747cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 4*6*4(%[buf])                                \n\t"
748cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac2                                         \n\t"
749cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 26*4(%[win])                                 \n\t"
750cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5]                                               \n\t"
751cabdff1aSopenharmony_ci        "mult   $ac3,         %[t1],        %[temp_reg4]                   \n\t"
752cabdff1aSopenharmony_ci        "mult   $ac1,         %[t0],        %[temp_reg6]                   \n\t"
753cabdff1aSopenharmony_ci        "add    %[t0],        %[s2],        %[s3]                          \n\t"
754cabdff1aSopenharmony_ci        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
755cabdff1aSopenharmony_ci        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1]                   \n\t"
756cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4], $ac3                                         \n\t"
757cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6], $ac1                                         \n\t"
758cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 4*11*4(%[buf])                               \n\t"
759cabdff1aSopenharmony_ci        "sw     %[temp_reg2], 32*11*4(%[out])                              \n\t"
760cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*15*4(%[buf])                               \n\t"
761cabdff1aSopenharmony_ci        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
762cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 15*4(%[win])                                 \n\t"
763cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 6*32*4(%[out])                               \n\t"
764cabdff1aSopenharmony_ci        "sw     %[temp_reg6], 4*6*4(%[buf])                                \n\t"
765cabdff1aSopenharmony_ci        "mult   %[t1],        %[temp_reg2]                                 \n\t"
766cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 2*4(%[win])                                  \n\t"
767cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 4*2*4(%[buf])                                \n\t"
768cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 35*4(%[win])                                 \n\t"
769cabdff1aSopenharmony_ci        "mult   $ac1,         %[t1],        %[temp_reg3]                   \n\t"
770cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2]                                               \n\t"
771cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 22*4(%[win])                                 \n\t"
772cabdff1aSopenharmony_ci        "mult   $ac2,         %[t0],        %[temp_reg5]                   \n\t"
773cabdff1aSopenharmony_ci        "lw     %[t1],        14*4(%[tmp])                                 \n\t"
774cabdff1aSopenharmony_ci        "mult   $ac3,         %[t0],        %[temp_reg6]                   \n\t"
775cabdff1aSopenharmony_ci        "lw     %[t0],        12*4(%[tmp])                                 \n\t"
776cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac1                                         \n\t"
777cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
778cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5], $ac2                                         \n\t"
779cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 15*32*4(%[out])                              \n\t"
780cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6], $ac3                                         \n\t"
781cabdff1aSopenharmony_ci        "lw     %[t2],        13*4(%[tmp])                                 \n\t"
782cabdff1aSopenharmony_ci        "lw     %[t3],        15*4(%[tmp])                                 \n\t"
783cabdff1aSopenharmony_ci        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3]                   \n\t"
784cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 4*15*4(%[buf])                               \n\t"
785cabdff1aSopenharmony_ci        "addu   %[temp_reg1], %[t3],        %[t2]                          \n\t"
786cabdff1aSopenharmony_ci        "li     %[temp_reg2], 0x9C42577C                                   \n\t"
787cabdff1aSopenharmony_ci        "move   %[s1],        $0                                           \n\t"
788cabdff1aSopenharmony_ci        "multu  %[temp_reg2], %[temp_reg1]                                 \n\t"
789cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 2*32*4(%[out])                               \n\t"
790cabdff1aSopenharmony_ci        "sra    %[temp_reg1], %[temp_reg1], 31                             \n\t"
791cabdff1aSopenharmony_ci        "movn   %[s1],        %[temp_reg2], %[temp_reg1]                   \n\t"
792cabdff1aSopenharmony_ci        "sub    %[temp_reg3], %[t3],        %[t2]                          \n\t"
793cabdff1aSopenharmony_ci        "li     %[temp_reg4], 0x6f94a2                                     \n\t"
794cabdff1aSopenharmony_ci        "mfhi   %[temp_reg1]                                               \n\t"
795cabdff1aSopenharmony_ci        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
796cabdff1aSopenharmony_ci        "sw     %[temp_reg6], 4*2*4(%[buf])                                \n\t"
797cabdff1aSopenharmony_ci        "mult   $ac1,         %[temp_reg4], %[temp_reg3]                   \n\t"
798cabdff1aSopenharmony_ci        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
799cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 12*4(%[win])                                 \n\t"
800cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 4*12*4(%[buf])                               \n\t"
801cabdff1aSopenharmony_ci        "subu   %[s1],        %[temp_reg1], %[s1]                          \n\t"
802cabdff1aSopenharmony_ci        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
803cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 32*4(%[win])                                 \n\t"
804cabdff1aSopenharmony_ci        "mult   $ac2,         %[t1],        %[temp_reg5]                   \n\t"
805cabdff1aSopenharmony_ci        "add    %[t0],        %[s0],        %[s1]                          \n\t"
806cabdff1aSopenharmony_ci        "extr.w %[s3],        $ac1,         23                             \n\t"
807cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 5*4(%[win])                                  \n\t"
808cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg3]                                 \n\t"
809cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5], $ac2                                         \n\t"
810cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 25*4(%[win])                                 \n\t"
811cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*5*4(%[buf])                                \n\t"
812cabdff1aSopenharmony_ci        "mult   $ac3,         %[t1],        %[temp_reg2]                   \n\t"
813cabdff1aSopenharmony_ci        "mult   $ac1,         %[t0],        %[temp_reg4]                   \n\t"
814cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3]                                               \n\t"
815cabdff1aSopenharmony_ci        "add    %[t0],        %[s2],        %[s3]                          \n\t"
816cabdff1aSopenharmony_ci        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
817cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac3                                         \n\t"
818cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4], $ac1                                         \n\t"
819cabdff1aSopenharmony_ci        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
820cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 32*12*4(%[out])                              \n\t"
821cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 4*12*4(%[buf])                               \n\t"
822cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 14*4(%[win])                                 \n\t"
823cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 4*14*4(%[buf])                               \n\t"
824cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
825cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 4*5*4(%[buf])                                \n\t"
826cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 5*32*4(%[out])                               \n\t"
827cabdff1aSopenharmony_ci        "mult   %[t1],        %[temp_reg6]                                 \n\t"
828cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 34*4(%[win])                                 \n\t"
829cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 3*4(%[win])                                  \n\t"
830cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 4*3*4(%[buf])                                \n\t"
831cabdff1aSopenharmony_ci        "mult   $ac2,         %[t0],        %[temp_reg4]                   \n\t"
832cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6]                                               \n\t"
833cabdff1aSopenharmony_ci        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
834cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 23*4(%[win])                                 \n\t"
835cabdff1aSopenharmony_ci        "lw     %[s0],        16*4(%[tmp])                                 \n\t"
836cabdff1aSopenharmony_ci        "mfhi   %[temp_reg4], $ac2                                         \n\t"
837cabdff1aSopenharmony_ci        "lw     %[t1],        17*4(%[tmp])                                 \n\t"
838cabdff1aSopenharmony_ci        "mult   $ac3,         %[t0],        %[temp_reg3]                   \n\t"
839cabdff1aSopenharmony_ci        "move   %[s1],        $0                                           \n\t"
840cabdff1aSopenharmony_ci        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
841cabdff1aSopenharmony_ci        "mfhi   %[temp_reg2], $ac1                                         \n\t"
842cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 14*32*4(%[out])                              \n\t"
843cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 4*14*4(%[buf])                               \n\t"
844cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac3                                         \n\t"
845cabdff1aSopenharmony_ci        "li     %[temp_reg5], 0xB504F334                                   \n\t"
846cabdff1aSopenharmony_ci        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
847cabdff1aSopenharmony_ci        "multu  %[temp_reg5], %[t1]                                        \n\t"
848cabdff1aSopenharmony_ci        "lw     %[temp_reg2], 4*13*4(%[buf])                               \n\t"
849cabdff1aSopenharmony_ci        "sw     %[temp_reg1], 3*32*4(%[out])                               \n\t"
850cabdff1aSopenharmony_ci        "sra    %[t1],        %[t1],        31                             \n\t"
851cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6]                                               \n\t"
852cabdff1aSopenharmony_ci        "movn   %[s1],        %[temp_reg5], %[t1]                          \n\t"
853cabdff1aSopenharmony_ci        "sw     %[temp_reg3], 4*3*4(%[buf])                                \n\t"
854cabdff1aSopenharmony_ci        "lw     %[temp_reg1], 13*4(%[win])                                 \n\t"
855cabdff1aSopenharmony_ci        "lw     %[temp_reg4], 4*4*4(%[buf])                                \n\t"
856cabdff1aSopenharmony_ci        "lw     %[temp_reg3], 4*4(%[win])                                  \n\t"
857cabdff1aSopenharmony_ci        "lw     %[temp_reg5], 33*4(%[win])                                 \n\t"
858cabdff1aSopenharmony_ci        "subu   %[s1],        %[temp_reg6], %[s1]                          \n\t"
859cabdff1aSopenharmony_ci        "lw     %[temp_reg6], 24*4(%[win])                                 \n\t"
860cabdff1aSopenharmony_ci        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
861cabdff1aSopenharmony_ci        "add    %[t0],        %[s0],        %[s1]                          \n\t"
862cabdff1aSopenharmony_ci        "mult   $ac1,         %[t1],        %[temp_reg1]                   \n\t"
863cabdff1aSopenharmony_ci        "mult   $ac2,         %[t1],        %[temp_reg3]                   \n\t"
864cabdff1aSopenharmony_ci        "mult   $ac3,         %[t0],        %[temp_reg5]                   \n\t"
865cabdff1aSopenharmony_ci        "mult   %[t0],        %[temp_reg6]                                 \n\t"
866cabdff1aSopenharmony_ci        "mfhi   %[temp_reg1], $ac1                                         \n\t"
867cabdff1aSopenharmony_ci        "mfhi   %[temp_reg3], $ac2                                         \n\t"
868cabdff1aSopenharmony_ci        "mfhi   %[temp_reg5], $ac3                                         \n\t"
869cabdff1aSopenharmony_ci        "mfhi   %[temp_reg6]                                               \n\t"
870cabdff1aSopenharmony_ci        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1]                   \n\t"
871cabdff1aSopenharmony_ci        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3]                   \n\t"
872cabdff1aSopenharmony_ci        "sw     %[temp_reg2], 13*32*4(%[out])                              \n\t"
873cabdff1aSopenharmony_ci        "sw     %[temp_reg4], 4*32*4(%[out])                               \n\t"
874cabdff1aSopenharmony_ci        "sw     %[temp_reg5], 4*13*4(%[buf])                               \n\t"
875cabdff1aSopenharmony_ci        "sw     %[temp_reg6], 4*4*4(%[buf])                                \n\t"
876cabdff1aSopenharmony_ci
877cabdff1aSopenharmony_ci        : [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
878cabdff1aSopenharmony_ci          [s0] "=&r" (s0), [s2] "=&r" (s2), [temp_reg1] "=&r" (temp_reg1),
879cabdff1aSopenharmony_ci          [temp_reg2] "=&r" (temp_reg2), [s1] "=&r" (s1), [s3] "=&r" (s3),
880cabdff1aSopenharmony_ci          [temp_reg3] "=&r" (temp_reg3), [temp_reg4] "=&r" (temp_reg4),
881cabdff1aSopenharmony_ci          [temp_reg5] "=&r" (temp_reg5), [temp_reg6] "=&r" (temp_reg6),
882cabdff1aSopenharmony_ci          [out] "+r" (out)
883cabdff1aSopenharmony_ci        : [tmp] "r" (tmp), [win] "r" (win), [buf] "r" (buf)
884cabdff1aSopenharmony_ci        : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
885cabdff1aSopenharmony_ci          "$ac3hi", "$ac3lo"
886cabdff1aSopenharmony_ci    );
887cabdff1aSopenharmony_ci}
888cabdff1aSopenharmony_ci
889cabdff1aSopenharmony_cistatic void ff_imdct36_blocks_mips_fixed(int *out, int *buf, int *in,
890cabdff1aSopenharmony_ci                               int count, int switch_point, int block_type)
891cabdff1aSopenharmony_ci{
892cabdff1aSopenharmony_ci    int j;
893cabdff1aSopenharmony_ci    for (j=0 ; j < count; j++) {
894cabdff1aSopenharmony_ci        /* apply window & overlap with previous buffer */
895cabdff1aSopenharmony_ci
896cabdff1aSopenharmony_ci        /* select window */
897cabdff1aSopenharmony_ci        int win_idx = (switch_point && j < 2) ? 0 : block_type;
898cabdff1aSopenharmony_ci        int *win = ff_mdct_win_fixed[win_idx + (4 & -(j & 1))];
899cabdff1aSopenharmony_ci
900cabdff1aSopenharmony_ci        imdct36_mips_fixed(out, buf, in, win);
901cabdff1aSopenharmony_ci
902cabdff1aSopenharmony_ci        in  += 18;
903cabdff1aSopenharmony_ci        buf += ((j&3) != 3 ? 1 : (72-3));
904cabdff1aSopenharmony_ci        out++;
905cabdff1aSopenharmony_ci    }
906cabdff1aSopenharmony_ci}
907cabdff1aSopenharmony_ci
908cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
909cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
910cabdff1aSopenharmony_ci
911cabdff1aSopenharmony_civoid ff_mpadsp_init_mipsdsp(MPADSPContext *s)
912cabdff1aSopenharmony_ci{
913cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
914cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
915cabdff1aSopenharmony_ci    s->apply_window_fixed   = ff_mpadsp_apply_window_mips_fixed;
916cabdff1aSopenharmony_ci    s->imdct36_blocks_fixed = ff_imdct36_blocks_mips_fixed;
917cabdff1aSopenharmony_ci#endif
918cabdff1aSopenharmony_ci#endif
919cabdff1aSopenharmony_ci}
920