/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Author:  Stanislav Ocovaj (socovaj@mips.com)
 *          Szabolcs Pal     (sabolc@mips.com)
 *
 * AAC coefficients encoder optimized for MIPS floating-point architecture
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Reference: libavcodec/aaccoder.c
 */

56cabdff1aSopenharmony_ci#include "libavutil/libm.h"
57cabdff1aSopenharmony_ci
58cabdff1aSopenharmony_ci#include <float.h>
59cabdff1aSopenharmony_ci#include "libavutil/mathematics.h"
60cabdff1aSopenharmony_ci#include "libavcodec/avcodec.h"
61cabdff1aSopenharmony_ci#include "libavcodec/put_bits.h"
62cabdff1aSopenharmony_ci#include "libavcodec/aac.h"
63cabdff1aSopenharmony_ci#include "libavcodec/aacenc.h"
64cabdff1aSopenharmony_ci#include "libavcodec/aactab.h"
65cabdff1aSopenharmony_ci#include "libavcodec/aacenctab.h"
66cabdff1aSopenharmony_ci#include "libavcodec/aacenc_utils.h"
67cabdff1aSopenharmony_ci
68cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
69cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * One node of a band coding path.
 *
 * NOTE(review): the code that fills in and reads this structure is not
 * visible here; the field descriptions are inferred from the field names
 * and should be confirmed against the users of the type.
 */
typedef struct BandCodingPath {
    int prev_idx; ///< presumably the index of the preceding node on the path
    float cost;   ///< presumably the cost accumulated along the path so far
    int run;      ///< presumably the length of the current run
} BandCodingPath;

/**
 * Per-index count of nonzero values in a 4-tuple whose elements are each
 * 0, 1 or 2.
 *
 * The index is the tuple read as a 4-digit base-3 number
 * (27*a + 9*b + 3*c + d); the entry is the number of nonzero digits.
 * Going by the name, this is the number of sign bits that accompany the
 * tuple when it is coded with an unsigned codebook.
 */
static const uint8_t uquad_sign_bits[81] = {
    0, 1, 1, 1, 2, 2, 1, 2, 2,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4
};

/**
 * Per-index count of nonzero values in a pair whose elements are each in
 * the range 0..7.
 *
 * The index is 8*a + b; the entry is the number of nonzero elements among
 * a and b. Going by the name, this is the number of sign bits that
 * accompany the pair when it is coded with an unsigned codebook.
 */
static const uint8_t upair7_sign_bits[64] = {
    0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
};

/**
 * Per-index count of nonzero values in a pair whose elements are each in
 * the range 0..12.
 *
 * The index is 13*a + b; the entry is the number of nonzero elements among
 * a and b. Going by the name, this is the number of sign bits that
 * accompany the pair when it is coded with an unsigned codebook.
 */
static const uint8_t upair12_sign_bits[169] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};

/**
 * Per-index count of nonzero values in a pair whose elements are each in
 * the range 0..16.
 *
 * The index is 17*a + b; the entry is the number of nonzero elements among
 * a and b. Going by the name, this is the number of sign bits that
 * accompany the pair when it is coded with the escape codebook.
 */
static const uint8_t esc_sign_bits[289] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};

/**
 * Functions derived from the template function and optimized for quantizing
 * and encoding a band
 */
138cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
139cabdff1aSopenharmony_ci                                                     PutBitContext *pb, const float *in, float *out,
140cabdff1aSopenharmony_ci                                                     const float *scaled, int size, int scale_idx,
141cabdff1aSopenharmony_ci                                                     int cb, const float lambda, const float uplim,
142cabdff1aSopenharmony_ci                                                     int *bits, float *energy, const float ROUNDING)
143cabdff1aSopenharmony_ci{
144cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
145cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
146cabdff1aSopenharmony_ci    int i;
147cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
148cabdff1aSopenharmony_ci    float qenergy = 0.0f;
149cabdff1aSopenharmony_ci
150cabdff1aSopenharmony_ci    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
151cabdff1aSopenharmony_ci    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
152cabdff1aSopenharmony_ci    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
153cabdff1aSopenharmony_ci
154cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
155cabdff1aSopenharmony_ci    scaled = s->scoefs;
156cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
157cabdff1aSopenharmony_ci        int curidx;
158cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
159cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
160cabdff1aSopenharmony_ci        const float *vec;
161cabdff1aSopenharmony_ci
162cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
163cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
164cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
165cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
166cabdff1aSopenharmony_ci
167cabdff1aSopenharmony_ci        __asm__ volatile (
168cabdff1aSopenharmony_ci            ".set push                      \n\t"
169cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
170cabdff1aSopenharmony_ci
171cabdff1aSopenharmony_ci            "slt    %[qc1], $zero,  %[qc1]  \n\t"
172cabdff1aSopenharmony_ci            "slt    %[qc2], $zero,  %[qc2]  \n\t"
173cabdff1aSopenharmony_ci            "slt    %[qc3], $zero,  %[qc3]  \n\t"
174cabdff1aSopenharmony_ci            "slt    %[qc4], $zero,  %[qc4]  \n\t"
175cabdff1aSopenharmony_ci            "lw     %[t0],  0(%[in_int])    \n\t"
176cabdff1aSopenharmony_ci            "lw     %[t1],  4(%[in_int])    \n\t"
177cabdff1aSopenharmony_ci            "lw     %[t2],  8(%[in_int])    \n\t"
178cabdff1aSopenharmony_ci            "lw     %[t3],  12(%[in_int])   \n\t"
179cabdff1aSopenharmony_ci            "srl    %[t0],  %[t0],  31      \n\t"
180cabdff1aSopenharmony_ci            "srl    %[t1],  %[t1],  31      \n\t"
181cabdff1aSopenharmony_ci            "srl    %[t2],  %[t2],  31      \n\t"
182cabdff1aSopenharmony_ci            "srl    %[t3],  %[t3],  31      \n\t"
183cabdff1aSopenharmony_ci            "subu   %[t4],  $zero,  %[qc1]  \n\t"
184cabdff1aSopenharmony_ci            "subu   %[t5],  $zero,  %[qc2]  \n\t"
185cabdff1aSopenharmony_ci            "subu   %[t6],  $zero,  %[qc3]  \n\t"
186cabdff1aSopenharmony_ci            "subu   %[t7],  $zero,  %[qc4]  \n\t"
187cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
188cabdff1aSopenharmony_ci            "movn   %[qc2], %[t5],  %[t1]   \n\t"
189cabdff1aSopenharmony_ci            "movn   %[qc3], %[t6],  %[t2]   \n\t"
190cabdff1aSopenharmony_ci            "movn   %[qc4], %[t7],  %[t3]   \n\t"
191cabdff1aSopenharmony_ci
192cabdff1aSopenharmony_ci            ".set pop                       \n\t"
193cabdff1aSopenharmony_ci
194cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
195cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
196cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
197cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
198cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
199cabdff1aSopenharmony_ci            : "memory"
200cabdff1aSopenharmony_ci        );
201cabdff1aSopenharmony_ci
202cabdff1aSopenharmony_ci        curidx = qc1;
203cabdff1aSopenharmony_ci        curidx *= 3;
204cabdff1aSopenharmony_ci        curidx += qc2;
205cabdff1aSopenharmony_ci        curidx *= 3;
206cabdff1aSopenharmony_ci        curidx += qc3;
207cabdff1aSopenharmony_ci        curidx *= 3;
208cabdff1aSopenharmony_ci        curidx += qc4;
209cabdff1aSopenharmony_ci        curidx += 40;
210cabdff1aSopenharmony_ci
211cabdff1aSopenharmony_ci        put_bits(pb, p_bits[curidx], p_codes[curidx]);
212cabdff1aSopenharmony_ci
213cabdff1aSopenharmony_ci        if (out || energy) {
214cabdff1aSopenharmony_ci            float e1,e2,e3,e4;
215cabdff1aSopenharmony_ci            vec = &p_vec[curidx*4];
216cabdff1aSopenharmony_ci            e1 = vec[0] * IQ;
217cabdff1aSopenharmony_ci            e2 = vec[1] * IQ;
218cabdff1aSopenharmony_ci            e3 = vec[2] * IQ;
219cabdff1aSopenharmony_ci            e4 = vec[3] * IQ;
220cabdff1aSopenharmony_ci            if (out) {
221cabdff1aSopenharmony_ci                out[i+0] = e1;
222cabdff1aSopenharmony_ci                out[i+1] = e2;
223cabdff1aSopenharmony_ci                out[i+2] = e3;
224cabdff1aSopenharmony_ci                out[i+3] = e4;
225cabdff1aSopenharmony_ci            }
226cabdff1aSopenharmony_ci            if (energy)
227cabdff1aSopenharmony_ci                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
228cabdff1aSopenharmony_ci        }
229cabdff1aSopenharmony_ci    }
230cabdff1aSopenharmony_ci    if (energy)
231cabdff1aSopenharmony_ci        *energy = qenergy;
232cabdff1aSopenharmony_ci}
233cabdff1aSopenharmony_ci
234cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
235cabdff1aSopenharmony_ci                                                     PutBitContext *pb, const float *in, float *out,
236cabdff1aSopenharmony_ci                                                     const float *scaled, int size, int scale_idx,
237cabdff1aSopenharmony_ci                                                     int cb, const float lambda, const float uplim,
238cabdff1aSopenharmony_ci                                                     int *bits, float *energy, const float ROUNDING)
239cabdff1aSopenharmony_ci{
240cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
241cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
242cabdff1aSopenharmony_ci    int i;
243cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
244cabdff1aSopenharmony_ci    float qenergy = 0.0f;
245cabdff1aSopenharmony_ci
246cabdff1aSopenharmony_ci    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
247cabdff1aSopenharmony_ci    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
248cabdff1aSopenharmony_ci    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
249cabdff1aSopenharmony_ci
250cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
251cabdff1aSopenharmony_ci    scaled = s->scoefs;
252cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
253cabdff1aSopenharmony_ci        int curidx, sign, count;
254cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
255cabdff1aSopenharmony_ci        uint8_t v_bits;
256cabdff1aSopenharmony_ci        unsigned int v_codes;
257cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
258cabdff1aSopenharmony_ci        const float *vec;
259cabdff1aSopenharmony_ci
260cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
261cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
262cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
263cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
264cabdff1aSopenharmony_ci
265cabdff1aSopenharmony_ci        __asm__ volatile (
266cabdff1aSopenharmony_ci            ".set push                              \n\t"
267cabdff1aSopenharmony_ci            ".set noreorder                         \n\t"
268cabdff1aSopenharmony_ci
269cabdff1aSopenharmony_ci            "ori    %[t4],      $zero,      2       \n\t"
270cabdff1aSopenharmony_ci            "ori    %[sign],    $zero,      0       \n\t"
271cabdff1aSopenharmony_ci            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
272cabdff1aSopenharmony_ci            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
273cabdff1aSopenharmony_ci            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
274cabdff1aSopenharmony_ci            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
275cabdff1aSopenharmony_ci            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
276cabdff1aSopenharmony_ci            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
277cabdff1aSopenharmony_ci            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
278cabdff1aSopenharmony_ci            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
279cabdff1aSopenharmony_ci            "lw     %[t0],      0(%[in_int])        \n\t"
280cabdff1aSopenharmony_ci            "lw     %[t1],      4(%[in_int])        \n\t"
281cabdff1aSopenharmony_ci            "lw     %[t2],      8(%[in_int])        \n\t"
282cabdff1aSopenharmony_ci            "lw     %[t3],      12(%[in_int])       \n\t"
283cabdff1aSopenharmony_ci            "slt    %[t0],      %[t0],      $zero   \n\t"
284cabdff1aSopenharmony_ci            "movn   %[sign],    %[t0],      %[qc1]  \n\t"
285cabdff1aSopenharmony_ci            "slt    %[t1],      %[t1],      $zero   \n\t"
286cabdff1aSopenharmony_ci            "slt    %[t2],      %[t2],      $zero   \n\t"
287cabdff1aSopenharmony_ci            "slt    %[t3],      %[t3],      $zero   \n\t"
288cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign],    1       \n\t"
289cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t1]   \n\t"
290cabdff1aSopenharmony_ci            "movn   %[sign],    %[t0],      %[qc2]  \n\t"
291cabdff1aSopenharmony_ci            "slt    %[t4],      $zero,      %[qc1]  \n\t"
292cabdff1aSopenharmony_ci            "slt    %[t1],      $zero,      %[qc2]  \n\t"
293cabdff1aSopenharmony_ci            "slt    %[count],   $zero,      %[qc3]  \n\t"
294cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign],    1       \n\t"
295cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t2]   \n\t"
296cabdff1aSopenharmony_ci            "movn   %[sign],    %[t0],      %[qc3]  \n\t"
297cabdff1aSopenharmony_ci            "slt    %[t2],      $zero,      %[qc4]  \n\t"
298cabdff1aSopenharmony_ci            "addu   %[count],   %[count],   %[t4]   \n\t"
299cabdff1aSopenharmony_ci            "addu   %[count],   %[count],   %[t1]   \n\t"
300cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign],    1       \n\t"
301cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t3]   \n\t"
302cabdff1aSopenharmony_ci            "movn   %[sign],    %[t0],      %[qc4]  \n\t"
303cabdff1aSopenharmony_ci            "addu   %[count],   %[count],   %[t2]   \n\t"
304cabdff1aSopenharmony_ci
305cabdff1aSopenharmony_ci            ".set pop                               \n\t"
306cabdff1aSopenharmony_ci
307cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
308cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
309cabdff1aSopenharmony_ci              [sign]"=&r"(sign), [count]"=&r"(count),
310cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
311cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
312cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
313cabdff1aSopenharmony_ci            : "memory"
314cabdff1aSopenharmony_ci        );
315cabdff1aSopenharmony_ci
316cabdff1aSopenharmony_ci        curidx = qc1;
317cabdff1aSopenharmony_ci        curidx *= 3;
318cabdff1aSopenharmony_ci        curidx += qc2;
319cabdff1aSopenharmony_ci        curidx *= 3;
320cabdff1aSopenharmony_ci        curidx += qc3;
321cabdff1aSopenharmony_ci        curidx *= 3;
322cabdff1aSopenharmony_ci        curidx += qc4;
323cabdff1aSopenharmony_ci
324cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
325cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx] + count;
326cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
327cabdff1aSopenharmony_ci
328cabdff1aSopenharmony_ci        if (out || energy) {
329cabdff1aSopenharmony_ci            float e1,e2,e3,e4;
330cabdff1aSopenharmony_ci            vec = &p_vec[curidx*4];
331cabdff1aSopenharmony_ci            e1 = copysignf(vec[0] * IQ, in[i+0]);
332cabdff1aSopenharmony_ci            e2 = copysignf(vec[1] * IQ, in[i+1]);
333cabdff1aSopenharmony_ci            e3 = copysignf(vec[2] * IQ, in[i+2]);
334cabdff1aSopenharmony_ci            e4 = copysignf(vec[3] * IQ, in[i+3]);
335cabdff1aSopenharmony_ci            if (out) {
336cabdff1aSopenharmony_ci                out[i+0] = e1;
337cabdff1aSopenharmony_ci                out[i+1] = e2;
338cabdff1aSopenharmony_ci                out[i+2] = e3;
339cabdff1aSopenharmony_ci                out[i+3] = e4;
340cabdff1aSopenharmony_ci            }
341cabdff1aSopenharmony_ci            if (energy)
342cabdff1aSopenharmony_ci                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
343cabdff1aSopenharmony_ci        }
344cabdff1aSopenharmony_ci    }
345cabdff1aSopenharmony_ci    if (energy)
346cabdff1aSopenharmony_ci        *energy = qenergy;
347cabdff1aSopenharmony_ci}
348cabdff1aSopenharmony_ci
349cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
350cabdff1aSopenharmony_ci                                                     PutBitContext *pb, const float *in, float *out,
351cabdff1aSopenharmony_ci                                                     const float *scaled, int size, int scale_idx,
352cabdff1aSopenharmony_ci                                                     int cb, const float lambda, const float uplim,
353cabdff1aSopenharmony_ci                                                     int *bits, float *energy, const float ROUNDING)
354cabdff1aSopenharmony_ci{
355cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
356cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
357cabdff1aSopenharmony_ci    int i;
358cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
359cabdff1aSopenharmony_ci    float qenergy = 0.0f;
360cabdff1aSopenharmony_ci
361cabdff1aSopenharmony_ci    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
362cabdff1aSopenharmony_ci    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
363cabdff1aSopenharmony_ci    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
364cabdff1aSopenharmony_ci
365cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
366cabdff1aSopenharmony_ci    scaled = s->scoefs;
367cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
368cabdff1aSopenharmony_ci        int curidx, curidx2;
369cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
370cabdff1aSopenharmony_ci        uint8_t v_bits;
371cabdff1aSopenharmony_ci        unsigned int v_codes;
372cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
373cabdff1aSopenharmony_ci        const float *vec1, *vec2;
374cabdff1aSopenharmony_ci
375cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
376cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
377cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
378cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
379cabdff1aSopenharmony_ci
380cabdff1aSopenharmony_ci        __asm__ volatile (
381cabdff1aSopenharmony_ci            ".set push                      \n\t"
382cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
383cabdff1aSopenharmony_ci
384cabdff1aSopenharmony_ci            "ori    %[t4],  $zero,  4       \n\t"
385cabdff1aSopenharmony_ci            "slt    %[t0],  %[t4],  %[qc1]  \n\t"
386cabdff1aSopenharmony_ci            "slt    %[t1],  %[t4],  %[qc2]  \n\t"
387cabdff1aSopenharmony_ci            "slt    %[t2],  %[t4],  %[qc3]  \n\t"
388cabdff1aSopenharmony_ci            "slt    %[t3],  %[t4],  %[qc4]  \n\t"
389cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
390cabdff1aSopenharmony_ci            "movn   %[qc2], %[t4],  %[t1]   \n\t"
391cabdff1aSopenharmony_ci            "movn   %[qc3], %[t4],  %[t2]   \n\t"
392cabdff1aSopenharmony_ci            "movn   %[qc4], %[t4],  %[t3]   \n\t"
393cabdff1aSopenharmony_ci            "lw     %[t0],  0(%[in_int])    \n\t"
394cabdff1aSopenharmony_ci            "lw     %[t1],  4(%[in_int])    \n\t"
395cabdff1aSopenharmony_ci            "lw     %[t2],  8(%[in_int])    \n\t"
396cabdff1aSopenharmony_ci            "lw     %[t3],  12(%[in_int])   \n\t"
397cabdff1aSopenharmony_ci            "srl    %[t0],  %[t0],  31      \n\t"
398cabdff1aSopenharmony_ci            "srl    %[t1],  %[t1],  31      \n\t"
399cabdff1aSopenharmony_ci            "srl    %[t2],  %[t2],  31      \n\t"
400cabdff1aSopenharmony_ci            "srl    %[t3],  %[t3],  31      \n\t"
401cabdff1aSopenharmony_ci            "subu   %[t4],  $zero,  %[qc1]  \n\t"
402cabdff1aSopenharmony_ci            "subu   %[t5],  $zero,  %[qc2]  \n\t"
403cabdff1aSopenharmony_ci            "subu   %[t6],  $zero,  %[qc3]  \n\t"
404cabdff1aSopenharmony_ci            "subu   %[t7],  $zero,  %[qc4]  \n\t"
405cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
406cabdff1aSopenharmony_ci            "movn   %[qc2], %[t5],  %[t1]   \n\t"
407cabdff1aSopenharmony_ci            "movn   %[qc3], %[t6],  %[t2]   \n\t"
408cabdff1aSopenharmony_ci            "movn   %[qc4], %[t7],  %[t3]   \n\t"
409cabdff1aSopenharmony_ci
410cabdff1aSopenharmony_ci            ".set pop                       \n\t"
411cabdff1aSopenharmony_ci
412cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
413cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
414cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
415cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
416cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
417cabdff1aSopenharmony_ci            : "memory"
418cabdff1aSopenharmony_ci        );
419cabdff1aSopenharmony_ci
420cabdff1aSopenharmony_ci        curidx = 9 * qc1;
421cabdff1aSopenharmony_ci        curidx += qc2 + 40;
422cabdff1aSopenharmony_ci
423cabdff1aSopenharmony_ci        curidx2 = 9 * qc3;
424cabdff1aSopenharmony_ci        curidx2 += qc4 + 40;
425cabdff1aSopenharmony_ci
426cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
427cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx] + p_bits[curidx2];
428cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
429cabdff1aSopenharmony_ci
430cabdff1aSopenharmony_ci        if (out || energy) {
431cabdff1aSopenharmony_ci            float e1,e2,e3,e4;
432cabdff1aSopenharmony_ci            vec1 = &p_vec[curidx*2 ];
433cabdff1aSopenharmony_ci            vec2 = &p_vec[curidx2*2];
434cabdff1aSopenharmony_ci            e1 = vec1[0] * IQ;
435cabdff1aSopenharmony_ci            e2 = vec1[1] * IQ;
436cabdff1aSopenharmony_ci            e3 = vec2[0] * IQ;
437cabdff1aSopenharmony_ci            e4 = vec2[1] * IQ;
438cabdff1aSopenharmony_ci            if (out) {
439cabdff1aSopenharmony_ci                out[i+0] = e1;
440cabdff1aSopenharmony_ci                out[i+1] = e2;
441cabdff1aSopenharmony_ci                out[i+2] = e3;
442cabdff1aSopenharmony_ci                out[i+3] = e4;
443cabdff1aSopenharmony_ci            }
444cabdff1aSopenharmony_ci            if (energy)
445cabdff1aSopenharmony_ci                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
446cabdff1aSopenharmony_ci        }
447cabdff1aSopenharmony_ci    }
448cabdff1aSopenharmony_ci    if (energy)
449cabdff1aSopenharmony_ci        *energy = qenergy;
450cabdff1aSopenharmony_ci}
451cabdff1aSopenharmony_ci
452cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
453cabdff1aSopenharmony_ci                                                      PutBitContext *pb, const float *in, float *out,
454cabdff1aSopenharmony_ci                                                      const float *scaled, int size, int scale_idx,
455cabdff1aSopenharmony_ci                                                      int cb, const float lambda, const float uplim,
456cabdff1aSopenharmony_ci                                                      int *bits, float *energy, const float ROUNDING)
457cabdff1aSopenharmony_ci{
458cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
459cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
460cabdff1aSopenharmony_ci    int i;
461cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
462cabdff1aSopenharmony_ci    float qenergy = 0.0f;
463cabdff1aSopenharmony_ci
464cabdff1aSopenharmony_ci    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
465cabdff1aSopenharmony_ci    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
466cabdff1aSopenharmony_ci    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
467cabdff1aSopenharmony_ci
468cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
469cabdff1aSopenharmony_ci    scaled = s->scoefs;
470cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
471cabdff1aSopenharmony_ci        int curidx1, curidx2, sign1, count1, sign2, count2;
472cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
473cabdff1aSopenharmony_ci        uint8_t v_bits;
474cabdff1aSopenharmony_ci        unsigned int v_codes;
475cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
476cabdff1aSopenharmony_ci        const float *vec1, *vec2;
477cabdff1aSopenharmony_ci
478cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
479cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
480cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
481cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
482cabdff1aSopenharmony_ci
483cabdff1aSopenharmony_ci        __asm__ volatile (
484cabdff1aSopenharmony_ci            ".set push                              \n\t"
485cabdff1aSopenharmony_ci            ".set noreorder                         \n\t"
486cabdff1aSopenharmony_ci
487cabdff1aSopenharmony_ci            "ori    %[t4],      $zero,      7       \n\t"
488cabdff1aSopenharmony_ci            "ori    %[sign1],   $zero,      0       \n\t"
489cabdff1aSopenharmony_ci            "ori    %[sign2],   $zero,      0       \n\t"
490cabdff1aSopenharmony_ci            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
491cabdff1aSopenharmony_ci            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
492cabdff1aSopenharmony_ci            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
493cabdff1aSopenharmony_ci            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
494cabdff1aSopenharmony_ci            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
495cabdff1aSopenharmony_ci            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
496cabdff1aSopenharmony_ci            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
497cabdff1aSopenharmony_ci            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
498cabdff1aSopenharmony_ci            "lw     %[t0],      0(%[in_int])        \n\t"
499cabdff1aSopenharmony_ci            "lw     %[t1],      4(%[in_int])        \n\t"
500cabdff1aSopenharmony_ci            "lw     %[t2],      8(%[in_int])        \n\t"
501cabdff1aSopenharmony_ci            "lw     %[t3],      12(%[in_int])       \n\t"
502cabdff1aSopenharmony_ci            "slt    %[t0],      %[t0],      $zero   \n\t"
503cabdff1aSopenharmony_ci            "movn   %[sign1],   %[t0],      %[qc1]  \n\t"
504cabdff1aSopenharmony_ci            "slt    %[t2],      %[t2],      $zero   \n\t"
505cabdff1aSopenharmony_ci            "movn   %[sign2],   %[t2],      %[qc3]  \n\t"
506cabdff1aSopenharmony_ci            "slt    %[t1],      %[t1],      $zero   \n\t"
507cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign1],   1       \n\t"
508cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t1]   \n\t"
509cabdff1aSopenharmony_ci            "movn   %[sign1],   %[t0],      %[qc2]  \n\t"
510cabdff1aSopenharmony_ci            "slt    %[t3],      %[t3],      $zero   \n\t"
511cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign2],   1       \n\t"
512cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t3]   \n\t"
513cabdff1aSopenharmony_ci            "movn   %[sign2],   %[t0],      %[qc4]  \n\t"
514cabdff1aSopenharmony_ci            "slt    %[count1],  $zero,      %[qc1]  \n\t"
515cabdff1aSopenharmony_ci            "slt    %[t1],      $zero,      %[qc2]  \n\t"
516cabdff1aSopenharmony_ci            "slt    %[count2],  $zero,      %[qc3]  \n\t"
517cabdff1aSopenharmony_ci            "slt    %[t2],      $zero,      %[qc4]  \n\t"
518cabdff1aSopenharmony_ci            "addu   %[count1],  %[count1],  %[t1]   \n\t"
519cabdff1aSopenharmony_ci            "addu   %[count2],  %[count2],  %[t2]   \n\t"
520cabdff1aSopenharmony_ci
521cabdff1aSopenharmony_ci            ".set pop                               \n\t"
522cabdff1aSopenharmony_ci
523cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
524cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
525cabdff1aSopenharmony_ci              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
526cabdff1aSopenharmony_ci              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
527cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
528cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
529cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
530cabdff1aSopenharmony_ci            : "t0", "t1", "t2", "t3", "t4",
531cabdff1aSopenharmony_ci              "memory"
532cabdff1aSopenharmony_ci        );
533cabdff1aSopenharmony_ci
534cabdff1aSopenharmony_ci        curidx1  = 8 * qc1;
535cabdff1aSopenharmony_ci        curidx1 += qc2;
536cabdff1aSopenharmony_ci
537cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx1] << count1) | sign1;
538cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx1] + count1;
539cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
540cabdff1aSopenharmony_ci
541cabdff1aSopenharmony_ci        curidx2  = 8 * qc3;
542cabdff1aSopenharmony_ci        curidx2 += qc4;
543cabdff1aSopenharmony_ci
544cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx2] << count2) | sign2;
545cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx2] + count2;
546cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
547cabdff1aSopenharmony_ci
548cabdff1aSopenharmony_ci        if (out || energy) {
549cabdff1aSopenharmony_ci            float e1,e2,e3,e4;
550cabdff1aSopenharmony_ci            vec1 = &p_vec[curidx1*2];
551cabdff1aSopenharmony_ci            vec2 = &p_vec[curidx2*2];
552cabdff1aSopenharmony_ci            e1 = copysignf(vec1[0] * IQ, in[i+0]);
553cabdff1aSopenharmony_ci            e2 = copysignf(vec1[1] * IQ, in[i+1]);
554cabdff1aSopenharmony_ci            e3 = copysignf(vec2[0] * IQ, in[i+2]);
555cabdff1aSopenharmony_ci            e4 = copysignf(vec2[1] * IQ, in[i+3]);
556cabdff1aSopenharmony_ci            if (out) {
557cabdff1aSopenharmony_ci                out[i+0] = e1;
558cabdff1aSopenharmony_ci                out[i+1] = e2;
559cabdff1aSopenharmony_ci                out[i+2] = e3;
560cabdff1aSopenharmony_ci                out[i+3] = e4;
561cabdff1aSopenharmony_ci            }
562cabdff1aSopenharmony_ci            if (energy)
563cabdff1aSopenharmony_ci                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
564cabdff1aSopenharmony_ci        }
565cabdff1aSopenharmony_ci    }
566cabdff1aSopenharmony_ci    if (energy)
567cabdff1aSopenharmony_ci        *energy = qenergy;
568cabdff1aSopenharmony_ci}
569cabdff1aSopenharmony_ci
570cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
571cabdff1aSopenharmony_ci                                                       PutBitContext *pb, const float *in, float *out,
572cabdff1aSopenharmony_ci                                                       const float *scaled, int size, int scale_idx,
573cabdff1aSopenharmony_ci                                                       int cb, const float lambda, const float uplim,
574cabdff1aSopenharmony_ci                                                       int *bits, float *energy, const float ROUNDING)
575cabdff1aSopenharmony_ci{
576cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
577cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
578cabdff1aSopenharmony_ci    int i;
579cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
580cabdff1aSopenharmony_ci    float qenergy = 0.0f;
581cabdff1aSopenharmony_ci
582cabdff1aSopenharmony_ci    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
583cabdff1aSopenharmony_ci    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
584cabdff1aSopenharmony_ci    float    *p_vec   = (float   *)ff_aac_codebook_vectors[cb-1];
585cabdff1aSopenharmony_ci
586cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
587cabdff1aSopenharmony_ci    scaled = s->scoefs;
588cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
589cabdff1aSopenharmony_ci        int curidx1, curidx2, sign1, count1, sign2, count2;
590cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
591cabdff1aSopenharmony_ci        uint8_t v_bits;
592cabdff1aSopenharmony_ci        unsigned int v_codes;
593cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
594cabdff1aSopenharmony_ci        const float *vec1, *vec2;
595cabdff1aSopenharmony_ci
596cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
597cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
598cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
599cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
600cabdff1aSopenharmony_ci
601cabdff1aSopenharmony_ci        __asm__ volatile (
602cabdff1aSopenharmony_ci            ".set push                              \n\t"
603cabdff1aSopenharmony_ci            ".set noreorder                         \n\t"
604cabdff1aSopenharmony_ci
605cabdff1aSopenharmony_ci            "ori    %[t4],      $zero,      12      \n\t"
606cabdff1aSopenharmony_ci            "ori    %[sign1],   $zero,      0       \n\t"
607cabdff1aSopenharmony_ci            "ori    %[sign2],   $zero,      0       \n\t"
608cabdff1aSopenharmony_ci            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
609cabdff1aSopenharmony_ci            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
610cabdff1aSopenharmony_ci            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
611cabdff1aSopenharmony_ci            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
612cabdff1aSopenharmony_ci            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
613cabdff1aSopenharmony_ci            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
614cabdff1aSopenharmony_ci            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
615cabdff1aSopenharmony_ci            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
616cabdff1aSopenharmony_ci            "lw     %[t0],      0(%[in_int])        \n\t"
617cabdff1aSopenharmony_ci            "lw     %[t1],      4(%[in_int])        \n\t"
618cabdff1aSopenharmony_ci            "lw     %[t2],      8(%[in_int])        \n\t"
619cabdff1aSopenharmony_ci            "lw     %[t3],      12(%[in_int])       \n\t"
620cabdff1aSopenharmony_ci            "slt    %[t0],      %[t0],      $zero   \n\t"
621cabdff1aSopenharmony_ci            "movn   %[sign1],   %[t0],      %[qc1]  \n\t"
622cabdff1aSopenharmony_ci            "slt    %[t2],      %[t2],      $zero   \n\t"
623cabdff1aSopenharmony_ci            "movn   %[sign2],   %[t2],      %[qc3]  \n\t"
624cabdff1aSopenharmony_ci            "slt    %[t1],      %[t1],      $zero   \n\t"
625cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign1],   1       \n\t"
626cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t1]   \n\t"
627cabdff1aSopenharmony_ci            "movn   %[sign1],   %[t0],      %[qc2]  \n\t"
628cabdff1aSopenharmony_ci            "slt    %[t3],      %[t3],      $zero   \n\t"
629cabdff1aSopenharmony_ci            "sll    %[t0],      %[sign2],   1       \n\t"
630cabdff1aSopenharmony_ci            "or     %[t0],      %[t0],      %[t3]   \n\t"
631cabdff1aSopenharmony_ci            "movn   %[sign2],   %[t0],      %[qc4]  \n\t"
632cabdff1aSopenharmony_ci            "slt    %[count1],  $zero,      %[qc1]  \n\t"
633cabdff1aSopenharmony_ci            "slt    %[t1],      $zero,      %[qc2]  \n\t"
634cabdff1aSopenharmony_ci            "slt    %[count2],  $zero,      %[qc3]  \n\t"
635cabdff1aSopenharmony_ci            "slt    %[t2],      $zero,      %[qc4]  \n\t"
636cabdff1aSopenharmony_ci            "addu   %[count1],  %[count1],  %[t1]   \n\t"
637cabdff1aSopenharmony_ci            "addu   %[count2],  %[count2],  %[t2]   \n\t"
638cabdff1aSopenharmony_ci
639cabdff1aSopenharmony_ci            ".set pop                               \n\t"
640cabdff1aSopenharmony_ci
641cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
642cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
643cabdff1aSopenharmony_ci              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
644cabdff1aSopenharmony_ci              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
645cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
646cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
647cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
648cabdff1aSopenharmony_ci            : "memory"
649cabdff1aSopenharmony_ci        );
650cabdff1aSopenharmony_ci
651cabdff1aSopenharmony_ci        curidx1  = 13 * qc1;
652cabdff1aSopenharmony_ci        curidx1 += qc2;
653cabdff1aSopenharmony_ci
654cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx1] << count1) | sign1;
655cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx1] + count1;
656cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
657cabdff1aSopenharmony_ci
658cabdff1aSopenharmony_ci        curidx2  = 13 * qc3;
659cabdff1aSopenharmony_ci        curidx2 += qc4;
660cabdff1aSopenharmony_ci
661cabdff1aSopenharmony_ci        v_codes = (p_codes[curidx2] << count2) | sign2;
662cabdff1aSopenharmony_ci        v_bits  = p_bits[curidx2] + count2;
663cabdff1aSopenharmony_ci        put_bits(pb, v_bits, v_codes);
664cabdff1aSopenharmony_ci
665cabdff1aSopenharmony_ci        if (out || energy) {
666cabdff1aSopenharmony_ci            float e1,e2,e3,e4;
667cabdff1aSopenharmony_ci            vec1 = &p_vec[curidx1*2];
668cabdff1aSopenharmony_ci            vec2 = &p_vec[curidx2*2];
669cabdff1aSopenharmony_ci            e1 = copysignf(vec1[0] * IQ, in[i+0]);
670cabdff1aSopenharmony_ci            e2 = copysignf(vec1[1] * IQ, in[i+1]);
671cabdff1aSopenharmony_ci            e3 = copysignf(vec2[0] * IQ, in[i+2]);
672cabdff1aSopenharmony_ci            e4 = copysignf(vec2[1] * IQ, in[i+3]);
673cabdff1aSopenharmony_ci            if (out) {
674cabdff1aSopenharmony_ci                out[i+0] = e1;
675cabdff1aSopenharmony_ci                out[i+1] = e2;
676cabdff1aSopenharmony_ci                out[i+2] = e3;
677cabdff1aSopenharmony_ci                out[i+3] = e4;
678cabdff1aSopenharmony_ci            }
679cabdff1aSopenharmony_ci            if (energy)
680cabdff1aSopenharmony_ci                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
681cabdff1aSopenharmony_ci        }
682cabdff1aSopenharmony_ci    }
683cabdff1aSopenharmony_ci    if (energy)
684cabdff1aSopenharmony_ci        *energy = qenergy;
685cabdff1aSopenharmony_ci}
686cabdff1aSopenharmony_ci
687cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
688cabdff1aSopenharmony_ci                                                   PutBitContext *pb, const float *in, float *out,
689cabdff1aSopenharmony_ci                                                   const float *scaled, int size, int scale_idx,
690cabdff1aSopenharmony_ci                                                   int cb, const float lambda, const float uplim,
691cabdff1aSopenharmony_ci                                                   int *bits, float *energy, const float ROUNDING)
692cabdff1aSopenharmony_ci{
693cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
694cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
695cabdff1aSopenharmony_ci    int i;
696cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
697cabdff1aSopenharmony_ci    float qenergy = 0.0f;
698cabdff1aSopenharmony_ci
699cabdff1aSopenharmony_ci    uint8_t  *p_bits    = (uint8_t* )ff_aac_spectral_bits[cb-1];
700cabdff1aSopenharmony_ci    uint16_t *p_codes   = (uint16_t*)ff_aac_spectral_codes[cb-1];
701cabdff1aSopenharmony_ci    float    *p_vectors = (float*   )ff_aac_codebook_vectors[cb-1];
702cabdff1aSopenharmony_ci
703cabdff1aSopenharmony_ci    abs_pow34_v(s->scoefs, in, size);
704cabdff1aSopenharmony_ci    scaled = s->scoefs;
705cabdff1aSopenharmony_ci
706cabdff1aSopenharmony_ci    if (cb < 11) {
707cabdff1aSopenharmony_ci        for (i = 0; i < size; i += 4) {
708cabdff1aSopenharmony_ci            int curidx, curidx2, sign1, count1, sign2, count2;
709cabdff1aSopenharmony_ci            int *in_int = (int *)&in[i];
710cabdff1aSopenharmony_ci            uint8_t v_bits;
711cabdff1aSopenharmony_ci            unsigned int v_codes;
712cabdff1aSopenharmony_ci            int t0, t1, t2, t3, t4;
713cabdff1aSopenharmony_ci            const float *vec1, *vec2;
714cabdff1aSopenharmony_ci
715cabdff1aSopenharmony_ci            qc1 = scaled[i  ] * Q34 + ROUNDING;
716cabdff1aSopenharmony_ci            qc2 = scaled[i+1] * Q34 + ROUNDING;
717cabdff1aSopenharmony_ci            qc3 = scaled[i+2] * Q34 + ROUNDING;
718cabdff1aSopenharmony_ci            qc4 = scaled[i+3] * Q34 + ROUNDING;
719cabdff1aSopenharmony_ci
720cabdff1aSopenharmony_ci            __asm__ volatile (
721cabdff1aSopenharmony_ci                ".set push                                  \n\t"
722cabdff1aSopenharmony_ci                ".set noreorder                             \n\t"
723cabdff1aSopenharmony_ci
724cabdff1aSopenharmony_ci                "ori        %[t4],      $zero,      16      \n\t"
725cabdff1aSopenharmony_ci                "ori        %[sign1],   $zero,      0       \n\t"
726cabdff1aSopenharmony_ci                "ori        %[sign2],   $zero,      0       \n\t"
727cabdff1aSopenharmony_ci                "slt        %[t0],      %[t4],      %[qc1]  \n\t"
728cabdff1aSopenharmony_ci                "slt        %[t1],      %[t4],      %[qc2]  \n\t"
729cabdff1aSopenharmony_ci                "slt        %[t2],      %[t4],      %[qc3]  \n\t"
730cabdff1aSopenharmony_ci                "slt        %[t3],      %[t4],      %[qc4]  \n\t"
731cabdff1aSopenharmony_ci                "movn       %[qc1],     %[t4],      %[t0]   \n\t"
732cabdff1aSopenharmony_ci                "movn       %[qc2],     %[t4],      %[t1]   \n\t"
733cabdff1aSopenharmony_ci                "movn       %[qc3],     %[t4],      %[t2]   \n\t"
734cabdff1aSopenharmony_ci                "movn       %[qc4],     %[t4],      %[t3]   \n\t"
735cabdff1aSopenharmony_ci                "lw         %[t0],      0(%[in_int])        \n\t"
736cabdff1aSopenharmony_ci                "lw         %[t1],      4(%[in_int])        \n\t"
737cabdff1aSopenharmony_ci                "lw         %[t2],      8(%[in_int])        \n\t"
738cabdff1aSopenharmony_ci                "lw         %[t3],      12(%[in_int])       \n\t"
739cabdff1aSopenharmony_ci                "slt        %[t0],      %[t0],      $zero   \n\t"
740cabdff1aSopenharmony_ci                "movn       %[sign1],   %[t0],      %[qc1]  \n\t"
741cabdff1aSopenharmony_ci                "slt        %[t2],      %[t2],      $zero   \n\t"
742cabdff1aSopenharmony_ci                "movn       %[sign2],   %[t2],      %[qc3]  \n\t"
743cabdff1aSopenharmony_ci                "slt        %[t1],      %[t1],      $zero   \n\t"
744cabdff1aSopenharmony_ci                "sll        %[t0],      %[sign1],   1       \n\t"
745cabdff1aSopenharmony_ci                "or         %[t0],      %[t0],      %[t1]   \n\t"
746cabdff1aSopenharmony_ci                "movn       %[sign1],   %[t0],      %[qc2]  \n\t"
747cabdff1aSopenharmony_ci                "slt        %[t3],      %[t3],      $zero   \n\t"
748cabdff1aSopenharmony_ci                "sll        %[t0],      %[sign2],   1       \n\t"
749cabdff1aSopenharmony_ci                "or         %[t0],      %[t0],      %[t3]   \n\t"
750cabdff1aSopenharmony_ci                "movn       %[sign2],   %[t0],      %[qc4]  \n\t"
751cabdff1aSopenharmony_ci                "slt        %[count1],  $zero,      %[qc1]  \n\t"
752cabdff1aSopenharmony_ci                "slt        %[t1],      $zero,      %[qc2]  \n\t"
753cabdff1aSopenharmony_ci                "slt        %[count2],  $zero,      %[qc3]  \n\t"
754cabdff1aSopenharmony_ci                "slt        %[t2],      $zero,      %[qc4]  \n\t"
755cabdff1aSopenharmony_ci                "addu       %[count1],  %[count1],  %[t1]   \n\t"
756cabdff1aSopenharmony_ci                "addu       %[count2],  %[count2],  %[t2]   \n\t"
757cabdff1aSopenharmony_ci
758cabdff1aSopenharmony_ci                ".set pop                                   \n\t"
759cabdff1aSopenharmony_ci
760cabdff1aSopenharmony_ci                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
761cabdff1aSopenharmony_ci                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
762cabdff1aSopenharmony_ci                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
763cabdff1aSopenharmony_ci                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
764cabdff1aSopenharmony_ci                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
765cabdff1aSopenharmony_ci                  [t4]"=&r"(t4)
766cabdff1aSopenharmony_ci                : [in_int]"r"(in_int)
767cabdff1aSopenharmony_ci                : "memory"
768cabdff1aSopenharmony_ci            );
769cabdff1aSopenharmony_ci
770cabdff1aSopenharmony_ci            curidx = 17 * qc1;
771cabdff1aSopenharmony_ci            curidx += qc2;
772cabdff1aSopenharmony_ci            curidx2 = 17 * qc3;
773cabdff1aSopenharmony_ci            curidx2 += qc4;
774cabdff1aSopenharmony_ci
775cabdff1aSopenharmony_ci            v_codes = (p_codes[curidx] << count1) | sign1;
776cabdff1aSopenharmony_ci            v_bits  = p_bits[curidx] + count1;
777cabdff1aSopenharmony_ci            put_bits(pb, v_bits, v_codes);
778cabdff1aSopenharmony_ci
779cabdff1aSopenharmony_ci            v_codes = (p_codes[curidx2] << count2) | sign2;
780cabdff1aSopenharmony_ci            v_bits  = p_bits[curidx2] + count2;
781cabdff1aSopenharmony_ci            put_bits(pb, v_bits, v_codes);
782cabdff1aSopenharmony_ci
783cabdff1aSopenharmony_ci            if (out || energy) {
784cabdff1aSopenharmony_ci                float e1,e2,e3,e4;
785cabdff1aSopenharmony_ci                vec1 = &p_vectors[curidx*2 ];
786cabdff1aSopenharmony_ci                vec2 = &p_vectors[curidx2*2];
787cabdff1aSopenharmony_ci                e1 = copysignf(vec1[0] * IQ, in[i+0]);
788cabdff1aSopenharmony_ci                e2 = copysignf(vec1[1] * IQ, in[i+1]);
789cabdff1aSopenharmony_ci                e3 = copysignf(vec2[0] * IQ, in[i+2]);
790cabdff1aSopenharmony_ci                e4 = copysignf(vec2[1] * IQ, in[i+3]);
791cabdff1aSopenharmony_ci                if (out) {
792cabdff1aSopenharmony_ci                    out[i+0] = e1;
793cabdff1aSopenharmony_ci                    out[i+1] = e2;
794cabdff1aSopenharmony_ci                    out[i+2] = e3;
795cabdff1aSopenharmony_ci                    out[i+3] = e4;
796cabdff1aSopenharmony_ci                }
797cabdff1aSopenharmony_ci                if (energy)
798cabdff1aSopenharmony_ci                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
799cabdff1aSopenharmony_ci            }
800cabdff1aSopenharmony_ci        }
801cabdff1aSopenharmony_ci    } else {
802cabdff1aSopenharmony_ci        for (i = 0; i < size; i += 4) {
803cabdff1aSopenharmony_ci            int curidx, curidx2, sign1, count1, sign2, count2;
804cabdff1aSopenharmony_ci            int *in_int = (int *)&in[i];
805cabdff1aSopenharmony_ci            uint8_t v_bits;
806cabdff1aSopenharmony_ci            unsigned int v_codes;
807cabdff1aSopenharmony_ci            int c1, c2, c3, c4;
808cabdff1aSopenharmony_ci            int t0, t1, t2, t3, t4;
809cabdff1aSopenharmony_ci
810cabdff1aSopenharmony_ci            qc1 = scaled[i  ] * Q34 + ROUNDING;
811cabdff1aSopenharmony_ci            qc2 = scaled[i+1] * Q34 + ROUNDING;
812cabdff1aSopenharmony_ci            qc3 = scaled[i+2] * Q34 + ROUNDING;
813cabdff1aSopenharmony_ci            qc4 = scaled[i+3] * Q34 + ROUNDING;
814cabdff1aSopenharmony_ci
815cabdff1aSopenharmony_ci            __asm__ volatile (
816cabdff1aSopenharmony_ci                ".set push                                  \n\t"
817cabdff1aSopenharmony_ci                ".set noreorder                             \n\t"
818cabdff1aSopenharmony_ci
819cabdff1aSopenharmony_ci                "ori        %[t4],      $zero,      16      \n\t"
820cabdff1aSopenharmony_ci                "ori        %[sign1],   $zero,      0       \n\t"
821cabdff1aSopenharmony_ci                "ori        %[sign2],   $zero,      0       \n\t"
822cabdff1aSopenharmony_ci                "shll_s.w   %[c1],      %[qc1],     18      \n\t"
823cabdff1aSopenharmony_ci                "shll_s.w   %[c2],      %[qc2],     18      \n\t"
824cabdff1aSopenharmony_ci                "shll_s.w   %[c3],      %[qc3],     18      \n\t"
825cabdff1aSopenharmony_ci                "shll_s.w   %[c4],      %[qc4],     18      \n\t"
826cabdff1aSopenharmony_ci                "srl        %[c1],      %[c1],      18      \n\t"
827cabdff1aSopenharmony_ci                "srl        %[c2],      %[c2],      18      \n\t"
828cabdff1aSopenharmony_ci                "srl        %[c3],      %[c3],      18      \n\t"
829cabdff1aSopenharmony_ci                "srl        %[c4],      %[c4],      18      \n\t"
830cabdff1aSopenharmony_ci                "slt        %[t0],      %[t4],      %[qc1]  \n\t"
831cabdff1aSopenharmony_ci                "slt        %[t1],      %[t4],      %[qc2]  \n\t"
832cabdff1aSopenharmony_ci                "slt        %[t2],      %[t4],      %[qc3]  \n\t"
833cabdff1aSopenharmony_ci                "slt        %[t3],      %[t4],      %[qc4]  \n\t"
834cabdff1aSopenharmony_ci                "movn       %[qc1],     %[t4],      %[t0]   \n\t"
835cabdff1aSopenharmony_ci                "movn       %[qc2],     %[t4],      %[t1]   \n\t"
836cabdff1aSopenharmony_ci                "movn       %[qc3],     %[t4],      %[t2]   \n\t"
837cabdff1aSopenharmony_ci                "movn       %[qc4],     %[t4],      %[t3]   \n\t"
838cabdff1aSopenharmony_ci                "lw         %[t0],      0(%[in_int])        \n\t"
839cabdff1aSopenharmony_ci                "lw         %[t1],      4(%[in_int])        \n\t"
840cabdff1aSopenharmony_ci                "lw         %[t2],      8(%[in_int])        \n\t"
841cabdff1aSopenharmony_ci                "lw         %[t3],      12(%[in_int])       \n\t"
842cabdff1aSopenharmony_ci                "slt        %[t0],      %[t0],      $zero   \n\t"
843cabdff1aSopenharmony_ci                "movn       %[sign1],   %[t0],      %[qc1]  \n\t"
844cabdff1aSopenharmony_ci                "slt        %[t2],      %[t2],      $zero   \n\t"
845cabdff1aSopenharmony_ci                "movn       %[sign2],   %[t2],      %[qc3]  \n\t"
846cabdff1aSopenharmony_ci                "slt        %[t1],      %[t1],      $zero   \n\t"
847cabdff1aSopenharmony_ci                "sll        %[t0],      %[sign1],   1       \n\t"
848cabdff1aSopenharmony_ci                "or         %[t0],      %[t0],      %[t1]   \n\t"
849cabdff1aSopenharmony_ci                "movn       %[sign1],   %[t0],      %[qc2]  \n\t"
850cabdff1aSopenharmony_ci                "slt        %[t3],      %[t3],      $zero   \n\t"
851cabdff1aSopenharmony_ci                "sll        %[t0],      %[sign2],   1       \n\t"
852cabdff1aSopenharmony_ci                "or         %[t0],      %[t0],      %[t3]   \n\t"
853cabdff1aSopenharmony_ci                "movn       %[sign2],   %[t0],      %[qc4]  \n\t"
854cabdff1aSopenharmony_ci                "slt        %[count1],  $zero,      %[qc1]  \n\t"
855cabdff1aSopenharmony_ci                "slt        %[t1],      $zero,      %[qc2]  \n\t"
856cabdff1aSopenharmony_ci                "slt        %[count2],  $zero,      %[qc3]  \n\t"
857cabdff1aSopenharmony_ci                "slt        %[t2],      $zero,      %[qc4]  \n\t"
858cabdff1aSopenharmony_ci                "addu       %[count1],  %[count1],  %[t1]   \n\t"
859cabdff1aSopenharmony_ci                "addu       %[count2],  %[count2],  %[t2]   \n\t"
860cabdff1aSopenharmony_ci
861cabdff1aSopenharmony_ci                ".set pop                                   \n\t"
862cabdff1aSopenharmony_ci
863cabdff1aSopenharmony_ci                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
864cabdff1aSopenharmony_ci                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
865cabdff1aSopenharmony_ci                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
866cabdff1aSopenharmony_ci                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
867cabdff1aSopenharmony_ci                  [c1]"=&r"(c1), [c2]"=&r"(c2),
868cabdff1aSopenharmony_ci                  [c3]"=&r"(c3), [c4]"=&r"(c4),
869cabdff1aSopenharmony_ci                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
870cabdff1aSopenharmony_ci                  [t4]"=&r"(t4)
871cabdff1aSopenharmony_ci                : [in_int]"r"(in_int)
872cabdff1aSopenharmony_ci                : "memory"
873cabdff1aSopenharmony_ci            );
874cabdff1aSopenharmony_ci
875cabdff1aSopenharmony_ci            curidx = 17 * qc1;
876cabdff1aSopenharmony_ci            curidx += qc2;
877cabdff1aSopenharmony_ci
878cabdff1aSopenharmony_ci            curidx2 = 17 * qc3;
879cabdff1aSopenharmony_ci            curidx2 += qc4;
880cabdff1aSopenharmony_ci
881cabdff1aSopenharmony_ci            v_codes = (p_codes[curidx] << count1) | sign1;
882cabdff1aSopenharmony_ci            v_bits  = p_bits[curidx] + count1;
883cabdff1aSopenharmony_ci            put_bits(pb, v_bits, v_codes);
884cabdff1aSopenharmony_ci
885cabdff1aSopenharmony_ci            if (p_vectors[curidx*2  ] == 64.0f) {
886cabdff1aSopenharmony_ci                int len = av_log2(c1);
887cabdff1aSopenharmony_ci                v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
888cabdff1aSopenharmony_ci                put_bits(pb, len * 2 - 3, v_codes);
889cabdff1aSopenharmony_ci            }
890cabdff1aSopenharmony_ci            if (p_vectors[curidx*2+1] == 64.0f) {
891cabdff1aSopenharmony_ci                int len = av_log2(c2);
892cabdff1aSopenharmony_ci                v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
893cabdff1aSopenharmony_ci                put_bits(pb, len*2-3, v_codes);
894cabdff1aSopenharmony_ci            }
895cabdff1aSopenharmony_ci
896cabdff1aSopenharmony_ci            v_codes = (p_codes[curidx2] << count2) | sign2;
897cabdff1aSopenharmony_ci            v_bits  = p_bits[curidx2] + count2;
898cabdff1aSopenharmony_ci            put_bits(pb, v_bits, v_codes);
899cabdff1aSopenharmony_ci
900cabdff1aSopenharmony_ci            if (p_vectors[curidx2*2  ] == 64.0f) {
901cabdff1aSopenharmony_ci                int len = av_log2(c3);
902cabdff1aSopenharmony_ci                v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
903cabdff1aSopenharmony_ci                put_bits(pb, len* 2 - 3, v_codes);
904cabdff1aSopenharmony_ci            }
905cabdff1aSopenharmony_ci            if (p_vectors[curidx2*2+1] == 64.0f) {
906cabdff1aSopenharmony_ci                int len = av_log2(c4);
907cabdff1aSopenharmony_ci                v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
908cabdff1aSopenharmony_ci                put_bits(pb, len * 2 - 3, v_codes);
909cabdff1aSopenharmony_ci            }
910cabdff1aSopenharmony_ci
911cabdff1aSopenharmony_ci            if (out || energy) {
912cabdff1aSopenharmony_ci                float e1, e2, e3, e4;
913cabdff1aSopenharmony_ci                e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
914cabdff1aSopenharmony_ci                e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
915cabdff1aSopenharmony_ci                e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
916cabdff1aSopenharmony_ci                e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
917cabdff1aSopenharmony_ci                if (out) {
918cabdff1aSopenharmony_ci                    out[i+0] = e1;
919cabdff1aSopenharmony_ci                    out[i+1] = e2;
920cabdff1aSopenharmony_ci                    out[i+2] = e3;
921cabdff1aSopenharmony_ci                    out[i+3] = e4;
922cabdff1aSopenharmony_ci                }
923cabdff1aSopenharmony_ci                if (energy)
924cabdff1aSopenharmony_ci                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
925cabdff1aSopenharmony_ci            }
926cabdff1aSopenharmony_ci        }
927cabdff1aSopenharmony_ci    }
928cabdff1aSopenharmony_ci    if (energy)
929cabdff1aSopenharmony_ci        *energy = qenergy;
930cabdff1aSopenharmony_ci}
931cabdff1aSopenharmony_ci
932cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
933cabdff1aSopenharmony_ci                                                         PutBitContext *pb, const float *in, float *out,
934cabdff1aSopenharmony_ci                                                         const float *scaled, int size, int scale_idx,
935cabdff1aSopenharmony_ci                                                         int cb, const float lambda, const float uplim,
936cabdff1aSopenharmony_ci                                                         int *bits, float *energy, const float ROUNDING) {
937cabdff1aSopenharmony_ci    av_assert0(0);
938cabdff1aSopenharmony_ci}
939cabdff1aSopenharmony_ci
940cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
941cabdff1aSopenharmony_ci                                                         PutBitContext *pb, const float *in, float *out,
942cabdff1aSopenharmony_ci                                                         const float *scaled, int size, int scale_idx,
943cabdff1aSopenharmony_ci                                                         int cb, const float lambda, const float uplim,
944cabdff1aSopenharmony_ci                                                         int *bits, float *energy, const float ROUNDING) {
945cabdff1aSopenharmony_ci    int i;
946cabdff1aSopenharmony_ci    if (bits)
947cabdff1aSopenharmony_ci        *bits = 0;
948cabdff1aSopenharmony_ci    if (out) {
949cabdff1aSopenharmony_ci        for (i = 0; i < size; i += 4) {
950cabdff1aSopenharmony_ci           out[i  ] = 0.0f;
951cabdff1aSopenharmony_ci           out[i+1] = 0.0f;
952cabdff1aSopenharmony_ci           out[i+2] = 0.0f;
953cabdff1aSopenharmony_ci           out[i+3] = 0.0f;
954cabdff1aSopenharmony_ci        }
955cabdff1aSopenharmony_ci    }
956cabdff1aSopenharmony_ci    if (energy)
957cabdff1aSopenharmony_ci        *energy = 0.0f;
958cabdff1aSopenharmony_ci}
959cabdff1aSopenharmony_ci
960cabdff1aSopenharmony_cistatic void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
961cabdff1aSopenharmony_ci                                                         PutBitContext *pb, const float *in, float *out,
962cabdff1aSopenharmony_ci                                                         const float *scaled, int size, int scale_idx,
963cabdff1aSopenharmony_ci                                                         int cb, const float lambda, const float uplim,
964cabdff1aSopenharmony_ci                                                         int *bits, float *energy, const float ROUNDING) = {
965cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_ZERO_mips,
966cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_SQUAD_mips,
967cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_SQUAD_mips,
968cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UQUAD_mips,
969cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UQUAD_mips,
970cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_SPAIR_mips,
971cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_SPAIR_mips,
972cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UPAIR7_mips,
973cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UPAIR7_mips,
974cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UPAIR12_mips,
975cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_UPAIR12_mips,
976cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_ESC_mips,
977cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
978cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_ZERO_mips,
979cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_ZERO_mips,
980cabdff1aSopenharmony_ci    quantize_and_encode_band_cost_ZERO_mips,
981cabdff1aSopenharmony_ci};
982cabdff1aSopenharmony_ci
/*
 * Route a quantize-and-encode request to the worker registered for codebook
 * cb in quantize_and_encode_band_cost_arr, forwarding all arguments as-is.
 * Note that cb appears twice in the expansion (table index and argument),
 * so it must not be an expression with side effects.
 */
#define quantize_and_encode_band_cost(s, pb, in, out, scaled, size,          \
                                      scale_idx, cb, lambda, uplim,          \
                                      bits, energy, ROUNDING)                \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, out, scaled, size,      \
                                          scale_idx, cb, lambda, uplim,      \
                                          bits, energy, ROUNDING)
989cabdff1aSopenharmony_ci
990cabdff1aSopenharmony_cistatic void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
991cabdff1aSopenharmony_ci                                          const float *in, float *out, int size, int scale_idx,
992cabdff1aSopenharmony_ci                                          int cb, const float lambda, int rtz)
993cabdff1aSopenharmony_ci{
994cabdff1aSopenharmony_ci    quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
995cabdff1aSopenharmony_ci                                  INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
996cabdff1aSopenharmony_ci}
997cabdff1aSopenharmony_ci
998cabdff1aSopenharmony_ci/**
999cabdff1aSopenharmony_ci * Functions developed from template function and optimized for getting the number of bits
1000cabdff1aSopenharmony_ci */
1001cabdff1aSopenharmony_cistatic float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1002cabdff1aSopenharmony_ci                                        PutBitContext *pb, const float *in,
1003cabdff1aSopenharmony_ci                                        const float *scaled, int size, int scale_idx,
1004cabdff1aSopenharmony_ci                                        int cb, const float lambda, const float uplim,
1005cabdff1aSopenharmony_ci                                        int *bits)
1006cabdff1aSopenharmony_ci{
1007cabdff1aSopenharmony_ci    return 0;
1008cabdff1aSopenharmony_ci}
1009cabdff1aSopenharmony_ci
1010cabdff1aSopenharmony_cistatic float get_band_numbits_NONE_mips(struct AACEncContext *s,
1011cabdff1aSopenharmony_ci                                        PutBitContext *pb, const float *in,
1012cabdff1aSopenharmony_ci                                        const float *scaled, int size, int scale_idx,
1013cabdff1aSopenharmony_ci                                        int cb, const float lambda, const float uplim,
1014cabdff1aSopenharmony_ci                                        int *bits)
1015cabdff1aSopenharmony_ci{
1016cabdff1aSopenharmony_ci    av_assert0(0);
1017cabdff1aSopenharmony_ci    return 0;
1018cabdff1aSopenharmony_ci}
1019cabdff1aSopenharmony_ci
1020cabdff1aSopenharmony_cistatic float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1021cabdff1aSopenharmony_ci                                         PutBitContext *pb, const float *in,
1022cabdff1aSopenharmony_ci                                         const float *scaled, int size, int scale_idx,
1023cabdff1aSopenharmony_ci                                         int cb, const float lambda, const float uplim,
1024cabdff1aSopenharmony_ci                                         int *bits)
1025cabdff1aSopenharmony_ci{
1026cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1027cabdff1aSopenharmony_ci    int i;
1028cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1029cabdff1aSopenharmony_ci    int curbits = 0;
1030cabdff1aSopenharmony_ci
1031cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1032cabdff1aSopenharmony_ci
1033cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1034cabdff1aSopenharmony_ci        int curidx;
1035cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
1036cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
1037cabdff1aSopenharmony_ci
1038cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1039cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1040cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1041cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1042cabdff1aSopenharmony_ci
1043cabdff1aSopenharmony_ci        __asm__ volatile (
1044cabdff1aSopenharmony_ci            ".set push                      \n\t"
1045cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
1046cabdff1aSopenharmony_ci
1047cabdff1aSopenharmony_ci            "slt    %[qc1], $zero,  %[qc1]  \n\t"
1048cabdff1aSopenharmony_ci            "slt    %[qc2], $zero,  %[qc2]  \n\t"
1049cabdff1aSopenharmony_ci            "slt    %[qc3], $zero,  %[qc3]  \n\t"
1050cabdff1aSopenharmony_ci            "slt    %[qc4], $zero,  %[qc4]  \n\t"
1051cabdff1aSopenharmony_ci            "lw     %[t0],  0(%[in_int])    \n\t"
1052cabdff1aSopenharmony_ci            "lw     %[t1],  4(%[in_int])    \n\t"
1053cabdff1aSopenharmony_ci            "lw     %[t2],  8(%[in_int])    \n\t"
1054cabdff1aSopenharmony_ci            "lw     %[t3],  12(%[in_int])   \n\t"
1055cabdff1aSopenharmony_ci            "srl    %[t0],  %[t0],  31      \n\t"
1056cabdff1aSopenharmony_ci            "srl    %[t1],  %[t1],  31      \n\t"
1057cabdff1aSopenharmony_ci            "srl    %[t2],  %[t2],  31      \n\t"
1058cabdff1aSopenharmony_ci            "srl    %[t3],  %[t3],  31      \n\t"
1059cabdff1aSopenharmony_ci            "subu   %[t4],  $zero,  %[qc1]  \n\t"
1060cabdff1aSopenharmony_ci            "subu   %[t5],  $zero,  %[qc2]  \n\t"
1061cabdff1aSopenharmony_ci            "subu   %[t6],  $zero,  %[qc3]  \n\t"
1062cabdff1aSopenharmony_ci            "subu   %[t7],  $zero,  %[qc4]  \n\t"
1063cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1064cabdff1aSopenharmony_ci            "movn   %[qc2], %[t5],  %[t1]   \n\t"
1065cabdff1aSopenharmony_ci            "movn   %[qc3], %[t6],  %[t2]   \n\t"
1066cabdff1aSopenharmony_ci            "movn   %[qc4], %[t7],  %[t3]   \n\t"
1067cabdff1aSopenharmony_ci
1068cabdff1aSopenharmony_ci            ".set pop                       \n\t"
1069cabdff1aSopenharmony_ci
1070cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1071cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1072cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1073cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1074cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
1075cabdff1aSopenharmony_ci            : "memory"
1076cabdff1aSopenharmony_ci        );
1077cabdff1aSopenharmony_ci
1078cabdff1aSopenharmony_ci        curidx = qc1;
1079cabdff1aSopenharmony_ci        curidx *= 3;
1080cabdff1aSopenharmony_ci        curidx += qc2;
1081cabdff1aSopenharmony_ci        curidx *= 3;
1082cabdff1aSopenharmony_ci        curidx += qc3;
1083cabdff1aSopenharmony_ci        curidx *= 3;
1084cabdff1aSopenharmony_ci        curidx += qc4;
1085cabdff1aSopenharmony_ci        curidx += 40;
1086cabdff1aSopenharmony_ci
1087cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1088cabdff1aSopenharmony_ci    }
1089cabdff1aSopenharmony_ci    return curbits;
1090cabdff1aSopenharmony_ci}
1091cabdff1aSopenharmony_ci
1092cabdff1aSopenharmony_cistatic float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1093cabdff1aSopenharmony_ci                                         PutBitContext *pb, const float *in,
1094cabdff1aSopenharmony_ci                                         const float *scaled, int size, int scale_idx,
1095cabdff1aSopenharmony_ci                                         int cb, const float lambda, const float uplim,
1096cabdff1aSopenharmony_ci                                         int *bits)
1097cabdff1aSopenharmony_ci{
1098cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1099cabdff1aSopenharmony_ci    int i;
1100cabdff1aSopenharmony_ci    int curbits = 0;
1101cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1102cabdff1aSopenharmony_ci
1103cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1104cabdff1aSopenharmony_ci
1105cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1106cabdff1aSopenharmony_ci        int curidx;
1107cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
1108cabdff1aSopenharmony_ci
1109cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1110cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1111cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1112cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1113cabdff1aSopenharmony_ci
1114cabdff1aSopenharmony_ci        __asm__ volatile (
1115cabdff1aSopenharmony_ci            ".set push                      \n\t"
1116cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
1117cabdff1aSopenharmony_ci
1118cabdff1aSopenharmony_ci            "ori    %[t4],  $zero,  2       \n\t"
1119cabdff1aSopenharmony_ci            "slt    %[t0],  %[t4],  %[qc1]  \n\t"
1120cabdff1aSopenharmony_ci            "slt    %[t1],  %[t4],  %[qc2]  \n\t"
1121cabdff1aSopenharmony_ci            "slt    %[t2],  %[t4],  %[qc3]  \n\t"
1122cabdff1aSopenharmony_ci            "slt    %[t3],  %[t4],  %[qc4]  \n\t"
1123cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1124cabdff1aSopenharmony_ci            "movn   %[qc2], %[t4],  %[t1]   \n\t"
1125cabdff1aSopenharmony_ci            "movn   %[qc3], %[t4],  %[t2]   \n\t"
1126cabdff1aSopenharmony_ci            "movn   %[qc4], %[t4],  %[t3]   \n\t"
1127cabdff1aSopenharmony_ci
1128cabdff1aSopenharmony_ci            ".set pop                       \n\t"
1129cabdff1aSopenharmony_ci
1130cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1131cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1132cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1133cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
1134cabdff1aSopenharmony_ci        );
1135cabdff1aSopenharmony_ci
1136cabdff1aSopenharmony_ci        curidx = qc1;
1137cabdff1aSopenharmony_ci        curidx *= 3;
1138cabdff1aSopenharmony_ci        curidx += qc2;
1139cabdff1aSopenharmony_ci        curidx *= 3;
1140cabdff1aSopenharmony_ci        curidx += qc3;
1141cabdff1aSopenharmony_ci        curidx *= 3;
1142cabdff1aSopenharmony_ci        curidx += qc4;
1143cabdff1aSopenharmony_ci
1144cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1145cabdff1aSopenharmony_ci        curbits += uquad_sign_bits[curidx];
1146cabdff1aSopenharmony_ci    }
1147cabdff1aSopenharmony_ci    return curbits;
1148cabdff1aSopenharmony_ci}
1149cabdff1aSopenharmony_ci
1150cabdff1aSopenharmony_cistatic float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1151cabdff1aSopenharmony_ci                                         PutBitContext *pb, const float *in,
1152cabdff1aSopenharmony_ci                                         const float *scaled, int size, int scale_idx,
1153cabdff1aSopenharmony_ci                                         int cb, const float lambda, const float uplim,
1154cabdff1aSopenharmony_ci                                         int *bits)
1155cabdff1aSopenharmony_ci{
1156cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1157cabdff1aSopenharmony_ci    int i;
1158cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1159cabdff1aSopenharmony_ci    int curbits = 0;
1160cabdff1aSopenharmony_ci
1161cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1162cabdff1aSopenharmony_ci
1163cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1164cabdff1aSopenharmony_ci        int curidx, curidx2;
1165cabdff1aSopenharmony_ci        int *in_int = (int *)&in[i];
1166cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
1167cabdff1aSopenharmony_ci
1168cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1169cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1170cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1171cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1172cabdff1aSopenharmony_ci
1173cabdff1aSopenharmony_ci        __asm__ volatile (
1174cabdff1aSopenharmony_ci            ".set push                      \n\t"
1175cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
1176cabdff1aSopenharmony_ci
1177cabdff1aSopenharmony_ci            "ori    %[t4],  $zero,  4       \n\t"
1178cabdff1aSopenharmony_ci            "slt    %[t0],  %[t4],  %[qc1]  \n\t"
1179cabdff1aSopenharmony_ci            "slt    %[t1],  %[t4],  %[qc2]  \n\t"
1180cabdff1aSopenharmony_ci            "slt    %[t2],  %[t4],  %[qc3]  \n\t"
1181cabdff1aSopenharmony_ci            "slt    %[t3],  %[t4],  %[qc4]  \n\t"
1182cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1183cabdff1aSopenharmony_ci            "movn   %[qc2], %[t4],  %[t1]   \n\t"
1184cabdff1aSopenharmony_ci            "movn   %[qc3], %[t4],  %[t2]   \n\t"
1185cabdff1aSopenharmony_ci            "movn   %[qc4], %[t4],  %[t3]   \n\t"
1186cabdff1aSopenharmony_ci            "lw     %[t0],  0(%[in_int])    \n\t"
1187cabdff1aSopenharmony_ci            "lw     %[t1],  4(%[in_int])    \n\t"
1188cabdff1aSopenharmony_ci            "lw     %[t2],  8(%[in_int])    \n\t"
1189cabdff1aSopenharmony_ci            "lw     %[t3],  12(%[in_int])   \n\t"
1190cabdff1aSopenharmony_ci            "srl    %[t0],  %[t0],  31      \n\t"
1191cabdff1aSopenharmony_ci            "srl    %[t1],  %[t1],  31      \n\t"
1192cabdff1aSopenharmony_ci            "srl    %[t2],  %[t2],  31      \n\t"
1193cabdff1aSopenharmony_ci            "srl    %[t3],  %[t3],  31      \n\t"
1194cabdff1aSopenharmony_ci            "subu   %[t4],  $zero,  %[qc1]  \n\t"
1195cabdff1aSopenharmony_ci            "subu   %[t5],  $zero,  %[qc2]  \n\t"
1196cabdff1aSopenharmony_ci            "subu   %[t6],  $zero,  %[qc3]  \n\t"
1197cabdff1aSopenharmony_ci            "subu   %[t7],  $zero,  %[qc4]  \n\t"
1198cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1199cabdff1aSopenharmony_ci            "movn   %[qc2], %[t5],  %[t1]   \n\t"
1200cabdff1aSopenharmony_ci            "movn   %[qc3], %[t6],  %[t2]   \n\t"
1201cabdff1aSopenharmony_ci            "movn   %[qc4], %[t7],  %[t3]   \n\t"
1202cabdff1aSopenharmony_ci
1203cabdff1aSopenharmony_ci            ".set pop                       \n\t"
1204cabdff1aSopenharmony_ci
1205cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1206cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1207cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1208cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1209cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
1210cabdff1aSopenharmony_ci            : "memory"
1211cabdff1aSopenharmony_ci        );
1212cabdff1aSopenharmony_ci
1213cabdff1aSopenharmony_ci        curidx  = 9 * qc1;
1214cabdff1aSopenharmony_ci        curidx += qc2 + 40;
1215cabdff1aSopenharmony_ci
1216cabdff1aSopenharmony_ci        curidx2  = 9 * qc3;
1217cabdff1aSopenharmony_ci        curidx2 += qc4 + 40;
1218cabdff1aSopenharmony_ci
1219cabdff1aSopenharmony_ci        curbits += p_bits[curidx] + p_bits[curidx2];
1220cabdff1aSopenharmony_ci    }
1221cabdff1aSopenharmony_ci    return curbits;
1222cabdff1aSopenharmony_ci}
1223cabdff1aSopenharmony_ci
1224cabdff1aSopenharmony_cistatic float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1225cabdff1aSopenharmony_ci                                          PutBitContext *pb, const float *in,
1226cabdff1aSopenharmony_ci                                          const float *scaled, int size, int scale_idx,
1227cabdff1aSopenharmony_ci                                          int cb, const float lambda, const float uplim,
1228cabdff1aSopenharmony_ci                                          int *bits)
1229cabdff1aSopenharmony_ci{
1230cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1231cabdff1aSopenharmony_ci    int i;
1232cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1233cabdff1aSopenharmony_ci    int curbits = 0;
1234cabdff1aSopenharmony_ci
1235cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1236cabdff1aSopenharmony_ci
1237cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1238cabdff1aSopenharmony_ci        int curidx, curidx2;
1239cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
1240cabdff1aSopenharmony_ci
1241cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1242cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1243cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1244cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1245cabdff1aSopenharmony_ci
1246cabdff1aSopenharmony_ci        __asm__ volatile (
1247cabdff1aSopenharmony_ci            ".set push                      \n\t"
1248cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
1249cabdff1aSopenharmony_ci
1250cabdff1aSopenharmony_ci            "ori    %[t4],  $zero,  7       \n\t"
1251cabdff1aSopenharmony_ci            "slt    %[t0],  %[t4],  %[qc1]  \n\t"
1252cabdff1aSopenharmony_ci            "slt    %[t1],  %[t4],  %[qc2]  \n\t"
1253cabdff1aSopenharmony_ci            "slt    %[t2],  %[t4],  %[qc3]  \n\t"
1254cabdff1aSopenharmony_ci            "slt    %[t3],  %[t4],  %[qc4]  \n\t"
1255cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1256cabdff1aSopenharmony_ci            "movn   %[qc2], %[t4],  %[t1]   \n\t"
1257cabdff1aSopenharmony_ci            "movn   %[qc3], %[t4],  %[t2]   \n\t"
1258cabdff1aSopenharmony_ci            "movn   %[qc4], %[t4],  %[t3]   \n\t"
1259cabdff1aSopenharmony_ci
1260cabdff1aSopenharmony_ci            ".set pop                       \n\t"
1261cabdff1aSopenharmony_ci
1262cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1263cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1264cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1265cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
1266cabdff1aSopenharmony_ci        );
1267cabdff1aSopenharmony_ci
1268cabdff1aSopenharmony_ci        curidx  = 8 * qc1;
1269cabdff1aSopenharmony_ci        curidx += qc2;
1270cabdff1aSopenharmony_ci
1271cabdff1aSopenharmony_ci        curidx2  = 8 * qc3;
1272cabdff1aSopenharmony_ci        curidx2 += qc4;
1273cabdff1aSopenharmony_ci
1274cabdff1aSopenharmony_ci        curbits += p_bits[curidx] +
1275cabdff1aSopenharmony_ci                   upair7_sign_bits[curidx] +
1276cabdff1aSopenharmony_ci                   p_bits[curidx2] +
1277cabdff1aSopenharmony_ci                   upair7_sign_bits[curidx2];
1278cabdff1aSopenharmony_ci    }
1279cabdff1aSopenharmony_ci    return curbits;
1280cabdff1aSopenharmony_ci}
1281cabdff1aSopenharmony_ci
1282cabdff1aSopenharmony_cistatic float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1283cabdff1aSopenharmony_ci                                           PutBitContext *pb, const float *in,
1284cabdff1aSopenharmony_ci                                           const float *scaled, int size, int scale_idx,
1285cabdff1aSopenharmony_ci                                           int cb, const float lambda, const float uplim,
1286cabdff1aSopenharmony_ci                                           int *bits)
1287cabdff1aSopenharmony_ci{
1288cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1289cabdff1aSopenharmony_ci    int i;
1290cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1291cabdff1aSopenharmony_ci    int curbits = 0;
1292cabdff1aSopenharmony_ci
1293cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1294cabdff1aSopenharmony_ci
1295cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1296cabdff1aSopenharmony_ci        int curidx, curidx2;
1297cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
1298cabdff1aSopenharmony_ci
1299cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1300cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1301cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1302cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1303cabdff1aSopenharmony_ci
1304cabdff1aSopenharmony_ci        __asm__ volatile (
1305cabdff1aSopenharmony_ci            ".set push                      \n\t"
1306cabdff1aSopenharmony_ci            ".set noreorder                 \n\t"
1307cabdff1aSopenharmony_ci
1308cabdff1aSopenharmony_ci            "ori    %[t4],  $zero,  12      \n\t"
1309cabdff1aSopenharmony_ci            "slt    %[t0],  %[t4],  %[qc1]  \n\t"
1310cabdff1aSopenharmony_ci            "slt    %[t1],  %[t4],  %[qc2]  \n\t"
1311cabdff1aSopenharmony_ci            "slt    %[t2],  %[t4],  %[qc3]  \n\t"
1312cabdff1aSopenharmony_ci            "slt    %[t3],  %[t4],  %[qc4]  \n\t"
1313cabdff1aSopenharmony_ci            "movn   %[qc1], %[t4],  %[t0]   \n\t"
1314cabdff1aSopenharmony_ci            "movn   %[qc2], %[t4],  %[t1]   \n\t"
1315cabdff1aSopenharmony_ci            "movn   %[qc3], %[t4],  %[t2]   \n\t"
1316cabdff1aSopenharmony_ci            "movn   %[qc4], %[t4],  %[t3]   \n\t"
1317cabdff1aSopenharmony_ci
1318cabdff1aSopenharmony_ci            ".set pop                       \n\t"
1319cabdff1aSopenharmony_ci
1320cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1321cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1322cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1323cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
1324cabdff1aSopenharmony_ci        );
1325cabdff1aSopenharmony_ci
1326cabdff1aSopenharmony_ci        curidx  = 13 * qc1;
1327cabdff1aSopenharmony_ci        curidx += qc2;
1328cabdff1aSopenharmony_ci
1329cabdff1aSopenharmony_ci        curidx2  = 13 * qc3;
1330cabdff1aSopenharmony_ci        curidx2 += qc4;
1331cabdff1aSopenharmony_ci
1332cabdff1aSopenharmony_ci        curbits += p_bits[curidx] +
1333cabdff1aSopenharmony_ci                   p_bits[curidx2] +
1334cabdff1aSopenharmony_ci                   upair12_sign_bits[curidx] +
1335cabdff1aSopenharmony_ci                   upair12_sign_bits[curidx2];
1336cabdff1aSopenharmony_ci    }
1337cabdff1aSopenharmony_ci    return curbits;
1338cabdff1aSopenharmony_ci}
1339cabdff1aSopenharmony_ci
1340cabdff1aSopenharmony_cistatic float get_band_numbits_ESC_mips(struct AACEncContext *s,
1341cabdff1aSopenharmony_ci                                       PutBitContext *pb, const float *in,
1342cabdff1aSopenharmony_ci                                       const float *scaled, int size, int scale_idx,
1343cabdff1aSopenharmony_ci                                       int cb, const float lambda, const float uplim,
1344cabdff1aSopenharmony_ci                                       int *bits)
1345cabdff1aSopenharmony_ci{
1346cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1347cabdff1aSopenharmony_ci    int i;
1348cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1349cabdff1aSopenharmony_ci    int curbits = 0;
1350cabdff1aSopenharmony_ci
1351cabdff1aSopenharmony_ci    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1352cabdff1aSopenharmony_ci
1353cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1354cabdff1aSopenharmony_ci        int curidx, curidx2;
1355cabdff1aSopenharmony_ci        int cond0, cond1, cond2, cond3;
1356cabdff1aSopenharmony_ci        int c1, c2, c3, c4;
1357cabdff1aSopenharmony_ci        int t4, t5;
1358cabdff1aSopenharmony_ci
1359cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1360cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1361cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1362cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1363cabdff1aSopenharmony_ci
1364cabdff1aSopenharmony_ci        __asm__ volatile (
1365cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1366cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1367cabdff1aSopenharmony_ci
1368cabdff1aSopenharmony_ci            "ori        %[t4],      $zero,  15          \n\t"
1369cabdff1aSopenharmony_ci            "ori        %[t5],      $zero,  16          \n\t"
1370cabdff1aSopenharmony_ci            "shll_s.w   %[c1],      %[qc1], 18          \n\t"
1371cabdff1aSopenharmony_ci            "shll_s.w   %[c2],      %[qc2], 18          \n\t"
1372cabdff1aSopenharmony_ci            "shll_s.w   %[c3],      %[qc3], 18          \n\t"
1373cabdff1aSopenharmony_ci            "shll_s.w   %[c4],      %[qc4], 18          \n\t"
1374cabdff1aSopenharmony_ci            "srl        %[c1],      %[c1],  18          \n\t"
1375cabdff1aSopenharmony_ci            "srl        %[c2],      %[c2],  18          \n\t"
1376cabdff1aSopenharmony_ci            "srl        %[c3],      %[c3],  18          \n\t"
1377cabdff1aSopenharmony_ci            "srl        %[c4],      %[c4],  18          \n\t"
1378cabdff1aSopenharmony_ci            "slt        %[cond0],   %[t4],  %[qc1]      \n\t"
1379cabdff1aSopenharmony_ci            "slt        %[cond1],   %[t4],  %[qc2]      \n\t"
1380cabdff1aSopenharmony_ci            "slt        %[cond2],   %[t4],  %[qc3]      \n\t"
1381cabdff1aSopenharmony_ci            "slt        %[cond3],   %[t4],  %[qc4]      \n\t"
1382cabdff1aSopenharmony_ci            "movn       %[qc1],     %[t5],  %[cond0]    \n\t"
1383cabdff1aSopenharmony_ci            "movn       %[qc2],     %[t5],  %[cond1]    \n\t"
1384cabdff1aSopenharmony_ci            "movn       %[qc3],     %[t5],  %[cond2]    \n\t"
1385cabdff1aSopenharmony_ci            "movn       %[qc4],     %[t5],  %[cond3]    \n\t"
1386cabdff1aSopenharmony_ci            "ori        %[t5],      $zero,  31          \n\t"
1387cabdff1aSopenharmony_ci            "clz        %[c1],      %[c1]               \n\t"
1388cabdff1aSopenharmony_ci            "clz        %[c2],      %[c2]               \n\t"
1389cabdff1aSopenharmony_ci            "clz        %[c3],      %[c3]               \n\t"
1390cabdff1aSopenharmony_ci            "clz        %[c4],      %[c4]               \n\t"
1391cabdff1aSopenharmony_ci            "subu       %[c1],      %[t5],  %[c1]       \n\t"
1392cabdff1aSopenharmony_ci            "subu       %[c2],      %[t5],  %[c2]       \n\t"
1393cabdff1aSopenharmony_ci            "subu       %[c3],      %[t5],  %[c3]       \n\t"
1394cabdff1aSopenharmony_ci            "subu       %[c4],      %[t5],  %[c4]       \n\t"
1395cabdff1aSopenharmony_ci            "sll        %[c1],      %[c1],  1           \n\t"
1396cabdff1aSopenharmony_ci            "sll        %[c2],      %[c2],  1           \n\t"
1397cabdff1aSopenharmony_ci            "sll        %[c3],      %[c3],  1           \n\t"
1398cabdff1aSopenharmony_ci            "sll        %[c4],      %[c4],  1           \n\t"
1399cabdff1aSopenharmony_ci            "addiu      %[c1],      %[c1],  -3          \n\t"
1400cabdff1aSopenharmony_ci            "addiu      %[c2],      %[c2],  -3          \n\t"
1401cabdff1aSopenharmony_ci            "addiu      %[c3],      %[c3],  -3          \n\t"
1402cabdff1aSopenharmony_ci            "addiu      %[c4],      %[c4],  -3          \n\t"
1403cabdff1aSopenharmony_ci            "subu       %[cond0],   $zero,  %[cond0]    \n\t"
1404cabdff1aSopenharmony_ci            "subu       %[cond1],   $zero,  %[cond1]    \n\t"
1405cabdff1aSopenharmony_ci            "subu       %[cond2],   $zero,  %[cond2]    \n\t"
1406cabdff1aSopenharmony_ci            "subu       %[cond3],   $zero,  %[cond3]    \n\t"
1407cabdff1aSopenharmony_ci            "and        %[c1],      %[c1],  %[cond0]    \n\t"
1408cabdff1aSopenharmony_ci            "and        %[c2],      %[c2],  %[cond1]    \n\t"
1409cabdff1aSopenharmony_ci            "and        %[c3],      %[c3],  %[cond2]    \n\t"
1410cabdff1aSopenharmony_ci            "and        %[c4],      %[c4],  %[cond3]    \n\t"
1411cabdff1aSopenharmony_ci
1412cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1413cabdff1aSopenharmony_ci
1414cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1415cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1416cabdff1aSopenharmony_ci              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1417cabdff1aSopenharmony_ci              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1418cabdff1aSopenharmony_ci              [c1]"=&r"(c1), [c2]"=&r"(c2),
1419cabdff1aSopenharmony_ci              [c3]"=&r"(c3), [c4]"=&r"(c4),
1420cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5)
1421cabdff1aSopenharmony_ci        );
1422cabdff1aSopenharmony_ci
1423cabdff1aSopenharmony_ci        curidx = 17 * qc1;
1424cabdff1aSopenharmony_ci        curidx += qc2;
1425cabdff1aSopenharmony_ci
1426cabdff1aSopenharmony_ci        curidx2 = 17 * qc3;
1427cabdff1aSopenharmony_ci        curidx2 += qc4;
1428cabdff1aSopenharmony_ci
1429cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1430cabdff1aSopenharmony_ci        curbits += esc_sign_bits[curidx];
1431cabdff1aSopenharmony_ci        curbits += p_bits[curidx2];
1432cabdff1aSopenharmony_ci        curbits += esc_sign_bits[curidx2];
1433cabdff1aSopenharmony_ci
1434cabdff1aSopenharmony_ci        curbits += c1;
1435cabdff1aSopenharmony_ci        curbits += c2;
1436cabdff1aSopenharmony_ci        curbits += c3;
1437cabdff1aSopenharmony_ci        curbits += c4;
1438cabdff1aSopenharmony_ci    }
1439cabdff1aSopenharmony_ci    return curbits;
1440cabdff1aSopenharmony_ci}
1441cabdff1aSopenharmony_ci
1442cabdff1aSopenharmony_cistatic float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1443cabdff1aSopenharmony_ci                                             PutBitContext *pb, const float *in,
1444cabdff1aSopenharmony_ci                                             const float *scaled, int size, int scale_idx,
1445cabdff1aSopenharmony_ci                                             int cb, const float lambda, const float uplim,
1446cabdff1aSopenharmony_ci                                             int *bits) = {
1447cabdff1aSopenharmony_ci    get_band_numbits_ZERO_mips,
1448cabdff1aSopenharmony_ci    get_band_numbits_SQUAD_mips,
1449cabdff1aSopenharmony_ci    get_band_numbits_SQUAD_mips,
1450cabdff1aSopenharmony_ci    get_band_numbits_UQUAD_mips,
1451cabdff1aSopenharmony_ci    get_band_numbits_UQUAD_mips,
1452cabdff1aSopenharmony_ci    get_band_numbits_SPAIR_mips,
1453cabdff1aSopenharmony_ci    get_band_numbits_SPAIR_mips,
1454cabdff1aSopenharmony_ci    get_band_numbits_UPAIR7_mips,
1455cabdff1aSopenharmony_ci    get_band_numbits_UPAIR7_mips,
1456cabdff1aSopenharmony_ci    get_band_numbits_UPAIR12_mips,
1457cabdff1aSopenharmony_ci    get_band_numbits_UPAIR12_mips,
1458cabdff1aSopenharmony_ci    get_band_numbits_ESC_mips,
1459cabdff1aSopenharmony_ci    get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1460cabdff1aSopenharmony_ci    get_band_numbits_ZERO_mips,
1461cabdff1aSopenharmony_ci    get_band_numbits_ZERO_mips,
1462cabdff1aSopenharmony_ci    get_band_numbits_ZERO_mips,
1463cabdff1aSopenharmony_ci};
1464cabdff1aSopenharmony_ci
/*
 * Forward a bit-count request to the get_band_numbits_arr entry selected by
 * the codebook argument.  The codebook argument is expanded twice (once as
 * the table index, once as the forwarded parameter), so it must not have
 * side effects.
 */
#define get_band_numbits(ctx_, pb_, in_, scaled_, size_, scale_idx_, cb_,      \
                         lambda_, uplim_, bits_)                               \
    get_band_numbits_arr[cb_](ctx_, pb_, in_, scaled_, size_, scale_idx_, cb_, \
                              lambda_, uplim_, bits_)
1471cabdff1aSopenharmony_ci
/**
 * Bit-count front end: hands the request to the get_band_numbits_*_mips
 * routine selected by cb (through the get_band_numbits dispatch macro).
 *
 * No PutBitContext is supplied (NULL is forwarded).  The energy and rtz
 * arguments are accepted but are neither read nor written here, and they are
 * not forwarded to the dispatched routine.
 *
 * @return the value returned by the dispatched routine
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy, int rtz)
{
    const float numbits = get_band_numbits(s, NULL, in, scaled, size,
                                           scale_idx, cb, lambda, uplim, bits);

    return numbits;
}
1479cabdff1aSopenharmony_ci
1480cabdff1aSopenharmony_ci/**
1481cabdff1aSopenharmony_ci * Functions developed from template function and optimized for getting the band cost
1482cabdff1aSopenharmony_ci */
1483cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
1484cabdff1aSopenharmony_cistatic float get_band_cost_ZERO_mips(struct AACEncContext *s,
1485cabdff1aSopenharmony_ci                                     PutBitContext *pb, const float *in,
1486cabdff1aSopenharmony_ci                                     const float *scaled, int size, int scale_idx,
1487cabdff1aSopenharmony_ci                                     int cb, const float lambda, const float uplim,
1488cabdff1aSopenharmony_ci                                     int *bits, float *energy)
1489cabdff1aSopenharmony_ci{
1490cabdff1aSopenharmony_ci    int i;
1491cabdff1aSopenharmony_ci    float cost = 0;
1492cabdff1aSopenharmony_ci
1493cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1494cabdff1aSopenharmony_ci        cost += in[i  ] * in[i  ];
1495cabdff1aSopenharmony_ci        cost += in[i+1] * in[i+1];
1496cabdff1aSopenharmony_ci        cost += in[i+2] * in[i+2];
1497cabdff1aSopenharmony_ci        cost += in[i+3] * in[i+3];
1498cabdff1aSopenharmony_ci    }
1499cabdff1aSopenharmony_ci    if (bits)
1500cabdff1aSopenharmony_ci        *bits = 0;
1501cabdff1aSopenharmony_ci    if (energy)
1502cabdff1aSopenharmony_ci        *energy = 0.0f;
1503cabdff1aSopenharmony_ci    return cost * lambda;
1504cabdff1aSopenharmony_ci}
1505cabdff1aSopenharmony_ci
1506cabdff1aSopenharmony_cistatic float get_band_cost_NONE_mips(struct AACEncContext *s,
1507cabdff1aSopenharmony_ci                                     PutBitContext *pb, const float *in,
1508cabdff1aSopenharmony_ci                                     const float *scaled, int size, int scale_idx,
1509cabdff1aSopenharmony_ci                                     int cb, const float lambda, const float uplim,
1510cabdff1aSopenharmony_ci                                     int *bits, float *energy)
1511cabdff1aSopenharmony_ci{
1512cabdff1aSopenharmony_ci    av_assert0(0);
1513cabdff1aSopenharmony_ci    return 0;
1514cabdff1aSopenharmony_ci}
1515cabdff1aSopenharmony_ci
/**
 * Band cost for the signed-quad case: every quantized coefficient is reduced
 * to -1, 0 or +1 and four of them form one codebook index.
 *
 * Coefficients are consumed four at a time (the loop reads in[i..i+3] and
 * scaled[i..i+3] for i stepping by 4).  For each group the code length from
 * the bits table is added to the bit count, the squared codebook vector is
 * added to the quantized energy, and the squared residual
 * in[i+k] - vec[k] * IQ is added to the distortion.
 *
 * The parameters s, pb and uplim are not referenced by this function.
 *
 * @param in        original coefficients; their sign bits decide the sign of
 *                  each quantized value
 * @param scaled    values multiplied by Q34, offset by ROUND_STANDARD and
 *                  converted to int (presumably non-negative magnitudes --
 *                  confirm against the caller)
 * @param size      number of coefficients to process
 * @param scale_idx index used to look up Q34 and IQ
 * @param cb        codebook number; the tables for cb-1 are used
 * @param lambda    weight applied to the accumulated squared residual
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives qenergy * (IQ*IQ)
 * @return cost * lambda + curbits
 */
1516cabdff1aSopenharmony_cistatic float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1517cabdff1aSopenharmony_ci                                      PutBitContext *pb, const float *in,
1518cabdff1aSopenharmony_ci                                      const float *scaled, int size, int scale_idx,
1519cabdff1aSopenharmony_ci                                      int cb, const float lambda, const float uplim,
1520cabdff1aSopenharmony_ci                                      int *bits, float *energy)
1521cabdff1aSopenharmony_ci{
1522cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1523cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1524cabdff1aSopenharmony_ci    int i;
1525cabdff1aSopenharmony_ci    float cost = 0;
1526cabdff1aSopenharmony_ci    float qenergy = 0.0f;
1527cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1528cabdff1aSopenharmony_ci    int curbits = 0;
1529cabdff1aSopenharmony_ci
1530cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
1531cabdff1aSopenharmony_ci    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];
1532cabdff1aSopenharmony_ci
1533cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1534cabdff1aSopenharmony_ci        const float *vec;
1535cabdff1aSopenharmony_ci        int curidx;
        /* in_int and in_pos alias the same four inputs: in_int is used to
         * load the raw words (for the sign bit), in_pos to load them as
         * floats. */
1536cabdff1aSopenharmony_ci        int   *in_int = (int   *)&in[i];
1537cabdff1aSopenharmony_ci        float *in_pos = (float *)&in[i];
1538cabdff1aSopenharmony_ci        float di0, di1, di2, di3;
1539cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
1540cabdff1aSopenharmony_ci
1541cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1542cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1543cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1544cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1545cabdff1aSopenharmony_ci
        /*
         * Reduce each qc to -1, 0 or +1:
         *   slt     qc     = (0 < qc)               -> 0 or 1
         *   lw/srl  t0..t3 = bit 31 of the raw in[] words
         *   subu    t4..t7 = -qc
         *   movn    qc     = -qc where the matching sign bit is set
         */
1546cabdff1aSopenharmony_ci        __asm__ volatile (
1547cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1548cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1549cabdff1aSopenharmony_ci
1550cabdff1aSopenharmony_ci            "slt        %[qc1], $zero,  %[qc1]          \n\t"
1551cabdff1aSopenharmony_ci            "slt        %[qc2], $zero,  %[qc2]          \n\t"
1552cabdff1aSopenharmony_ci            "slt        %[qc3], $zero,  %[qc3]          \n\t"
1553cabdff1aSopenharmony_ci            "slt        %[qc4], $zero,  %[qc4]          \n\t"
1554cabdff1aSopenharmony_ci            "lw         %[t0],  0(%[in_int])            \n\t"
1555cabdff1aSopenharmony_ci            "lw         %[t1],  4(%[in_int])            \n\t"
1556cabdff1aSopenharmony_ci            "lw         %[t2],  8(%[in_int])            \n\t"
1557cabdff1aSopenharmony_ci            "lw         %[t3],  12(%[in_int])           \n\t"
1558cabdff1aSopenharmony_ci            "srl        %[t0],  %[t0],  31              \n\t"
1559cabdff1aSopenharmony_ci            "srl        %[t1],  %[t1],  31              \n\t"
1560cabdff1aSopenharmony_ci            "srl        %[t2],  %[t2],  31              \n\t"
1561cabdff1aSopenharmony_ci            "srl        %[t3],  %[t3],  31              \n\t"
1562cabdff1aSopenharmony_ci            "subu       %[t4],  $zero,  %[qc1]          \n\t"
1563cabdff1aSopenharmony_ci            "subu       %[t5],  $zero,  %[qc2]          \n\t"
1564cabdff1aSopenharmony_ci            "subu       %[t6],  $zero,  %[qc3]          \n\t"
1565cabdff1aSopenharmony_ci            "subu       %[t7],  $zero,  %[qc4]          \n\t"
1566cabdff1aSopenharmony_ci            "movn       %[qc1], %[t4],  %[t0]           \n\t"
1567cabdff1aSopenharmony_ci            "movn       %[qc2], %[t5],  %[t1]           \n\t"
1568cabdff1aSopenharmony_ci            "movn       %[qc3], %[t6],  %[t2]           \n\t"
1569cabdff1aSopenharmony_ci            "movn       %[qc4], %[t7],  %[t3]           \n\t"
1570cabdff1aSopenharmony_ci
1571cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1572cabdff1aSopenharmony_ci
1573cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1574cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1575cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1576cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1577cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
1578cabdff1aSopenharmony_ci            : "memory"
1579cabdff1aSopenharmony_ci        );
1580cabdff1aSopenharmony_ci
        /* Base-3 index over the four digits; the +40 (= 27 + 9 + 3 + 1)
         * shifts every digit from [-1, 1] to [0, 2]. */
1581cabdff1aSopenharmony_ci        curidx = qc1;
1582cabdff1aSopenharmony_ci        curidx *= 3;
1583cabdff1aSopenharmony_ci        curidx += qc2;
1584cabdff1aSopenharmony_ci        curidx *= 3;
1585cabdff1aSopenharmony_ci        curidx += qc3;
1586cabdff1aSopenharmony_ci        curidx *= 3;
1587cabdff1aSopenharmony_ci        curidx += qc4;
1588cabdff1aSopenharmony_ci        curidx += 40;
1589cabdff1aSopenharmony_ci
1590cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
        /* Four floats per codebook entry. */
1591cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*4];
1592cabdff1aSopenharmony_ci
1593cabdff1aSopenharmony_ci        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1594cabdff1aSopenharmony_ci                +  vec[2]*vec[2] + vec[3]*vec[3];
1595cabdff1aSopenharmony_ci
        /*
         * di[k] = in[i+k] - vec[k] * IQ
         * (nmsub.s fd, fr, fs, ft yields fr - fs * ft).  $f0-$f7 are used as
         * scratch registers and are listed as clobbers.
         */
1596cabdff1aSopenharmony_ci        __asm__ volatile (
1597cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1598cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1599cabdff1aSopenharmony_ci
1600cabdff1aSopenharmony_ci            "lwc1       $f0,    0(%[in_pos])            \n\t"
1601cabdff1aSopenharmony_ci            "lwc1       $f1,    0(%[vec])               \n\t"
1602cabdff1aSopenharmony_ci            "lwc1       $f2,    4(%[in_pos])            \n\t"
1603cabdff1aSopenharmony_ci            "lwc1       $f3,    4(%[vec])               \n\t"
1604cabdff1aSopenharmony_ci            "lwc1       $f4,    8(%[in_pos])            \n\t"
1605cabdff1aSopenharmony_ci            "lwc1       $f5,    8(%[vec])               \n\t"
1606cabdff1aSopenharmony_ci            "lwc1       $f6,    12(%[in_pos])           \n\t"
1607cabdff1aSopenharmony_ci            "lwc1       $f7,    12(%[vec])              \n\t"
1608cabdff1aSopenharmony_ci            "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t"
1609cabdff1aSopenharmony_ci            "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t"
1610cabdff1aSopenharmony_ci            "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t"
1611cabdff1aSopenharmony_ci            "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t"
1612cabdff1aSopenharmony_ci
1613cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1614cabdff1aSopenharmony_ci
1615cabdff1aSopenharmony_ci            : [di0]"=&f"(di0), [di1]"=&f"(di1),
1616cabdff1aSopenharmony_ci              [di2]"=&f"(di2), [di3]"=&f"(di3)
1617cabdff1aSopenharmony_ci            : [in_pos]"r"(in_pos), [vec]"r"(vec),
1618cabdff1aSopenharmony_ci              [IQ]"f"(IQ)
1619cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3",
1620cabdff1aSopenharmony_ci              "$f4", "$f5", "$f6", "$f7",
1621cabdff1aSopenharmony_ci              "memory"
1622cabdff1aSopenharmony_ci        );
1623cabdff1aSopenharmony_ci
1624cabdff1aSopenharmony_ci        cost += di0 * di0 + di1 * di1
1625cabdff1aSopenharmony_ci                + di2 * di2 + di3 * di3;
1626cabdff1aSopenharmony_ci    }
1627cabdff1aSopenharmony_ci
1628cabdff1aSopenharmony_ci    if (bits)
1629cabdff1aSopenharmony_ci        *bits = curbits;
1630cabdff1aSopenharmony_ci    if (energy)
1631cabdff1aSopenharmony_ci        *energy = qenergy * (IQ*IQ);
1632cabdff1aSopenharmony_ci    return cost * lambda + curbits;
1633cabdff1aSopenharmony_ci}
1634cabdff1aSopenharmony_ci
/**
 * Band cost for the unsigned-quad case: each quantized value is capped at 2,
 * four of them form one codebook index, and sign bits are counted separately
 * through uquad_sign_bits[].
 *
 * Coefficients are consumed four at a time (the loop reads in[i..i+3] and
 * scaled[i..i+3] for i stepping by 4).  For each group the code length plus
 * the uquad_sign_bits entry is added to the bit count, the squared codebook
 * vector is added to the quantized energy, and the squared residual
 * |in[i+k]| - vec[k] * IQ is added to the distortion.
 *
 * The parameters s, pb and uplim are not referenced by this function.
 *
 * @param in        original coefficients; only their absolute values are used
 * @param scaled    values multiplied by Q34, offset by ROUND_STANDARD and
 *                  converted to int (presumably non-negative magnitudes --
 *                  only the upper bound is clamped here; confirm against the
 *                  caller)
 * @param size      number of coefficients to process
 * @param scale_idx index used to look up Q34 and IQ
 * @param cb        codebook number; the tables for cb-1 are used
 * @param lambda    weight applied to the accumulated squared residual
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives qenergy * (IQ*IQ)
 * @return cost * lambda + curbits
 */
1635cabdff1aSopenharmony_cistatic float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1636cabdff1aSopenharmony_ci                                      PutBitContext *pb, const float *in,
1637cabdff1aSopenharmony_ci                                      const float *scaled, int size, int scale_idx,
1638cabdff1aSopenharmony_ci                                      int cb, const float lambda, const float uplim,
1639cabdff1aSopenharmony_ci                                      int *bits, float *energy)
1640cabdff1aSopenharmony_ci{
1641cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1642cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1643cabdff1aSopenharmony_ci    int i;
1644cabdff1aSopenharmony_ci    float cost = 0;
1645cabdff1aSopenharmony_ci    float qenergy = 0.0f;
1646cabdff1aSopenharmony_ci    int curbits = 0;
1647cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1648cabdff1aSopenharmony_ci
1649cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1];
1650cabdff1aSopenharmony_ci    float   *p_codes = (float  *)ff_aac_codebook_vectors[cb-1];
1651cabdff1aSopenharmony_ci
1652cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1653cabdff1aSopenharmony_ci        const float *vec;
1654cabdff1aSopenharmony_ci        int curidx;
1655cabdff1aSopenharmony_ci        float *in_pos = (float *)&in[i];
1656cabdff1aSopenharmony_ci        float di0, di1, di2, di3;
1657cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
1658cabdff1aSopenharmony_ci
1659cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1660cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1661cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1662cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1663cabdff1aSopenharmony_ci
        /*
         * Cap each qc at 2:
         *   ori   t4     = 2
         *   slt   t0..t3 = (2 < qc)
         *   movn  qc     = 2 where the comparison was true
         */
1664cabdff1aSopenharmony_ci        __asm__ volatile (
1665cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1666cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1667cabdff1aSopenharmony_ci
1668cabdff1aSopenharmony_ci            "ori        %[t4],  $zero,  2               \n\t"
1669cabdff1aSopenharmony_ci            "slt        %[t0],  %[t4],  %[qc1]          \n\t"
1670cabdff1aSopenharmony_ci            "slt        %[t1],  %[t4],  %[qc2]          \n\t"
1671cabdff1aSopenharmony_ci            "slt        %[t2],  %[t4],  %[qc3]          \n\t"
1672cabdff1aSopenharmony_ci            "slt        %[t3],  %[t4],  %[qc4]          \n\t"
1673cabdff1aSopenharmony_ci            "movn       %[qc1], %[t4],  %[t0]           \n\t"
1674cabdff1aSopenharmony_ci            "movn       %[qc2], %[t4],  %[t1]           \n\t"
1675cabdff1aSopenharmony_ci            "movn       %[qc3], %[t4],  %[t2]           \n\t"
1676cabdff1aSopenharmony_ci            "movn       %[qc4], %[t4],  %[t3]           \n\t"
1677cabdff1aSopenharmony_ci
1678cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1679cabdff1aSopenharmony_ci
1680cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1681cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1682cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1683cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
1684cabdff1aSopenharmony_ci        );
1685cabdff1aSopenharmony_ci
        /* Base-3 index over the four capped digits (no offset: the digits
         * are used as they are). */
1686cabdff1aSopenharmony_ci        curidx = qc1;
1687cabdff1aSopenharmony_ci        curidx *= 3;
1688cabdff1aSopenharmony_ci        curidx += qc2;
1689cabdff1aSopenharmony_ci        curidx *= 3;
1690cabdff1aSopenharmony_ci        curidx += qc3;
1691cabdff1aSopenharmony_ci        curidx *= 3;
1692cabdff1aSopenharmony_ci        curidx += qc4;
1693cabdff1aSopenharmony_ci
1694cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1695cabdff1aSopenharmony_ci        curbits += uquad_sign_bits[curidx];
        /* Four floats per codebook entry. */
1696cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*4];
1697cabdff1aSopenharmony_ci
1698cabdff1aSopenharmony_ci        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1699cabdff1aSopenharmony_ci                +  vec[2]*vec[2] + vec[3]*vec[3];
1700cabdff1aSopenharmony_ci
        /*
         * di[k] = |in[i+k]| - vec[k] * IQ
         * The inputs are loaded straight into di0..di3 and made absolute
         * with abs.s; the codebook values go through $f0-$f3 (listed as
         * clobbers).  nmsub.s fd, fr, fs, ft yields fr - fs * ft.
         */
1701cabdff1aSopenharmony_ci        __asm__ volatile (
1702cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1703cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1704cabdff1aSopenharmony_ci
1705cabdff1aSopenharmony_ci            "lwc1       %[di0], 0(%[in_pos])            \n\t"
1706cabdff1aSopenharmony_ci            "lwc1       %[di1], 4(%[in_pos])            \n\t"
1707cabdff1aSopenharmony_ci            "lwc1       %[di2], 8(%[in_pos])            \n\t"
1708cabdff1aSopenharmony_ci            "lwc1       %[di3], 12(%[in_pos])           \n\t"
1709cabdff1aSopenharmony_ci            "abs.s      %[di0], %[di0]                  \n\t"
1710cabdff1aSopenharmony_ci            "abs.s      %[di1], %[di1]                  \n\t"
1711cabdff1aSopenharmony_ci            "abs.s      %[di2], %[di2]                  \n\t"
1712cabdff1aSopenharmony_ci            "abs.s      %[di3], %[di3]                  \n\t"
1713cabdff1aSopenharmony_ci            "lwc1       $f0,    0(%[vec])               \n\t"
1714cabdff1aSopenharmony_ci            "lwc1       $f1,    4(%[vec])               \n\t"
1715cabdff1aSopenharmony_ci            "lwc1       $f2,    8(%[vec])               \n\t"
1716cabdff1aSopenharmony_ci            "lwc1       $f3,    12(%[vec])              \n\t"
1717cabdff1aSopenharmony_ci            "nmsub.s    %[di0], %[di0], $f0,    %[IQ]   \n\t"
1718cabdff1aSopenharmony_ci            "nmsub.s    %[di1], %[di1], $f1,    %[IQ]   \n\t"
1719cabdff1aSopenharmony_ci            "nmsub.s    %[di2], %[di2], $f2,    %[IQ]   \n\t"
1720cabdff1aSopenharmony_ci            "nmsub.s    %[di3], %[di3], $f3,    %[IQ]   \n\t"
1721cabdff1aSopenharmony_ci
1722cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1723cabdff1aSopenharmony_ci
1724cabdff1aSopenharmony_ci            : [di0]"=&f"(di0), [di1]"=&f"(di1),
1725cabdff1aSopenharmony_ci              [di2]"=&f"(di2), [di3]"=&f"(di3)
1726cabdff1aSopenharmony_ci            : [in_pos]"r"(in_pos), [vec]"r"(vec),
1727cabdff1aSopenharmony_ci              [IQ]"f"(IQ)
1728cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3",
1729cabdff1aSopenharmony_ci              "memory"
1730cabdff1aSopenharmony_ci        );
1731cabdff1aSopenharmony_ci
1732cabdff1aSopenharmony_ci        cost += di0 * di0 + di1 * di1
1733cabdff1aSopenharmony_ci                + di2 * di2 + di3 * di3;
1734cabdff1aSopenharmony_ci    }
1735cabdff1aSopenharmony_ci
1736cabdff1aSopenharmony_ci    if (bits)
1737cabdff1aSopenharmony_ci        *bits = curbits;
1738cabdff1aSopenharmony_ci    if (energy)
1739cabdff1aSopenharmony_ci        *energy = qenergy * (IQ*IQ);
1740cabdff1aSopenharmony_ci    return cost * lambda + curbits;
1741cabdff1aSopenharmony_ci}
1742cabdff1aSopenharmony_ci
/**
 * Band cost for the signed-pair case: each quantized value is capped at 4,
 * given the sign of the matching input, and two values form one codebook
 * index.
 *
 * Coefficients are consumed four at a time (the loop reads in[i..i+3] and
 * scaled[i..i+3] for i stepping by 4), i.e. two pairs per iteration.  For
 * each pair the code length from the bits table is added to the bit count
 * and the squared codebook vector is added to the quantized energy; the
 * squared residual in[i+k] - codebook value * IQ is added to the distortion.
 *
 * The parameters s, pb and uplim are not referenced by this function.
 *
 * @param in        original coefficients; their sign bits decide the sign of
 *                  each quantized value
 * @param scaled    values multiplied by Q34, offset by ROUND_STANDARD and
 *                  converted to int (presumably non-negative magnitudes --
 *                  only the upper bound is clamped here; confirm against the
 *                  caller)
 * @param size      number of coefficients to process
 * @param scale_idx index used to look up Q34 and IQ
 * @param cb        codebook number; the tables for cb-1 are used
 * @param lambda    weight applied to the accumulated squared residual
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives qenergy * (IQ*IQ)
 * @return cost * lambda + curbits
 */
1743cabdff1aSopenharmony_cistatic float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1744cabdff1aSopenharmony_ci                                      PutBitContext *pb, const float *in,
1745cabdff1aSopenharmony_ci                                      const float *scaled, int size, int scale_idx,
1746cabdff1aSopenharmony_ci                                      int cb, const float lambda, const float uplim,
1747cabdff1aSopenharmony_ci                                      int *bits, float *energy)
1748cabdff1aSopenharmony_ci{
1749cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1750cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1751cabdff1aSopenharmony_ci    int i;
1752cabdff1aSopenharmony_ci    float cost = 0;
1753cabdff1aSopenharmony_ci    float qenergy = 0.0f;
1754cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1755cabdff1aSopenharmony_ci    int curbits = 0;
1756cabdff1aSopenharmony_ci
1757cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
1758cabdff1aSopenharmony_ci    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];
1759cabdff1aSopenharmony_ci
1760cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1761cabdff1aSopenharmony_ci        const float *vec, *vec2;
1762cabdff1aSopenharmony_ci        int curidx, curidx2;
        /* in_int and in_pos alias the same four inputs: in_int is used to
         * load the raw words (for the sign bit), in_pos to load them as
         * floats. */
1763cabdff1aSopenharmony_ci        int   *in_int = (int   *)&in[i];
1764cabdff1aSopenharmony_ci        float *in_pos = (float *)&in[i];
1765cabdff1aSopenharmony_ci        float di0, di1, di2, di3;
1766cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4, t5, t6, t7;
1767cabdff1aSopenharmony_ci
1768cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1769cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1770cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1771cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1772cabdff1aSopenharmony_ci
        /*
         * Step 1 - cap each qc at 4:
         *   ori   t4     = 4
         *   slt   t0..t3 = (4 < qc)
         *   movn  qc     = 4 where the comparison was true
         * Step 2 - apply the sign of the input:
         *   lw/srl t0..t3 = bit 31 of the raw in[] words
         *   subu   t4..t7 = -qc   (t4 is reused; the constant 4 is no
         *                          longer needed at this point)
         *   movn   qc     = -qc where the matching sign bit is set
         */
1773cabdff1aSopenharmony_ci        __asm__ volatile (
1774cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1775cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1776cabdff1aSopenharmony_ci
1777cabdff1aSopenharmony_ci            "ori        %[t4],  $zero,  4               \n\t"
1778cabdff1aSopenharmony_ci            "slt        %[t0],  %[t4],  %[qc1]          \n\t"
1779cabdff1aSopenharmony_ci            "slt        %[t1],  %[t4],  %[qc2]          \n\t"
1780cabdff1aSopenharmony_ci            "slt        %[t2],  %[t4],  %[qc3]          \n\t"
1781cabdff1aSopenharmony_ci            "slt        %[t3],  %[t4],  %[qc4]          \n\t"
1782cabdff1aSopenharmony_ci            "movn       %[qc1], %[t4],  %[t0]           \n\t"
1783cabdff1aSopenharmony_ci            "movn       %[qc2], %[t4],  %[t1]           \n\t"
1784cabdff1aSopenharmony_ci            "movn       %[qc3], %[t4],  %[t2]           \n\t"
1785cabdff1aSopenharmony_ci            "movn       %[qc4], %[t4],  %[t3]           \n\t"
1786cabdff1aSopenharmony_ci            "lw         %[t0],  0(%[in_int])            \n\t"
1787cabdff1aSopenharmony_ci            "lw         %[t1],  4(%[in_int])            \n\t"
1788cabdff1aSopenharmony_ci            "lw         %[t2],  8(%[in_int])            \n\t"
1789cabdff1aSopenharmony_ci            "lw         %[t3],  12(%[in_int])           \n\t"
1790cabdff1aSopenharmony_ci            "srl        %[t0],  %[t0],  31              \n\t"
1791cabdff1aSopenharmony_ci            "srl        %[t1],  %[t1],  31              \n\t"
1792cabdff1aSopenharmony_ci            "srl        %[t2],  %[t2],  31              \n\t"
1793cabdff1aSopenharmony_ci            "srl        %[t3],  %[t3],  31              \n\t"
1794cabdff1aSopenharmony_ci            "subu       %[t4],  $zero,  %[qc1]          \n\t"
1795cabdff1aSopenharmony_ci            "subu       %[t5],  $zero,  %[qc2]          \n\t"
1796cabdff1aSopenharmony_ci            "subu       %[t6],  $zero,  %[qc3]          \n\t"
1797cabdff1aSopenharmony_ci            "subu       %[t7],  $zero,  %[qc4]          \n\t"
1798cabdff1aSopenharmony_ci            "movn       %[qc1], %[t4],  %[t0]           \n\t"
1799cabdff1aSopenharmony_ci            "movn       %[qc2], %[t5],  %[t1]           \n\t"
1800cabdff1aSopenharmony_ci            "movn       %[qc3], %[t6],  %[t2]           \n\t"
1801cabdff1aSopenharmony_ci            "movn       %[qc4], %[t7],  %[t3]           \n\t"
1802cabdff1aSopenharmony_ci
1803cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1804cabdff1aSopenharmony_ci
1805cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1806cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1807cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1808cabdff1aSopenharmony_ci              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1809cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
1810cabdff1aSopenharmony_ci            : "memory"
1811cabdff1aSopenharmony_ci        );
1812cabdff1aSopenharmony_ci
        /* Base-9 index per pair; the +40 (= 9 * 4 + 4) shifts both digits
         * from [-4, 4] to [0, 8]. */
1813cabdff1aSopenharmony_ci        curidx = 9 * qc1;
1814cabdff1aSopenharmony_ci        curidx += qc2 + 40;
1815cabdff1aSopenharmony_ci
1816cabdff1aSopenharmony_ci        curidx2 = 9 * qc3;
1817cabdff1aSopenharmony_ci        curidx2 += qc4 + 40;
1818cabdff1aSopenharmony_ci
1819cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1820cabdff1aSopenharmony_ci        curbits += p_bits[curidx2];
1821cabdff1aSopenharmony_ci
        /* Two floats per codebook entry. */
1822cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*2];
1823cabdff1aSopenharmony_ci        vec2    = &p_codes[curidx2*2];
1824cabdff1aSopenharmony_ci
1825cabdff1aSopenharmony_ci        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1826cabdff1aSopenharmony_ci                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
1827cabdff1aSopenharmony_ci
        /*
         * di0 = in[i  ] - vec[0]  * IQ      di1 = in[i+1] - vec[1]  * IQ
         * di2 = in[i+2] - vec2[0] * IQ      di3 = in[i+3] - vec2[1] * IQ
         * (nmsub.s fd, fr, fs, ft yields fr - fs * ft).  $f0-$f7 are used as
         * scratch registers and are listed as clobbers.
         */
1828cabdff1aSopenharmony_ci        __asm__ volatile (
1829cabdff1aSopenharmony_ci            ".set push                                  \n\t"
1830cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
1831cabdff1aSopenharmony_ci
1832cabdff1aSopenharmony_ci            "lwc1       $f0,    0(%[in_pos])            \n\t"
1833cabdff1aSopenharmony_ci            "lwc1       $f1,    0(%[vec])               \n\t"
1834cabdff1aSopenharmony_ci            "lwc1       $f2,    4(%[in_pos])            \n\t"
1835cabdff1aSopenharmony_ci            "lwc1       $f3,    4(%[vec])               \n\t"
1836cabdff1aSopenharmony_ci            "lwc1       $f4,    8(%[in_pos])            \n\t"
1837cabdff1aSopenharmony_ci            "lwc1       $f5,    0(%[vec2])              \n\t"
1838cabdff1aSopenharmony_ci            "lwc1       $f6,    12(%[in_pos])           \n\t"
1839cabdff1aSopenharmony_ci            "lwc1       $f7,    4(%[vec2])              \n\t"
1840cabdff1aSopenharmony_ci            "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t"
1841cabdff1aSopenharmony_ci            "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t"
1842cabdff1aSopenharmony_ci            "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t"
1843cabdff1aSopenharmony_ci            "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t"
1844cabdff1aSopenharmony_ci
1845cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
1846cabdff1aSopenharmony_ci
1847cabdff1aSopenharmony_ci            : [di0]"=&f"(di0), [di1]"=&f"(di1),
1848cabdff1aSopenharmony_ci              [di2]"=&f"(di2), [di3]"=&f"(di3)
1849cabdff1aSopenharmony_ci            : [in_pos]"r"(in_pos), [vec]"r"(vec),
1850cabdff1aSopenharmony_ci              [vec2]"r"(vec2), [IQ]"f"(IQ)
1851cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3",
1852cabdff1aSopenharmony_ci              "$f4", "$f5", "$f6", "$f7",
1853cabdff1aSopenharmony_ci              "memory"
1854cabdff1aSopenharmony_ci        );
1855cabdff1aSopenharmony_ci
1856cabdff1aSopenharmony_ci        cost += di0 * di0 + di1 * di1
1857cabdff1aSopenharmony_ci                + di2 * di2 + di3 * di3;
1858cabdff1aSopenharmony_ci    }
1859cabdff1aSopenharmony_ci
1860cabdff1aSopenharmony_ci    if (bits)
1861cabdff1aSopenharmony_ci        *bits = curbits;
1862cabdff1aSopenharmony_ci    if (energy)
1863cabdff1aSopenharmony_ci        *energy = qenergy * (IQ*IQ);
1864cabdff1aSopenharmony_ci    return cost * lambda + curbits;
1865cabdff1aSopenharmony_ci}
1866cabdff1aSopenharmony_ci
1867cabdff1aSopenharmony_cistatic float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1868cabdff1aSopenharmony_ci                                       PutBitContext *pb, const float *in,
1869cabdff1aSopenharmony_ci                                       const float *scaled, int size, int scale_idx,
1870cabdff1aSopenharmony_ci                                       int cb, const float lambda, const float uplim,
1871cabdff1aSopenharmony_ci                                       int *bits, float *energy)
1872cabdff1aSopenharmony_ci{
1873cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1874cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1875cabdff1aSopenharmony_ci    int i;
1876cabdff1aSopenharmony_ci    float cost = 0;
1877cabdff1aSopenharmony_ci    float qenergy = 0.0f;
1878cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
1879cabdff1aSopenharmony_ci    int curbits = 0;
1880cabdff1aSopenharmony_ci
1881cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
1882cabdff1aSopenharmony_ci    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];
1883cabdff1aSopenharmony_ci
1884cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
1885cabdff1aSopenharmony_ci        const float *vec, *vec2;
1886cabdff1aSopenharmony_ci        int curidx, curidx2, sign1, count1, sign2, count2;
1887cabdff1aSopenharmony_ci        int   *in_int = (int   *)&in[i];
1888cabdff1aSopenharmony_ci        float *in_pos = (float *)&in[i];
1889cabdff1aSopenharmony_ci        float di0, di1, di2, di3;
1890cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
1891cabdff1aSopenharmony_ci
1892cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
1893cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1894cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1895cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1896cabdff1aSopenharmony_ci
1897cabdff1aSopenharmony_ci        __asm__ volatile (
1898cabdff1aSopenharmony_ci            ".set push                                          \n\t"
1899cabdff1aSopenharmony_ci            ".set noreorder                                     \n\t"
1900cabdff1aSopenharmony_ci
1901cabdff1aSopenharmony_ci            "ori        %[t4],      $zero,      7               \n\t"
1902cabdff1aSopenharmony_ci            "ori        %[sign1],   $zero,      0               \n\t"
1903cabdff1aSopenharmony_ci            "ori        %[sign2],   $zero,      0               \n\t"
1904cabdff1aSopenharmony_ci            "slt        %[t0],      %[t4],      %[qc1]          \n\t"
1905cabdff1aSopenharmony_ci            "slt        %[t1],      %[t4],      %[qc2]          \n\t"
1906cabdff1aSopenharmony_ci            "slt        %[t2],      %[t4],      %[qc3]          \n\t"
1907cabdff1aSopenharmony_ci            "slt        %[t3],      %[t4],      %[qc4]          \n\t"
1908cabdff1aSopenharmony_ci            "movn       %[qc1],     %[t4],      %[t0]           \n\t"
1909cabdff1aSopenharmony_ci            "movn       %[qc2],     %[t4],      %[t1]           \n\t"
1910cabdff1aSopenharmony_ci            "movn       %[qc3],     %[t4],      %[t2]           \n\t"
1911cabdff1aSopenharmony_ci            "movn       %[qc4],     %[t4],      %[t3]           \n\t"
1912cabdff1aSopenharmony_ci            "lw         %[t0],      0(%[in_int])                \n\t"
1913cabdff1aSopenharmony_ci            "lw         %[t1],      4(%[in_int])                \n\t"
1914cabdff1aSopenharmony_ci            "lw         %[t2],      8(%[in_int])                \n\t"
1915cabdff1aSopenharmony_ci            "lw         %[t3],      12(%[in_int])               \n\t"
1916cabdff1aSopenharmony_ci            "slt        %[t0],      %[t0],      $zero           \n\t"
1917cabdff1aSopenharmony_ci            "movn       %[sign1],   %[t0],      %[qc1]          \n\t"
1918cabdff1aSopenharmony_ci            "slt        %[t2],      %[t2],      $zero           \n\t"
1919cabdff1aSopenharmony_ci            "movn       %[sign2],   %[t2],      %[qc3]          \n\t"
1920cabdff1aSopenharmony_ci            "slt        %[t1],      %[t1],      $zero           \n\t"
1921cabdff1aSopenharmony_ci            "sll        %[t0],      %[sign1],   1               \n\t"
1922cabdff1aSopenharmony_ci            "or         %[t0],      %[t0],      %[t1]           \n\t"
1923cabdff1aSopenharmony_ci            "movn       %[sign1],   %[t0],      %[qc2]          \n\t"
1924cabdff1aSopenharmony_ci            "slt        %[t3],      %[t3],      $zero           \n\t"
1925cabdff1aSopenharmony_ci            "sll        %[t0],      %[sign2],   1               \n\t"
1926cabdff1aSopenharmony_ci            "or         %[t0],      %[t0],      %[t3]           \n\t"
1927cabdff1aSopenharmony_ci            "movn       %[sign2],   %[t0],      %[qc4]          \n\t"
1928cabdff1aSopenharmony_ci            "slt        %[count1],  $zero,      %[qc1]          \n\t"
1929cabdff1aSopenharmony_ci            "slt        %[t1],      $zero,      %[qc2]          \n\t"
1930cabdff1aSopenharmony_ci            "slt        %[count2],  $zero,      %[qc3]          \n\t"
1931cabdff1aSopenharmony_ci            "slt        %[t2],      $zero,      %[qc4]          \n\t"
1932cabdff1aSopenharmony_ci            "addu       %[count1],  %[count1],  %[t1]           \n\t"
1933cabdff1aSopenharmony_ci            "addu       %[count2],  %[count2],  %[t2]           \n\t"
1934cabdff1aSopenharmony_ci
1935cabdff1aSopenharmony_ci            ".set pop                                           \n\t"
1936cabdff1aSopenharmony_ci
1937cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1938cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1939cabdff1aSopenharmony_ci              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1940cabdff1aSopenharmony_ci              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1941cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1942cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
1943cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
1944cabdff1aSopenharmony_ci            : "memory"
1945cabdff1aSopenharmony_ci        );
1946cabdff1aSopenharmony_ci
1947cabdff1aSopenharmony_ci        curidx = 8 * qc1;
1948cabdff1aSopenharmony_ci        curidx += qc2;
1949cabdff1aSopenharmony_ci
1950cabdff1aSopenharmony_ci        curidx2 = 8 * qc3;
1951cabdff1aSopenharmony_ci        curidx2 += qc4;
1952cabdff1aSopenharmony_ci
1953cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
1954cabdff1aSopenharmony_ci        curbits += upair7_sign_bits[curidx];
1955cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*2];
1956cabdff1aSopenharmony_ci
1957cabdff1aSopenharmony_ci        curbits += p_bits[curidx2];
1958cabdff1aSopenharmony_ci        curbits += upair7_sign_bits[curidx2];
1959cabdff1aSopenharmony_ci        vec2    = &p_codes[curidx2*2];
1960cabdff1aSopenharmony_ci
1961cabdff1aSopenharmony_ci        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1962cabdff1aSopenharmony_ci                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
1963cabdff1aSopenharmony_ci
1964cabdff1aSopenharmony_ci        __asm__ volatile (
1965cabdff1aSopenharmony_ci            ".set push                                          \n\t"
1966cabdff1aSopenharmony_ci            ".set noreorder                                     \n\t"
1967cabdff1aSopenharmony_ci
1968cabdff1aSopenharmony_ci            "lwc1       %[di0],     0(%[in_pos])                \n\t"
1969cabdff1aSopenharmony_ci            "lwc1       %[di1],     4(%[in_pos])                \n\t"
1970cabdff1aSopenharmony_ci            "lwc1       %[di2],     8(%[in_pos])                \n\t"
1971cabdff1aSopenharmony_ci            "lwc1       %[di3],     12(%[in_pos])               \n\t"
1972cabdff1aSopenharmony_ci            "abs.s      %[di0],     %[di0]                      \n\t"
1973cabdff1aSopenharmony_ci            "abs.s      %[di1],     %[di1]                      \n\t"
1974cabdff1aSopenharmony_ci            "abs.s      %[di2],     %[di2]                      \n\t"
1975cabdff1aSopenharmony_ci            "abs.s      %[di3],     %[di3]                      \n\t"
1976cabdff1aSopenharmony_ci            "lwc1       $f0,        0(%[vec])                   \n\t"
1977cabdff1aSopenharmony_ci            "lwc1       $f1,        4(%[vec])                   \n\t"
1978cabdff1aSopenharmony_ci            "lwc1       $f2,        0(%[vec2])                  \n\t"
1979cabdff1aSopenharmony_ci            "lwc1       $f3,        4(%[vec2])                  \n\t"
1980cabdff1aSopenharmony_ci            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
1981cabdff1aSopenharmony_ci            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
1982cabdff1aSopenharmony_ci            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
1983cabdff1aSopenharmony_ci            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"
1984cabdff1aSopenharmony_ci
1985cabdff1aSopenharmony_ci            ".set pop                                           \n\t"
1986cabdff1aSopenharmony_ci
1987cabdff1aSopenharmony_ci            : [di0]"=&f"(di0), [di1]"=&f"(di1),
1988cabdff1aSopenharmony_ci              [di2]"=&f"(di2), [di3]"=&f"(di3)
1989cabdff1aSopenharmony_ci            : [in_pos]"r"(in_pos), [vec]"r"(vec),
1990cabdff1aSopenharmony_ci              [vec2]"r"(vec2), [IQ]"f"(IQ)
1991cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3",
1992cabdff1aSopenharmony_ci              "memory"
1993cabdff1aSopenharmony_ci        );
1994cabdff1aSopenharmony_ci
1995cabdff1aSopenharmony_ci        cost += di0 * di0 + di1 * di1
1996cabdff1aSopenharmony_ci                + di2 * di2 + di3 * di3;
1997cabdff1aSopenharmony_ci    }
1998cabdff1aSopenharmony_ci
1999cabdff1aSopenharmony_ci    if (bits)
2000cabdff1aSopenharmony_ci        *bits = curbits;
2001cabdff1aSopenharmony_ci    if (energy)
2002cabdff1aSopenharmony_ci        *energy = qenergy * (IQ*IQ);
2003cabdff1aSopenharmony_ci    return cost * lambda + curbits;
2004cabdff1aSopenharmony_ci}
2005cabdff1aSopenharmony_ci
2006cabdff1aSopenharmony_cistatic float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2007cabdff1aSopenharmony_ci                                        PutBitContext *pb, const float *in,
2008cabdff1aSopenharmony_ci                                        const float *scaled, int size, int scale_idx,
2009cabdff1aSopenharmony_ci                                        int cb, const float lambda, const float uplim,
2010cabdff1aSopenharmony_ci                                        int *bits, float *energy)
2011cabdff1aSopenharmony_ci{
2012cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2013cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2014cabdff1aSopenharmony_ci    int i;
2015cabdff1aSopenharmony_ci    float cost = 0;
2016cabdff1aSopenharmony_ci    float qenergy = 0.0f;
2017cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
2018cabdff1aSopenharmony_ci    int curbits = 0;
2019cabdff1aSopenharmony_ci
2020cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
2021cabdff1aSopenharmony_ci    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];
2022cabdff1aSopenharmony_ci
2023cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
2024cabdff1aSopenharmony_ci        const float *vec, *vec2;
2025cabdff1aSopenharmony_ci        int curidx, curidx2;
2026cabdff1aSopenharmony_ci        int sign1, count1, sign2, count2;
2027cabdff1aSopenharmony_ci        int   *in_int = (int   *)&in[i];
2028cabdff1aSopenharmony_ci        float *in_pos = (float *)&in[i];
2029cabdff1aSopenharmony_ci        float di0, di1, di2, di3;
2030cabdff1aSopenharmony_ci        int t0, t1, t2, t3, t4;
2031cabdff1aSopenharmony_ci
2032cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
2033cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2034cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2035cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2036cabdff1aSopenharmony_ci
2037cabdff1aSopenharmony_ci        __asm__ volatile (
2038cabdff1aSopenharmony_ci            ".set push                                          \n\t"
2039cabdff1aSopenharmony_ci            ".set noreorder                                     \n\t"
2040cabdff1aSopenharmony_ci
2041cabdff1aSopenharmony_ci            "ori        %[t4],      $zero,      12              \n\t"
2042cabdff1aSopenharmony_ci            "ori        %[sign1],   $zero,      0               \n\t"
2043cabdff1aSopenharmony_ci            "ori        %[sign2],   $zero,      0               \n\t"
2044cabdff1aSopenharmony_ci            "slt        %[t0],      %[t4],      %[qc1]          \n\t"
2045cabdff1aSopenharmony_ci            "slt        %[t1],      %[t4],      %[qc2]          \n\t"
2046cabdff1aSopenharmony_ci            "slt        %[t2],      %[t4],      %[qc3]          \n\t"
2047cabdff1aSopenharmony_ci            "slt        %[t3],      %[t4],      %[qc4]          \n\t"
2048cabdff1aSopenharmony_ci            "movn       %[qc1],     %[t4],      %[t0]           \n\t"
2049cabdff1aSopenharmony_ci            "movn       %[qc2],     %[t4],      %[t1]           \n\t"
2050cabdff1aSopenharmony_ci            "movn       %[qc3],     %[t4],      %[t2]           \n\t"
2051cabdff1aSopenharmony_ci            "movn       %[qc4],     %[t4],      %[t3]           \n\t"
2052cabdff1aSopenharmony_ci            "lw         %[t0],      0(%[in_int])                \n\t"
2053cabdff1aSopenharmony_ci            "lw         %[t1],      4(%[in_int])                \n\t"
2054cabdff1aSopenharmony_ci            "lw         %[t2],      8(%[in_int])                \n\t"
2055cabdff1aSopenharmony_ci            "lw         %[t3],      12(%[in_int])               \n\t"
2056cabdff1aSopenharmony_ci            "slt        %[t0],      %[t0],      $zero           \n\t"
2057cabdff1aSopenharmony_ci            "movn       %[sign1],   %[t0],      %[qc1]          \n\t"
2058cabdff1aSopenharmony_ci            "slt        %[t2],      %[t2],      $zero           \n\t"
2059cabdff1aSopenharmony_ci            "movn       %[sign2],   %[t2],      %[qc3]          \n\t"
2060cabdff1aSopenharmony_ci            "slt        %[t1],      %[t1],      $zero           \n\t"
2061cabdff1aSopenharmony_ci            "sll        %[t0],      %[sign1],   1               \n\t"
2062cabdff1aSopenharmony_ci            "or         %[t0],      %[t0],      %[t1]           \n\t"
2063cabdff1aSopenharmony_ci            "movn       %[sign1],   %[t0],      %[qc2]          \n\t"
2064cabdff1aSopenharmony_ci            "slt        %[t3],      %[t3],      $zero           \n\t"
2065cabdff1aSopenharmony_ci            "sll        %[t0],      %[sign2],   1               \n\t"
2066cabdff1aSopenharmony_ci            "or         %[t0],      %[t0],      %[t3]           \n\t"
2067cabdff1aSopenharmony_ci            "movn       %[sign2],   %[t0],      %[qc4]          \n\t"
2068cabdff1aSopenharmony_ci            "slt        %[count1],  $zero,      %[qc1]          \n\t"
2069cabdff1aSopenharmony_ci            "slt        %[t1],      $zero,      %[qc2]          \n\t"
2070cabdff1aSopenharmony_ci            "slt        %[count2],  $zero,      %[qc3]          \n\t"
2071cabdff1aSopenharmony_ci            "slt        %[t2],      $zero,      %[qc4]          \n\t"
2072cabdff1aSopenharmony_ci            "addu       %[count1],  %[count1],  %[t1]           \n\t"
2073cabdff1aSopenharmony_ci            "addu       %[count2],  %[count2],  %[t2]           \n\t"
2074cabdff1aSopenharmony_ci
2075cabdff1aSopenharmony_ci            ".set pop                                           \n\t"
2076cabdff1aSopenharmony_ci
2077cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2078cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2079cabdff1aSopenharmony_ci              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2080cabdff1aSopenharmony_ci              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2081cabdff1aSopenharmony_ci              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2082cabdff1aSopenharmony_ci              [t4]"=&r"(t4)
2083cabdff1aSopenharmony_ci            : [in_int]"r"(in_int)
2084cabdff1aSopenharmony_ci            : "memory"
2085cabdff1aSopenharmony_ci        );
2086cabdff1aSopenharmony_ci
2087cabdff1aSopenharmony_ci        curidx = 13 * qc1;
2088cabdff1aSopenharmony_ci        curidx += qc2;
2089cabdff1aSopenharmony_ci
2090cabdff1aSopenharmony_ci        curidx2 = 13 * qc3;
2091cabdff1aSopenharmony_ci        curidx2 += qc4;
2092cabdff1aSopenharmony_ci
2093cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
2094cabdff1aSopenharmony_ci        curbits += p_bits[curidx2];
2095cabdff1aSopenharmony_ci        curbits += upair12_sign_bits[curidx];
2096cabdff1aSopenharmony_ci        curbits += upair12_sign_bits[curidx2];
2097cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*2];
2098cabdff1aSopenharmony_ci        vec2    = &p_codes[curidx2*2];
2099cabdff1aSopenharmony_ci
2100cabdff1aSopenharmony_ci        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2101cabdff1aSopenharmony_ci                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
2102cabdff1aSopenharmony_ci
2103cabdff1aSopenharmony_ci        __asm__ volatile (
2104cabdff1aSopenharmony_ci            ".set push                                          \n\t"
2105cabdff1aSopenharmony_ci            ".set noreorder                                     \n\t"
2106cabdff1aSopenharmony_ci
2107cabdff1aSopenharmony_ci            "lwc1       %[di0],     0(%[in_pos])                \n\t"
2108cabdff1aSopenharmony_ci            "lwc1       %[di1],     4(%[in_pos])                \n\t"
2109cabdff1aSopenharmony_ci            "lwc1       %[di2],     8(%[in_pos])                \n\t"
2110cabdff1aSopenharmony_ci            "lwc1       %[di3],     12(%[in_pos])               \n\t"
2111cabdff1aSopenharmony_ci            "abs.s      %[di0],     %[di0]                      \n\t"
2112cabdff1aSopenharmony_ci            "abs.s      %[di1],     %[di1]                      \n\t"
2113cabdff1aSopenharmony_ci            "abs.s      %[di2],     %[di2]                      \n\t"
2114cabdff1aSopenharmony_ci            "abs.s      %[di3],     %[di3]                      \n\t"
2115cabdff1aSopenharmony_ci            "lwc1       $f0,        0(%[vec])                   \n\t"
2116cabdff1aSopenharmony_ci            "lwc1       $f1,        4(%[vec])                   \n\t"
2117cabdff1aSopenharmony_ci            "lwc1       $f2,        0(%[vec2])                  \n\t"
2118cabdff1aSopenharmony_ci            "lwc1       $f3,        4(%[vec2])                  \n\t"
2119cabdff1aSopenharmony_ci            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
2120cabdff1aSopenharmony_ci            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
2121cabdff1aSopenharmony_ci            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
2122cabdff1aSopenharmony_ci            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"
2123cabdff1aSopenharmony_ci
2124cabdff1aSopenharmony_ci            ".set pop                                           \n\t"
2125cabdff1aSopenharmony_ci
2126cabdff1aSopenharmony_ci            : [di0]"=&f"(di0), [di1]"=&f"(di1),
2127cabdff1aSopenharmony_ci              [di2]"=&f"(di2), [di3]"=&f"(di3)
2128cabdff1aSopenharmony_ci            : [in_pos]"r"(in_pos), [vec]"r"(vec),
2129cabdff1aSopenharmony_ci              [vec2]"r"(vec2), [IQ]"f"(IQ)
2130cabdff1aSopenharmony_ci            : "$f0", "$f1", "$f2", "$f3",
2131cabdff1aSopenharmony_ci              "memory"
2132cabdff1aSopenharmony_ci        );
2133cabdff1aSopenharmony_ci
2134cabdff1aSopenharmony_ci        cost += di0 * di0 + di1 * di1
2135cabdff1aSopenharmony_ci                + di2 * di2 + di3 * di3;
2136cabdff1aSopenharmony_ci    }
2137cabdff1aSopenharmony_ci
2138cabdff1aSopenharmony_ci    if (bits)
2139cabdff1aSopenharmony_ci        *bits = curbits;
2140cabdff1aSopenharmony_ci    if (energy)
2141cabdff1aSopenharmony_ci        *energy = qenergy * (IQ*IQ);
2142cabdff1aSopenharmony_ci    return cost * lambda + curbits;
2143cabdff1aSopenharmony_ci}
2144cabdff1aSopenharmony_ci
2145cabdff1aSopenharmony_cistatic float get_band_cost_ESC_mips(struct AACEncContext *s,
2146cabdff1aSopenharmony_ci                                    PutBitContext *pb, const float *in,
2147cabdff1aSopenharmony_ci                                    const float *scaled, int size, int scale_idx,
2148cabdff1aSopenharmony_ci                                    int cb, const float lambda, const float uplim,
2149cabdff1aSopenharmony_ci                                    int *bits, float *energy)
2150cabdff1aSopenharmony_ci{
2151cabdff1aSopenharmony_ci    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2152cabdff1aSopenharmony_ci    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2153cabdff1aSopenharmony_ci    const float CLIPPED_ESCAPE = 165140.0f * IQ;
2154cabdff1aSopenharmony_ci    int i;
2155cabdff1aSopenharmony_ci    float cost = 0;
2156cabdff1aSopenharmony_ci    float qenergy = 0.0f;
2157cabdff1aSopenharmony_ci    int qc1, qc2, qc3, qc4;
2158cabdff1aSopenharmony_ci    int curbits = 0;
2159cabdff1aSopenharmony_ci
2160cabdff1aSopenharmony_ci    uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1];
2161cabdff1aSopenharmony_ci    float   *p_codes = (float*  )ff_aac_codebook_vectors[cb-1];
2162cabdff1aSopenharmony_ci
2163cabdff1aSopenharmony_ci    for (i = 0; i < size; i += 4) {
2164cabdff1aSopenharmony_ci        const float *vec, *vec2;
2165cabdff1aSopenharmony_ci        int curidx, curidx2;
2166cabdff1aSopenharmony_ci        float t1, t2, t3, t4, V;
2167cabdff1aSopenharmony_ci        float di1, di2, di3, di4;
2168cabdff1aSopenharmony_ci        int cond0, cond1, cond2, cond3;
2169cabdff1aSopenharmony_ci        int c1, c2, c3, c4;
2170cabdff1aSopenharmony_ci        int t6, t7;
2171cabdff1aSopenharmony_ci
2172cabdff1aSopenharmony_ci        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
2173cabdff1aSopenharmony_ci        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2174cabdff1aSopenharmony_ci        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2175cabdff1aSopenharmony_ci        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2176cabdff1aSopenharmony_ci
2177cabdff1aSopenharmony_ci        __asm__ volatile (
2178cabdff1aSopenharmony_ci            ".set push                                  \n\t"
2179cabdff1aSopenharmony_ci            ".set noreorder                             \n\t"
2180cabdff1aSopenharmony_ci
2181cabdff1aSopenharmony_ci            "ori        %[t6],      $zero,  15          \n\t"
2182cabdff1aSopenharmony_ci            "ori        %[t7],      $zero,  16          \n\t"
2183cabdff1aSopenharmony_ci            "shll_s.w   %[c1],      %[qc1], 18          \n\t"
2184cabdff1aSopenharmony_ci            "shll_s.w   %[c2],      %[qc2], 18          \n\t"
2185cabdff1aSopenharmony_ci            "shll_s.w   %[c3],      %[qc3], 18          \n\t"
2186cabdff1aSopenharmony_ci            "shll_s.w   %[c4],      %[qc4], 18          \n\t"
2187cabdff1aSopenharmony_ci            "srl        %[c1],      %[c1],  18          \n\t"
2188cabdff1aSopenharmony_ci            "srl        %[c2],      %[c2],  18          \n\t"
2189cabdff1aSopenharmony_ci            "srl        %[c3],      %[c3],  18          \n\t"
2190cabdff1aSopenharmony_ci            "srl        %[c4],      %[c4],  18          \n\t"
2191cabdff1aSopenharmony_ci            "slt        %[cond0],   %[t6],  %[qc1]      \n\t"
2192cabdff1aSopenharmony_ci            "slt        %[cond1],   %[t6],  %[qc2]      \n\t"
2193cabdff1aSopenharmony_ci            "slt        %[cond2],   %[t6],  %[qc3]      \n\t"
2194cabdff1aSopenharmony_ci            "slt        %[cond3],   %[t6],  %[qc4]      \n\t"
2195cabdff1aSopenharmony_ci            "movn       %[qc1],     %[t7],  %[cond0]    \n\t"
2196cabdff1aSopenharmony_ci            "movn       %[qc2],     %[t7],  %[cond1]    \n\t"
2197cabdff1aSopenharmony_ci            "movn       %[qc3],     %[t7],  %[cond2]    \n\t"
2198cabdff1aSopenharmony_ci            "movn       %[qc4],     %[t7],  %[cond3]    \n\t"
2199cabdff1aSopenharmony_ci
2200cabdff1aSopenharmony_ci            ".set pop                                   \n\t"
2201cabdff1aSopenharmony_ci
2202cabdff1aSopenharmony_ci            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2203cabdff1aSopenharmony_ci              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2204cabdff1aSopenharmony_ci              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2205cabdff1aSopenharmony_ci              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2206cabdff1aSopenharmony_ci              [c1]"=&r"(c1), [c2]"=&r"(c2),
2207cabdff1aSopenharmony_ci              [c3]"=&r"(c3), [c4]"=&r"(c4),
2208cabdff1aSopenharmony_ci              [t6]"=&r"(t6), [t7]"=&r"(t7)
2209cabdff1aSopenharmony_ci        );
2210cabdff1aSopenharmony_ci
2211cabdff1aSopenharmony_ci        curidx = 17 * qc1;
2212cabdff1aSopenharmony_ci        curidx += qc2;
2213cabdff1aSopenharmony_ci
2214cabdff1aSopenharmony_ci        curidx2 = 17 * qc3;
2215cabdff1aSopenharmony_ci        curidx2 += qc4;
2216cabdff1aSopenharmony_ci
2217cabdff1aSopenharmony_ci        curbits += p_bits[curidx];
2218cabdff1aSopenharmony_ci        curbits += esc_sign_bits[curidx];
2219cabdff1aSopenharmony_ci        vec     = &p_codes[curidx*2];
2220cabdff1aSopenharmony_ci
2221cabdff1aSopenharmony_ci        curbits += p_bits[curidx2];
2222cabdff1aSopenharmony_ci        curbits += esc_sign_bits[curidx2];
2223cabdff1aSopenharmony_ci        vec2     = &p_codes[curidx2*2];
2224cabdff1aSopenharmony_ci
2225cabdff1aSopenharmony_ci        curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2226cabdff1aSopenharmony_ci        curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2227cabdff1aSopenharmony_ci        curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2228cabdff1aSopenharmony_ci        curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2229cabdff1aSopenharmony_ci
2230cabdff1aSopenharmony_ci        t1 = fabsf(in[i  ]);
2231cabdff1aSopenharmony_ci        t2 = fabsf(in[i+1]);
2232cabdff1aSopenharmony_ci        t3 = fabsf(in[i+2]);
2233cabdff1aSopenharmony_ci        t4 = fabsf(in[i+3]);
2234cabdff1aSopenharmony_ci
2235cabdff1aSopenharmony_ci        if (cond0) {
2236cabdff1aSopenharmony_ci            if (t1 >= CLIPPED_ESCAPE) {
2237cabdff1aSopenharmony_ci                di1 = t1 - CLIPPED_ESCAPE;
2238cabdff1aSopenharmony_ci                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2239cabdff1aSopenharmony_ci            } else {
2240cabdff1aSopenharmony_ci                di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2241cabdff1aSopenharmony_ci                qenergy += V*V;
2242cabdff1aSopenharmony_ci            }
2243cabdff1aSopenharmony_ci        } else {
2244cabdff1aSopenharmony_ci            di1 = t1 - (V = vec[0] * IQ);
2245cabdff1aSopenharmony_ci            qenergy += V*V;
2246cabdff1aSopenharmony_ci        }
2247cabdff1aSopenharmony_ci
2248cabdff1aSopenharmony_ci        if (cond1) {
2249cabdff1aSopenharmony_ci            if (t2 >= CLIPPED_ESCAPE) {
2250cabdff1aSopenharmony_ci                di2 = t2 - CLIPPED_ESCAPE;
2251cabdff1aSopenharmony_ci                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2252cabdff1aSopenharmony_ci            } else {
2253cabdff1aSopenharmony_ci                di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2254cabdff1aSopenharmony_ci                qenergy += V*V;
2255cabdff1aSopenharmony_ci            }
2256cabdff1aSopenharmony_ci        } else {
2257cabdff1aSopenharmony_ci            di2 = t2 - (V = vec[1] * IQ);
2258cabdff1aSopenharmony_ci            qenergy += V*V;
2259cabdff1aSopenharmony_ci        }
2260cabdff1aSopenharmony_ci
2261cabdff1aSopenharmony_ci        if (cond2) {
2262cabdff1aSopenharmony_ci            if (t3 >= CLIPPED_ESCAPE) {
2263cabdff1aSopenharmony_ci                di3 = t3 - CLIPPED_ESCAPE;
2264cabdff1aSopenharmony_ci                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2265cabdff1aSopenharmony_ci            } else {
2266cabdff1aSopenharmony_ci                di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2267cabdff1aSopenharmony_ci                qenergy += V*V;
2268cabdff1aSopenharmony_ci            }
2269cabdff1aSopenharmony_ci        } else {
2270cabdff1aSopenharmony_ci            di3 = t3 - (V = vec2[0] * IQ);
2271cabdff1aSopenharmony_ci            qenergy += V*V;
2272cabdff1aSopenharmony_ci        }
2273cabdff1aSopenharmony_ci
2274cabdff1aSopenharmony_ci        if (cond3) {
2275cabdff1aSopenharmony_ci            if (t4 >= CLIPPED_ESCAPE) {
2276cabdff1aSopenharmony_ci                di4 = t4 - CLIPPED_ESCAPE;
2277cabdff1aSopenharmony_ci                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2278cabdff1aSopenharmony_ci            } else {
2279cabdff1aSopenharmony_ci                di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2280cabdff1aSopenharmony_ci                qenergy += V*V;
2281cabdff1aSopenharmony_ci            }
2282cabdff1aSopenharmony_ci        } else {
2283cabdff1aSopenharmony_ci            di4 = t4 - (V = vec2[1]*IQ);
2284cabdff1aSopenharmony_ci            qenergy += V*V;
2285cabdff1aSopenharmony_ci        }
2286cabdff1aSopenharmony_ci
2287cabdff1aSopenharmony_ci        cost += di1 * di1 + di2 * di2
2288cabdff1aSopenharmony_ci                + di3 * di3 + di4 * di4;
2289cabdff1aSopenharmony_ci    }
2290cabdff1aSopenharmony_ci
2291cabdff1aSopenharmony_ci    if (bits)
2292cabdff1aSopenharmony_ci        *bits = curbits;
2293cabdff1aSopenharmony_ci    return cost * lambda + curbits;
2294cabdff1aSopenharmony_ci}
2295cabdff1aSopenharmony_ci
2296cabdff1aSopenharmony_cistatic float (*const get_band_cost_arr[])(struct AACEncContext *s,
2297cabdff1aSopenharmony_ci                                          PutBitContext *pb, const float *in,
2298cabdff1aSopenharmony_ci                                          const float *scaled, int size, int scale_idx,
2299cabdff1aSopenharmony_ci                                          int cb, const float lambda, const float uplim,
2300cabdff1aSopenharmony_ci                                          int *bits, float *energy) = {
2301cabdff1aSopenharmony_ci    get_band_cost_ZERO_mips,
2302cabdff1aSopenharmony_ci    get_band_cost_SQUAD_mips,
2303cabdff1aSopenharmony_ci    get_band_cost_SQUAD_mips,
2304cabdff1aSopenharmony_ci    get_band_cost_UQUAD_mips,
2305cabdff1aSopenharmony_ci    get_band_cost_UQUAD_mips,
2306cabdff1aSopenharmony_ci    get_band_cost_SPAIR_mips,
2307cabdff1aSopenharmony_ci    get_band_cost_SPAIR_mips,
2308cabdff1aSopenharmony_ci    get_band_cost_UPAIR7_mips,
2309cabdff1aSopenharmony_ci    get_band_cost_UPAIR7_mips,
2310cabdff1aSopenharmony_ci    get_band_cost_UPAIR12_mips,
2311cabdff1aSopenharmony_ci    get_band_cost_UPAIR12_mips,
2312cabdff1aSopenharmony_ci    get_band_cost_ESC_mips,
2313cabdff1aSopenharmony_ci    get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2314cabdff1aSopenharmony_ci    get_band_cost_ZERO_mips,
2315cabdff1aSopenharmony_ci    get_band_cost_ZERO_mips,
2316cabdff1aSopenharmony_ci    get_band_cost_ZERO_mips,
2317cabdff1aSopenharmony_ci};
2318cabdff1aSopenharmony_ci
/* Forward all arguments to the cost routine selected by the codebook
 * number cb in get_band_cost_arr. */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb,           \
                      lambda, uplim, bits, energy)                      \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb,       \
                          lambda, uplim, bits, energy)
2325cabdff1aSopenharmony_ci
/**
 * Return the estimated cost of a band for codebook cb by dispatching to
 * get_band_cost() with a NULL PutBitContext.
 *
 * The rtz argument is accepted but not used by this implementation.
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits, float *energy, int rtz)
{
    const float band_cost = get_band_cost(s, NULL, in, scaled, size,
                                          scale_idx, cb, lambda, uplim,
                                          bits, energy);

    return band_cost;
}
2333cabdff1aSopenharmony_ci
2334cabdff1aSopenharmony_ci#include "libavcodec/aacenc_quantization_misc.h"
2335cabdff1aSopenharmony_ci
2336cabdff1aSopenharmony_ci#include "libavcodec/aaccoder_twoloop.h"
2337cabdff1aSopenharmony_ci
2338cabdff1aSopenharmony_cistatic void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2339cabdff1aSopenharmony_ci{
2340cabdff1aSopenharmony_ci    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
2341cabdff1aSopenharmony_ci    uint8_t nextband0[128], nextband1[128];
2342cabdff1aSopenharmony_ci    float M[128], S[128];
2343cabdff1aSopenharmony_ci    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2344cabdff1aSopenharmony_ci    const float lambda = s->lambda;
2345cabdff1aSopenharmony_ci    const float mslambda = FFMIN(1.0f, lambda / 120.f);
2346cabdff1aSopenharmony_ci    SingleChannelElement *sce0 = &cpe->ch[0];
2347cabdff1aSopenharmony_ci    SingleChannelElement *sce1 = &cpe->ch[1];
2348cabdff1aSopenharmony_ci    if (!cpe->common_window)
2349cabdff1aSopenharmony_ci        return;
2350cabdff1aSopenharmony_ci
2351cabdff1aSopenharmony_ci    /** Scout out next nonzero bands */
2352cabdff1aSopenharmony_ci    ff_init_nextband_map(sce0, nextband0);
2353cabdff1aSopenharmony_ci    ff_init_nextband_map(sce1, nextband1);
2354cabdff1aSopenharmony_ci
2355cabdff1aSopenharmony_ci    prev_mid = sce0->sf_idx[0];
2356cabdff1aSopenharmony_ci    prev_side = sce1->sf_idx[0];
2357cabdff1aSopenharmony_ci    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2358cabdff1aSopenharmony_ci        start = 0;
2359cabdff1aSopenharmony_ci        for (g = 0;  g < sce0->ics.num_swb; g++) {
2360cabdff1aSopenharmony_ci            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
2361cabdff1aSopenharmony_ci            if (!cpe->is_mask[w*16+g])
2362cabdff1aSopenharmony_ci                cpe->ms_mask[w*16+g] = 0;
2363cabdff1aSopenharmony_ci            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
2364cabdff1aSopenharmony_ci                float Mmax = 0.0f, Smax = 0.0f;
2365cabdff1aSopenharmony_ci
2366cabdff1aSopenharmony_ci                /* Must compute mid/side SF and book for the whole window group */
2367cabdff1aSopenharmony_ci                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2368cabdff1aSopenharmony_ci                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2369cabdff1aSopenharmony_ci                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2370cabdff1aSopenharmony_ci                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2371cabdff1aSopenharmony_ci                        S[i] =  M[i]
2372cabdff1aSopenharmony_ci                              - sce1->coeffs[start+(w+w2)*128+i];
2373cabdff1aSopenharmony_ci                    }
2374cabdff1aSopenharmony_ci                    abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2375cabdff1aSopenharmony_ci                    abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2376cabdff1aSopenharmony_ci                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
2377cabdff1aSopenharmony_ci                        Mmax = FFMAX(Mmax, M34[i]);
2378cabdff1aSopenharmony_ci                        Smax = FFMAX(Smax, S34[i]);
2379cabdff1aSopenharmony_ci                    }
2380cabdff1aSopenharmony_ci                }
2381cabdff1aSopenharmony_ci
2382cabdff1aSopenharmony_ci                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2383cabdff1aSopenharmony_ci                    float dist1 = 0.0f, dist2 = 0.0f;
2384cabdff1aSopenharmony_ci                    int B0 = 0, B1 = 0;
2385cabdff1aSopenharmony_ci                    int minidx;
2386cabdff1aSopenharmony_ci                    int mididx, sididx;
2387cabdff1aSopenharmony_ci                    int midcb, sidcb;
2388cabdff1aSopenharmony_ci
2389cabdff1aSopenharmony_ci                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
2390cabdff1aSopenharmony_ci                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
2391cabdff1aSopenharmony_ci                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
2392cabdff1aSopenharmony_ci                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
2393cabdff1aSopenharmony_ci                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
2394cabdff1aSopenharmony_ci                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
2395cabdff1aSopenharmony_ci                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
2396cabdff1aSopenharmony_ci                        continue;
2397cabdff1aSopenharmony_ci                    }
2398cabdff1aSopenharmony_ci
2399cabdff1aSopenharmony_ci                    midcb = find_min_book(Mmax, mididx);
2400cabdff1aSopenharmony_ci                    sidcb = find_min_book(Smax, sididx);
2401cabdff1aSopenharmony_ci
2402cabdff1aSopenharmony_ci                    /* No CB can be zero */
2403cabdff1aSopenharmony_ci                    midcb = FFMAX(1,midcb);
2404cabdff1aSopenharmony_ci                    sidcb = FFMAX(1,sidcb);
2405cabdff1aSopenharmony_ci
2406cabdff1aSopenharmony_ci                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2407cabdff1aSopenharmony_ci                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2408cabdff1aSopenharmony_ci                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2409cabdff1aSopenharmony_ci                        float minthr = FFMIN(band0->threshold, band1->threshold);
2410cabdff1aSopenharmony_ci                        int b1,b2,b3,b4;
2411cabdff1aSopenharmony_ci                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2412cabdff1aSopenharmony_ci                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2413cabdff1aSopenharmony_ci                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2414cabdff1aSopenharmony_ci                            S[i] =  M[i]
2415cabdff1aSopenharmony_ci                                  - sce1->coeffs[start+(w+w2)*128+i];
2416cabdff1aSopenharmony_ci                        }
2417cabdff1aSopenharmony_ci
2418cabdff1aSopenharmony_ci                        abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2419cabdff1aSopenharmony_ci                        abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2420cabdff1aSopenharmony_ci                        abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
2421cabdff1aSopenharmony_ci                        abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
2422cabdff1aSopenharmony_ci                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2423cabdff1aSopenharmony_ci                                                    L34,
2424cabdff1aSopenharmony_ci                                                    sce0->ics.swb_sizes[g],
2425cabdff1aSopenharmony_ci                                                    sce0->sf_idx[w*16+g],
2426cabdff1aSopenharmony_ci                                                    sce0->band_type[w*16+g],
2427cabdff1aSopenharmony_ci                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
2428cabdff1aSopenharmony_ci                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2429cabdff1aSopenharmony_ci                                                    R34,
2430cabdff1aSopenharmony_ci                                                    sce1->ics.swb_sizes[g],
2431cabdff1aSopenharmony_ci                                                    sce1->sf_idx[w*16+g],
2432cabdff1aSopenharmony_ci                                                    sce1->band_type[w*16+g],
2433cabdff1aSopenharmony_ci                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
2434cabdff1aSopenharmony_ci                        dist2 += quantize_band_cost(s, M,
2435cabdff1aSopenharmony_ci                                                    M34,
2436cabdff1aSopenharmony_ci                                                    sce0->ics.swb_sizes[g],
2437cabdff1aSopenharmony_ci                                                    mididx,
2438cabdff1aSopenharmony_ci                                                    midcb,
2439cabdff1aSopenharmony_ci                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
2440cabdff1aSopenharmony_ci                        dist2 += quantize_band_cost(s, S,
2441cabdff1aSopenharmony_ci                                                    S34,
2442cabdff1aSopenharmony_ci                                                    sce1->ics.swb_sizes[g],
2443cabdff1aSopenharmony_ci                                                    sididx,
2444cabdff1aSopenharmony_ci                                                    sidcb,
2445cabdff1aSopenharmony_ci                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
2446cabdff1aSopenharmony_ci                        B0 += b1+b2;
2447cabdff1aSopenharmony_ci                        B1 += b3+b4;
2448cabdff1aSopenharmony_ci                        dist1 -= b1+b2;
2449cabdff1aSopenharmony_ci                        dist2 -= b3+b4;
2450cabdff1aSopenharmony_ci                    }
2451cabdff1aSopenharmony_ci                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
2452cabdff1aSopenharmony_ci                    if (cpe->ms_mask[w*16+g]) {
2453cabdff1aSopenharmony_ci                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
2454cabdff1aSopenharmony_ci                            sce0->sf_idx[w*16+g] = mididx;
2455cabdff1aSopenharmony_ci                            sce1->sf_idx[w*16+g] = sididx;
2456cabdff1aSopenharmony_ci                            sce0->band_type[w*16+g] = midcb;
2457cabdff1aSopenharmony_ci                            sce1->band_type[w*16+g] = sidcb;
2458cabdff1aSopenharmony_ci                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
2459cabdff1aSopenharmony_ci                            /* ms_mask unneeded, and it confuses some decoders */
2460cabdff1aSopenharmony_ci                            cpe->ms_mask[w*16+g] = 0;
2461cabdff1aSopenharmony_ci                        }
2462cabdff1aSopenharmony_ci                        break;
2463cabdff1aSopenharmony_ci                    } else if (B1 > B0) {
2464cabdff1aSopenharmony_ci                        /* More boost won't fix this */
2465cabdff1aSopenharmony_ci                        break;
2466cabdff1aSopenharmony_ci                    }
2467cabdff1aSopenharmony_ci                }
2468cabdff1aSopenharmony_ci            }
2469cabdff1aSopenharmony_ci            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
2470cabdff1aSopenharmony_ci                prev_mid = sce0->sf_idx[w*16+g];
2471cabdff1aSopenharmony_ci            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
2472cabdff1aSopenharmony_ci                prev_side = sce1->sf_idx[w*16+g];
2473cabdff1aSopenharmony_ci            start += sce0->ics.swb_sizes[g];
2474cabdff1aSopenharmony_ci        }
2475cabdff1aSopenharmony_ci    }
2476cabdff1aSopenharmony_ci}
2477cabdff1aSopenharmony_ci#endif /*HAVE_MIPSFPU */
2478cabdff1aSopenharmony_ci
2479cabdff1aSopenharmony_ci#include "libavcodec/aaccoder_trellis.h"
2480cabdff1aSopenharmony_ci
2481cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2482cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
2483cabdff1aSopenharmony_ci
2484cabdff1aSopenharmony_civoid ff_aac_coder_init_mips(AACEncContext *c) {
2485cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
2486cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
2487cabdff1aSopenharmony_ci    AACCoefficientsEncoder *e = c->coder;
2488cabdff1aSopenharmony_ci    int option = c->options.coder;
2489cabdff1aSopenharmony_ci
2490cabdff1aSopenharmony_ci    if (option == 2) {
2491cabdff1aSopenharmony_ci        e->quantize_and_encode_band = quantize_and_encode_band_mips;
2492cabdff1aSopenharmony_ci        e->encode_window_bands_info = codebook_trellis_rate;
2493cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
2494cabdff1aSopenharmony_ci        e->search_for_quantizers    = search_for_quantizers_twoloop;
2495cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
2496cabdff1aSopenharmony_ci    }
2497cabdff1aSopenharmony_ci#if HAVE_MIPSFPU
2498cabdff1aSopenharmony_ci    e->search_for_ms            = search_for_ms_mips;
2499cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */
2500cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2501cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
2502cabdff1aSopenharmony_ci}
2503