1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2012 3cabdff1aSopenharmony_ci * MIPS Technologies, Inc., California. 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * Redistribution and use in source and binary forms, with or without 6cabdff1aSopenharmony_ci * modification, are permitted provided that the following conditions 7cabdff1aSopenharmony_ci * are met: 8cabdff1aSopenharmony_ci * 1. Redistributions of source code must retain the above copyright 9cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer. 10cabdff1aSopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 11cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer in the 12cabdff1aSopenharmony_ci * documentation and/or other materials provided with the distribution. 13cabdff1aSopenharmony_ci * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14cabdff1aSopenharmony_ci * contributors may be used to endorse or promote products derived from 15cabdff1aSopenharmony_ci * this software without specific prior written permission. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18cabdff1aSopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19cabdff1aSopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20cabdff1aSopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21cabdff1aSopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22cabdff1aSopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23cabdff1aSopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24cabdff1aSopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25cabdff1aSopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26cabdff1aSopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27cabdff1aSopenharmony_ci * SUCH DAMAGE. 28cabdff1aSopenharmony_ci * 29cabdff1aSopenharmony_ci * Author: Stanislav Ocovaj (socovaj@mips.com) 30cabdff1aSopenharmony_ci * Szabolcs Pal (sabolc@mips.com) 31cabdff1aSopenharmony_ci * 32cabdff1aSopenharmony_ci * AAC coefficients encoder optimized for MIPS floating-point architecture 33cabdff1aSopenharmony_ci * 34cabdff1aSopenharmony_ci * This file is part of FFmpeg. 35cabdff1aSopenharmony_ci * 36cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 37cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 38cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 39cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 40cabdff1aSopenharmony_ci * 41cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 42cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 43cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 44cabdff1aSopenharmony_ci * Lesser General Public License for more details. 45cabdff1aSopenharmony_ci * 46cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 47cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 48cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 49cabdff1aSopenharmony_ci */ 50cabdff1aSopenharmony_ci 51cabdff1aSopenharmony_ci/** 52cabdff1aSopenharmony_ci * @file 53cabdff1aSopenharmony_ci * Reference: libavcodec/aaccoder.c 54cabdff1aSopenharmony_ci */ 55cabdff1aSopenharmony_ci 56cabdff1aSopenharmony_ci#include "libavutil/libm.h" 57cabdff1aSopenharmony_ci 58cabdff1aSopenharmony_ci#include <float.h> 59cabdff1aSopenharmony_ci#include "libavutil/mathematics.h" 60cabdff1aSopenharmony_ci#include "libavcodec/avcodec.h" 61cabdff1aSopenharmony_ci#include "libavcodec/put_bits.h" 62cabdff1aSopenharmony_ci#include "libavcodec/aac.h" 63cabdff1aSopenharmony_ci#include "libavcodec/aacenc.h" 64cabdff1aSopenharmony_ci#include "libavcodec/aactab.h" 65cabdff1aSopenharmony_ci#include "libavcodec/aacenctab.h" 66cabdff1aSopenharmony_ci#include "libavcodec/aacenc_utils.h" 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 69cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 70cabdff1aSopenharmony_citypedef struct BandCodingPath { 71cabdff1aSopenharmony_ci int prev_idx; 72cabdff1aSopenharmony_ci float cost; 73cabdff1aSopenharmony_ci int run; 74cabdff1aSopenharmony_ci} BandCodingPath; 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_cistatic const uint8_t uquad_sign_bits[81] = { 77cabdff1aSopenharmony_ci 0, 1, 1, 1, 2, 2, 1, 2, 2, 78cabdff1aSopenharmony_ci 1, 2, 2, 2, 3, 3, 2, 3, 3, 79cabdff1aSopenharmony_ci 1, 2, 2, 2, 3, 3, 2, 3, 3, 80cabdff1aSopenharmony_ci 1, 2, 2, 2, 3, 3, 2, 3, 3, 81cabdff1aSopenharmony_ci 2, 3, 3, 3, 4, 4, 3, 4, 4, 82cabdff1aSopenharmony_ci 2, 3, 3, 3, 4, 4, 3, 4, 4, 83cabdff1aSopenharmony_ci 1, 2, 2, 2, 3, 3, 2, 3, 3, 84cabdff1aSopenharmony_ci 2, 3, 3, 3, 4, 4, 3, 4, 4, 85cabdff1aSopenharmony_ci 2, 3, 3, 3, 4, 4, 3, 4, 4 86cabdff1aSopenharmony_ci}; 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_cistatic const uint8_t upair7_sign_bits[64] = { 89cabdff1aSopenharmony_ci 0, 1, 1, 1, 1, 1, 1, 1, 90cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 91cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 92cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 93cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 94cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 95cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 96cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 97cabdff1aSopenharmony_ci}; 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_cistatic const uint8_t upair12_sign_bits[169] = { 100cabdff1aSopenharmony_ci 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 101cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 102cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 103cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 107cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 109cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 110cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 111cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 112cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 113cabdff1aSopenharmony_ci}; 114cabdff1aSopenharmony_ci 115cabdff1aSopenharmony_cistatic const uint8_t esc_sign_bits[289] = { 116cabdff1aSopenharmony_ci 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 117cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 118cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 119cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 120cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 121cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 122cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 123cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 124cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 126cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 128cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 129cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 130cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 131cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 132cabdff1aSopenharmony_ci 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 133cabdff1aSopenharmony_ci}; 134cabdff1aSopenharmony_ci 135cabdff1aSopenharmony_ci/** 136cabdff1aSopenharmony_ci * Functions developed from template function and optimized for quantizing and encoding band 137cabdff1aSopenharmony_ci */ 138cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s, 139cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 140cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 141cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 142cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 143cabdff1aSopenharmony_ci{ 144cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 145cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 146cabdff1aSopenharmony_ci int i; 147cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 148cabdff1aSopenharmony_ci float qenergy = 0.0f; 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 151cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; 152cabdff1aSopenharmony_ci float *p_vec = (float *)ff_aac_codebook_vectors[cb-1]; 153cabdff1aSopenharmony_ci 154cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 155cabdff1aSopenharmony_ci scaled = s->scoefs; 156cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 157cabdff1aSopenharmony_ci int curidx; 158cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 159cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 160cabdff1aSopenharmony_ci const float *vec; 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 163cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 164cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 165cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 166cabdff1aSopenharmony_ci 167cabdff1aSopenharmony_ci __asm__ volatile ( 168cabdff1aSopenharmony_ci ".set push \n\t" 169cabdff1aSopenharmony_ci ".set noreorder \n\t" 170cabdff1aSopenharmony_ci 171cabdff1aSopenharmony_ci "slt %[qc1], $zero, %[qc1] \n\t" 172cabdff1aSopenharmony_ci "slt %[qc2], $zero, %[qc2] \n\t" 173cabdff1aSopenharmony_ci "slt %[qc3], $zero, %[qc3] \n\t" 174cabdff1aSopenharmony_ci "slt %[qc4], $zero, %[qc4] \n\t" 175cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 176cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 177cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 178cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 179cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 180cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 181cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 182cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 183cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 184cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 185cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 186cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 187cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 188cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 189cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 190cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 191cabdff1aSopenharmony_ci 192cabdff1aSopenharmony_ci ".set pop \n\t" 193cabdff1aSopenharmony_ci 194cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 195cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 196cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 197cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 198cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 199cabdff1aSopenharmony_ci : "memory" 200cabdff1aSopenharmony_ci ); 201cabdff1aSopenharmony_ci 202cabdff1aSopenharmony_ci curidx = qc1; 203cabdff1aSopenharmony_ci curidx *= 3; 204cabdff1aSopenharmony_ci curidx += qc2; 205cabdff1aSopenharmony_ci curidx *= 3; 206cabdff1aSopenharmony_ci curidx += qc3; 207cabdff1aSopenharmony_ci curidx *= 3; 208cabdff1aSopenharmony_ci curidx += qc4; 209cabdff1aSopenharmony_ci curidx += 40; 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci put_bits(pb, p_bits[curidx], p_codes[curidx]); 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci if (out || energy) { 214cabdff1aSopenharmony_ci float e1,e2,e3,e4; 215cabdff1aSopenharmony_ci vec = &p_vec[curidx*4]; 216cabdff1aSopenharmony_ci e1 = vec[0] * IQ; 217cabdff1aSopenharmony_ci e2 = vec[1] * IQ; 218cabdff1aSopenharmony_ci e3 = vec[2] * IQ; 219cabdff1aSopenharmony_ci e4 = vec[3] * IQ; 220cabdff1aSopenharmony_ci if (out) { 221cabdff1aSopenharmony_ci out[i+0] = e1; 222cabdff1aSopenharmony_ci out[i+1] = e2; 223cabdff1aSopenharmony_ci out[i+2] = e3; 224cabdff1aSopenharmony_ci out[i+3] = e4; 225cabdff1aSopenharmony_ci } 226cabdff1aSopenharmony_ci if (energy) 227cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 228cabdff1aSopenharmony_ci } 229cabdff1aSopenharmony_ci } 230cabdff1aSopenharmony_ci if (energy) 231cabdff1aSopenharmony_ci *energy = qenergy; 232cabdff1aSopenharmony_ci} 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s, 235cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 236cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 237cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 238cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 239cabdff1aSopenharmony_ci{ 240cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 241cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 242cabdff1aSopenharmony_ci int i; 243cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 244cabdff1aSopenharmony_ci float qenergy = 0.0f; 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 247cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; 248cabdff1aSopenharmony_ci float *p_vec = (float *)ff_aac_codebook_vectors[cb-1]; 249cabdff1aSopenharmony_ci 250cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 251cabdff1aSopenharmony_ci scaled = s->scoefs; 252cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 253cabdff1aSopenharmony_ci int curidx, sign, count; 254cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 255cabdff1aSopenharmony_ci uint8_t v_bits; 256cabdff1aSopenharmony_ci unsigned int v_codes; 257cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 258cabdff1aSopenharmony_ci const float *vec; 259cabdff1aSopenharmony_ci 260cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 261cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 262cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 263cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 264cabdff1aSopenharmony_ci 265cabdff1aSopenharmony_ci __asm__ volatile ( 266cabdff1aSopenharmony_ci ".set push \n\t" 267cabdff1aSopenharmony_ci ".set noreorder \n\t" 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_ci "ori %[t4], $zero, 2 \n\t" 270cabdff1aSopenharmony_ci "ori %[sign], $zero, 0 \n\t" 271cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 272cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 273cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 274cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 275cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 276cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 277cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 278cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 279cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 280cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 281cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 282cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 283cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 284cabdff1aSopenharmony_ci "movn %[sign], %[t0], %[qc1] \n\t" 285cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 286cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 287cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 288cabdff1aSopenharmony_ci "sll %[t0], %[sign], 1 \n\t" 289cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 290cabdff1aSopenharmony_ci "movn %[sign], %[t0], %[qc2] \n\t" 291cabdff1aSopenharmony_ci "slt %[t4], $zero, %[qc1] \n\t" 292cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 293cabdff1aSopenharmony_ci "slt %[count], $zero, %[qc3] \n\t" 294cabdff1aSopenharmony_ci "sll %[t0], %[sign], 1 \n\t" 295cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t2] \n\t" 296cabdff1aSopenharmony_ci "movn %[sign], %[t0], %[qc3] \n\t" 297cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 298cabdff1aSopenharmony_ci "addu %[count], %[count], %[t4] \n\t" 299cabdff1aSopenharmony_ci "addu %[count], %[count], %[t1] \n\t" 300cabdff1aSopenharmony_ci "sll %[t0], %[sign], 1 \n\t" 301cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 302cabdff1aSopenharmony_ci "movn %[sign], %[t0], %[qc4] \n\t" 303cabdff1aSopenharmony_ci "addu %[count], %[count], %[t2] \n\t" 304cabdff1aSopenharmony_ci 305cabdff1aSopenharmony_ci ".set pop \n\t" 306cabdff1aSopenharmony_ci 307cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 308cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 309cabdff1aSopenharmony_ci [sign]"=&r"(sign), [count]"=&r"(count), 310cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 311cabdff1aSopenharmony_ci [t4]"=&r"(t4) 312cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 313cabdff1aSopenharmony_ci : "memory" 314cabdff1aSopenharmony_ci ); 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci curidx = qc1; 317cabdff1aSopenharmony_ci curidx *= 3; 318cabdff1aSopenharmony_ci curidx += qc2; 319cabdff1aSopenharmony_ci curidx *= 3; 320cabdff1aSopenharmony_ci curidx += qc3; 321cabdff1aSopenharmony_ci curidx *= 3; 322cabdff1aSopenharmony_ci curidx += qc4; 323cabdff1aSopenharmony_ci 324cabdff1aSopenharmony_ci v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1)); 325cabdff1aSopenharmony_ci v_bits = p_bits[curidx] + count; 326cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 327cabdff1aSopenharmony_ci 328cabdff1aSopenharmony_ci if (out || energy) { 329cabdff1aSopenharmony_ci float e1,e2,e3,e4; 330cabdff1aSopenharmony_ci vec = &p_vec[curidx*4]; 331cabdff1aSopenharmony_ci e1 = copysignf(vec[0] * IQ, in[i+0]); 332cabdff1aSopenharmony_ci e2 = copysignf(vec[1] * IQ, in[i+1]); 333cabdff1aSopenharmony_ci e3 = copysignf(vec[2] * IQ, in[i+2]); 334cabdff1aSopenharmony_ci e4 = copysignf(vec[3] * IQ, in[i+3]); 335cabdff1aSopenharmony_ci if (out) { 336cabdff1aSopenharmony_ci out[i+0] = e1; 337cabdff1aSopenharmony_ci out[i+1] = e2; 338cabdff1aSopenharmony_ci out[i+2] = e3; 339cabdff1aSopenharmony_ci out[i+3] = e4; 340cabdff1aSopenharmony_ci } 341cabdff1aSopenharmony_ci if (energy) 342cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 343cabdff1aSopenharmony_ci } 344cabdff1aSopenharmony_ci } 345cabdff1aSopenharmony_ci if (energy) 346cabdff1aSopenharmony_ci *energy = qenergy; 347cabdff1aSopenharmony_ci} 348cabdff1aSopenharmony_ci 349cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s, 350cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 351cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 352cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 353cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 354cabdff1aSopenharmony_ci{ 355cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 356cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 357cabdff1aSopenharmony_ci int i; 358cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 359cabdff1aSopenharmony_ci float qenergy = 0.0f; 360cabdff1aSopenharmony_ci 361cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 362cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; 363cabdff1aSopenharmony_ci float *p_vec = (float *)ff_aac_codebook_vectors[cb-1]; 364cabdff1aSopenharmony_ci 365cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 366cabdff1aSopenharmony_ci scaled = s->scoefs; 367cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 368cabdff1aSopenharmony_ci int curidx, curidx2; 369cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 370cabdff1aSopenharmony_ci uint8_t v_bits; 371cabdff1aSopenharmony_ci unsigned int v_codes; 372cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 373cabdff1aSopenharmony_ci const float *vec1, *vec2; 374cabdff1aSopenharmony_ci 375cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 376cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 377cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 378cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 379cabdff1aSopenharmony_ci 380cabdff1aSopenharmony_ci __asm__ volatile ( 381cabdff1aSopenharmony_ci ".set push \n\t" 382cabdff1aSopenharmony_ci ".set noreorder \n\t" 383cabdff1aSopenharmony_ci 384cabdff1aSopenharmony_ci "ori %[t4], $zero, 4 \n\t" 385cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 386cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 387cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 388cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 389cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 390cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 391cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 392cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 393cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 394cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 395cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 396cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 397cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 398cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 399cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 400cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 401cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 402cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 403cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 404cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 405cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 406cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 407cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 408cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 409cabdff1aSopenharmony_ci 410cabdff1aSopenharmony_ci ".set pop \n\t" 411cabdff1aSopenharmony_ci 412cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 413cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 414cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 415cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 416cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 417cabdff1aSopenharmony_ci : "memory" 418cabdff1aSopenharmony_ci ); 419cabdff1aSopenharmony_ci 420cabdff1aSopenharmony_ci curidx = 9 * qc1; 421cabdff1aSopenharmony_ci curidx += qc2 + 40; 422cabdff1aSopenharmony_ci 423cabdff1aSopenharmony_ci curidx2 = 9 * qc3; 424cabdff1aSopenharmony_ci curidx2 += qc4 + 40; 425cabdff1aSopenharmony_ci 426cabdff1aSopenharmony_ci v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]); 427cabdff1aSopenharmony_ci v_bits = p_bits[curidx] + p_bits[curidx2]; 428cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 429cabdff1aSopenharmony_ci 430cabdff1aSopenharmony_ci if (out || energy) { 431cabdff1aSopenharmony_ci float e1,e2,e3,e4; 432cabdff1aSopenharmony_ci vec1 = &p_vec[curidx*2 ]; 433cabdff1aSopenharmony_ci vec2 = &p_vec[curidx2*2]; 434cabdff1aSopenharmony_ci e1 = vec1[0] * IQ; 435cabdff1aSopenharmony_ci e2 = vec1[1] * IQ; 436cabdff1aSopenharmony_ci e3 = vec2[0] * IQ; 437cabdff1aSopenharmony_ci e4 = vec2[1] * IQ; 438cabdff1aSopenharmony_ci if (out) { 439cabdff1aSopenharmony_ci out[i+0] = e1; 440cabdff1aSopenharmony_ci out[i+1] = e2; 441cabdff1aSopenharmony_ci out[i+2] = e3; 442cabdff1aSopenharmony_ci out[i+3] = e4; 443cabdff1aSopenharmony_ci } 444cabdff1aSopenharmony_ci if (energy) 445cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 446cabdff1aSopenharmony_ci } 447cabdff1aSopenharmony_ci } 448cabdff1aSopenharmony_ci if (energy) 449cabdff1aSopenharmony_ci *energy = qenergy; 450cabdff1aSopenharmony_ci} 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s, 453cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 454cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 455cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 456cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 457cabdff1aSopenharmony_ci{ 458cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 459cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 460cabdff1aSopenharmony_ci int i; 461cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 462cabdff1aSopenharmony_ci float qenergy = 0.0f; 463cabdff1aSopenharmony_ci 464cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; 465cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; 466cabdff1aSopenharmony_ci float *p_vec = (float *)ff_aac_codebook_vectors[cb-1]; 467cabdff1aSopenharmony_ci 468cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 469cabdff1aSopenharmony_ci scaled = s->scoefs; 470cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 471cabdff1aSopenharmony_ci int curidx1, curidx2, sign1, count1, sign2, count2; 472cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 473cabdff1aSopenharmony_ci uint8_t v_bits; 474cabdff1aSopenharmony_ci unsigned int v_codes; 475cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 476cabdff1aSopenharmony_ci const float *vec1, *vec2; 477cabdff1aSopenharmony_ci 478cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 479cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 480cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 481cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci __asm__ volatile ( 484cabdff1aSopenharmony_ci ".set push \n\t" 485cabdff1aSopenharmony_ci ".set noreorder \n\t" 486cabdff1aSopenharmony_ci 487cabdff1aSopenharmony_ci "ori %[t4], $zero, 7 \n\t" 488cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 489cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 490cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 491cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 492cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 493cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 494cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 495cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 496cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 497cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 498cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 499cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 500cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 501cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 502cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 503cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 504cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 505cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 506cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 507cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 508cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 509cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 510cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 511cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 512cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 513cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 514cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 515cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 516cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 517cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 518cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 519cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 520cabdff1aSopenharmony_ci 521cabdff1aSopenharmony_ci ".set pop \n\t" 522cabdff1aSopenharmony_ci 523cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 524cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 525cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 526cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 527cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 528cabdff1aSopenharmony_ci [t4]"=&r"(t4) 529cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 530cabdff1aSopenharmony_ci : "t0", "t1", "t2", "t3", "t4", 531cabdff1aSopenharmony_ci "memory" 532cabdff1aSopenharmony_ci ); 533cabdff1aSopenharmony_ci 534cabdff1aSopenharmony_ci curidx1 = 8 * qc1; 535cabdff1aSopenharmony_ci curidx1 += qc2; 536cabdff1aSopenharmony_ci 537cabdff1aSopenharmony_ci v_codes = (p_codes[curidx1] << count1) | sign1; 538cabdff1aSopenharmony_ci v_bits = p_bits[curidx1] + count1; 539cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 540cabdff1aSopenharmony_ci 541cabdff1aSopenharmony_ci curidx2 = 8 * qc3; 542cabdff1aSopenharmony_ci curidx2 += qc4; 543cabdff1aSopenharmony_ci 544cabdff1aSopenharmony_ci v_codes = (p_codes[curidx2] << count2) | sign2; 545cabdff1aSopenharmony_ci v_bits = p_bits[curidx2] + count2; 546cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 547cabdff1aSopenharmony_ci 548cabdff1aSopenharmony_ci if (out || energy) { 549cabdff1aSopenharmony_ci float e1,e2,e3,e4; 550cabdff1aSopenharmony_ci vec1 = &p_vec[curidx1*2]; 551cabdff1aSopenharmony_ci vec2 = &p_vec[curidx2*2]; 552cabdff1aSopenharmony_ci e1 = copysignf(vec1[0] * IQ, in[i+0]); 553cabdff1aSopenharmony_ci e2 = copysignf(vec1[1] * IQ, in[i+1]); 554cabdff1aSopenharmony_ci e3 = copysignf(vec2[0] * IQ, in[i+2]); 555cabdff1aSopenharmony_ci e4 = copysignf(vec2[1] * IQ, in[i+3]); 556cabdff1aSopenharmony_ci if (out) { 557cabdff1aSopenharmony_ci out[i+0] = e1; 558cabdff1aSopenharmony_ci out[i+1] = e2; 559cabdff1aSopenharmony_ci out[i+2] = e3; 560cabdff1aSopenharmony_ci out[i+3] = e4; 561cabdff1aSopenharmony_ci } 562cabdff1aSopenharmony_ci if (energy) 563cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 564cabdff1aSopenharmony_ci } 565cabdff1aSopenharmony_ci } 566cabdff1aSopenharmony_ci if (energy) 567cabdff1aSopenharmony_ci *energy = qenergy; 568cabdff1aSopenharmony_ci} 569cabdff1aSopenharmony_ci 570cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s, 571cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 572cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 573cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 574cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 575cabdff1aSopenharmony_ci{ 576cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 577cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 578cabdff1aSopenharmony_ci int i; 579cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 580cabdff1aSopenharmony_ci float qenergy = 0.0f; 581cabdff1aSopenharmony_ci 582cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; 583cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; 584cabdff1aSopenharmony_ci float *p_vec = (float *)ff_aac_codebook_vectors[cb-1]; 585cabdff1aSopenharmony_ci 586cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 587cabdff1aSopenharmony_ci scaled = s->scoefs; 588cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 589cabdff1aSopenharmony_ci int curidx1, curidx2, sign1, count1, sign2, count2; 590cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 591cabdff1aSopenharmony_ci uint8_t v_bits; 592cabdff1aSopenharmony_ci unsigned int v_codes; 593cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 594cabdff1aSopenharmony_ci const float *vec1, *vec2; 595cabdff1aSopenharmony_ci 596cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 597cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 598cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 599cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 600cabdff1aSopenharmony_ci 601cabdff1aSopenharmony_ci __asm__ volatile ( 602cabdff1aSopenharmony_ci ".set push \n\t" 603cabdff1aSopenharmony_ci ".set noreorder \n\t" 604cabdff1aSopenharmony_ci 605cabdff1aSopenharmony_ci "ori %[t4], $zero, 12 \n\t" 606cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 607cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 608cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 609cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 610cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 611cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 612cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 613cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 614cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 615cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 616cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 617cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 618cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 619cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 620cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 621cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 622cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 623cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 624cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 625cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 626cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 627cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 628cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 629cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 630cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 631cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 632cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 633cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 634cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 635cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 636cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 637cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 638cabdff1aSopenharmony_ci 639cabdff1aSopenharmony_ci ".set pop \n\t" 640cabdff1aSopenharmony_ci 641cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 642cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 643cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 644cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 645cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 646cabdff1aSopenharmony_ci [t4]"=&r"(t4) 647cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 648cabdff1aSopenharmony_ci : "memory" 649cabdff1aSopenharmony_ci ); 650cabdff1aSopenharmony_ci 651cabdff1aSopenharmony_ci curidx1 = 13 * qc1; 652cabdff1aSopenharmony_ci curidx1 += qc2; 653cabdff1aSopenharmony_ci 654cabdff1aSopenharmony_ci v_codes = (p_codes[curidx1] << count1) | sign1; 655cabdff1aSopenharmony_ci v_bits = p_bits[curidx1] + count1; 656cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 657cabdff1aSopenharmony_ci 658cabdff1aSopenharmony_ci curidx2 = 13 * qc3; 659cabdff1aSopenharmony_ci curidx2 += qc4; 660cabdff1aSopenharmony_ci 661cabdff1aSopenharmony_ci v_codes = (p_codes[curidx2] << count2) | sign2; 662cabdff1aSopenharmony_ci v_bits = p_bits[curidx2] + count2; 663cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 664cabdff1aSopenharmony_ci 665cabdff1aSopenharmony_ci if (out || energy) { 666cabdff1aSopenharmony_ci float e1,e2,e3,e4; 667cabdff1aSopenharmony_ci vec1 = &p_vec[curidx1*2]; 668cabdff1aSopenharmony_ci vec2 = &p_vec[curidx2*2]; 669cabdff1aSopenharmony_ci e1 = copysignf(vec1[0] * IQ, in[i+0]); 670cabdff1aSopenharmony_ci e2 = copysignf(vec1[1] * IQ, in[i+1]); 671cabdff1aSopenharmony_ci e3 = copysignf(vec2[0] * IQ, in[i+2]); 672cabdff1aSopenharmony_ci e4 = copysignf(vec2[1] * IQ, in[i+3]); 673cabdff1aSopenharmony_ci if (out) { 674cabdff1aSopenharmony_ci out[i+0] = e1; 675cabdff1aSopenharmony_ci out[i+1] = e2; 676cabdff1aSopenharmony_ci out[i+2] = e3; 677cabdff1aSopenharmony_ci out[i+3] = e4; 678cabdff1aSopenharmony_ci } 679cabdff1aSopenharmony_ci if (energy) 680cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 681cabdff1aSopenharmony_ci } 682cabdff1aSopenharmony_ci } 683cabdff1aSopenharmony_ci if (energy) 684cabdff1aSopenharmony_ci *energy = qenergy; 685cabdff1aSopenharmony_ci} 686cabdff1aSopenharmony_ci 687cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s, 688cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 689cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 690cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 691cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) 692cabdff1aSopenharmony_ci{ 693cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 694cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 695cabdff1aSopenharmony_ci int i; 696cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 697cabdff1aSopenharmony_ci float qenergy = 0.0f; 698cabdff1aSopenharmony_ci 699cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1]; 700cabdff1aSopenharmony_ci uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; 701cabdff1aSopenharmony_ci float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1]; 702cabdff1aSopenharmony_ci 703cabdff1aSopenharmony_ci abs_pow34_v(s->scoefs, in, size); 704cabdff1aSopenharmony_ci scaled = s->scoefs; 705cabdff1aSopenharmony_ci 706cabdff1aSopenharmony_ci if (cb < 11) { 707cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 708cabdff1aSopenharmony_ci int curidx, curidx2, sign1, count1, sign2, count2; 709cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 710cabdff1aSopenharmony_ci uint8_t v_bits; 711cabdff1aSopenharmony_ci unsigned int v_codes; 712cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 713cabdff1aSopenharmony_ci const float *vec1, *vec2; 714cabdff1aSopenharmony_ci 715cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUNDING; 716cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUNDING; 717cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUNDING; 718cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUNDING; 719cabdff1aSopenharmony_ci 720cabdff1aSopenharmony_ci __asm__ volatile ( 721cabdff1aSopenharmony_ci ".set push \n\t" 722cabdff1aSopenharmony_ci ".set noreorder \n\t" 723cabdff1aSopenharmony_ci 724cabdff1aSopenharmony_ci "ori %[t4], $zero, 16 \n\t" 725cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 726cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 727cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 728cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 729cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 730cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 731cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 732cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 733cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 734cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 735cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 736cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 737cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 738cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 739cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 740cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 741cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 742cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 743cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 744cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 745cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 746cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 747cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 748cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 749cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 750cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 751cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 752cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 753cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 754cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 755cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 756cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 757cabdff1aSopenharmony_ci 758cabdff1aSopenharmony_ci ".set pop \n\t" 759cabdff1aSopenharmony_ci 760cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 761cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 762cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 763cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 764cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 765cabdff1aSopenharmony_ci [t4]"=&r"(t4) 766cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 767cabdff1aSopenharmony_ci : "memory" 768cabdff1aSopenharmony_ci ); 769cabdff1aSopenharmony_ci 770cabdff1aSopenharmony_ci curidx = 17 * qc1; 771cabdff1aSopenharmony_ci curidx += qc2; 772cabdff1aSopenharmony_ci curidx2 = 17 * qc3; 773cabdff1aSopenharmony_ci curidx2 += qc4; 774cabdff1aSopenharmony_ci 775cabdff1aSopenharmony_ci v_codes = (p_codes[curidx] << count1) | sign1; 776cabdff1aSopenharmony_ci v_bits = p_bits[curidx] + count1; 777cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 778cabdff1aSopenharmony_ci 779cabdff1aSopenharmony_ci v_codes = (p_codes[curidx2] << count2) | sign2; 780cabdff1aSopenharmony_ci v_bits = p_bits[curidx2] + count2; 781cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 782cabdff1aSopenharmony_ci 783cabdff1aSopenharmony_ci if (out || energy) { 784cabdff1aSopenharmony_ci float e1,e2,e3,e4; 785cabdff1aSopenharmony_ci vec1 = &p_vectors[curidx*2 ]; 786cabdff1aSopenharmony_ci vec2 = &p_vectors[curidx2*2]; 787cabdff1aSopenharmony_ci e1 = copysignf(vec1[0] * IQ, in[i+0]); 788cabdff1aSopenharmony_ci e2 = copysignf(vec1[1] * IQ, in[i+1]); 789cabdff1aSopenharmony_ci e3 = copysignf(vec2[0] * IQ, in[i+2]); 790cabdff1aSopenharmony_ci e4 = copysignf(vec2[1] * IQ, in[i+3]); 791cabdff1aSopenharmony_ci if (out) { 792cabdff1aSopenharmony_ci out[i+0] = e1; 793cabdff1aSopenharmony_ci out[i+1] = e2; 794cabdff1aSopenharmony_ci out[i+2] = e3; 795cabdff1aSopenharmony_ci out[i+3] = e4; 796cabdff1aSopenharmony_ci } 797cabdff1aSopenharmony_ci if (energy) 798cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 799cabdff1aSopenharmony_ci } 800cabdff1aSopenharmony_ci } 801cabdff1aSopenharmony_ci } else { 802cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 803cabdff1aSopenharmony_ci int curidx, curidx2, sign1, count1, sign2, count2; 804cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 805cabdff1aSopenharmony_ci uint8_t v_bits; 806cabdff1aSopenharmony_ci unsigned int v_codes; 807cabdff1aSopenharmony_ci int c1, c2, c3, c4; 808cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 809cabdff1aSopenharmony_ci 810cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUNDING; 811cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUNDING; 812cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUNDING; 813cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUNDING; 814cabdff1aSopenharmony_ci 815cabdff1aSopenharmony_ci __asm__ volatile ( 816cabdff1aSopenharmony_ci ".set push \n\t" 817cabdff1aSopenharmony_ci ".set noreorder \n\t" 818cabdff1aSopenharmony_ci 819cabdff1aSopenharmony_ci "ori %[t4], $zero, 16 \n\t" 820cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 821cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 822cabdff1aSopenharmony_ci "shll_s.w %[c1], %[qc1], 18 \n\t" 823cabdff1aSopenharmony_ci "shll_s.w %[c2], %[qc2], 18 \n\t" 824cabdff1aSopenharmony_ci "shll_s.w %[c3], %[qc3], 18 \n\t" 825cabdff1aSopenharmony_ci "shll_s.w %[c4], %[qc4], 18 \n\t" 826cabdff1aSopenharmony_ci "srl %[c1], %[c1], 18 \n\t" 827cabdff1aSopenharmony_ci "srl %[c2], %[c2], 18 \n\t" 828cabdff1aSopenharmony_ci "srl %[c3], %[c3], 18 \n\t" 829cabdff1aSopenharmony_ci "srl %[c4], %[c4], 18 \n\t" 830cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 831cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 832cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 833cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 834cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 835cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 836cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 837cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 838cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 839cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 840cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 841cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 842cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 843cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 844cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 845cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 846cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 847cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 848cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 849cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 850cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 851cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 852cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 853cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 854cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 855cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 856cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 857cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 858cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 859cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 860cabdff1aSopenharmony_ci 861cabdff1aSopenharmony_ci ".set pop \n\t" 862cabdff1aSopenharmony_ci 863cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 864cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 865cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 866cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 867cabdff1aSopenharmony_ci [c1]"=&r"(c1), [c2]"=&r"(c2), 868cabdff1aSopenharmony_ci [c3]"=&r"(c3), [c4]"=&r"(c4), 869cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 870cabdff1aSopenharmony_ci [t4]"=&r"(t4) 871cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 872cabdff1aSopenharmony_ci : "memory" 873cabdff1aSopenharmony_ci ); 874cabdff1aSopenharmony_ci 875cabdff1aSopenharmony_ci curidx = 17 * qc1; 876cabdff1aSopenharmony_ci curidx += qc2; 877cabdff1aSopenharmony_ci 878cabdff1aSopenharmony_ci curidx2 = 17 * qc3; 879cabdff1aSopenharmony_ci curidx2 += qc4; 880cabdff1aSopenharmony_ci 881cabdff1aSopenharmony_ci v_codes = (p_codes[curidx] << count1) | sign1; 882cabdff1aSopenharmony_ci v_bits = p_bits[curidx] + count1; 883cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 884cabdff1aSopenharmony_ci 885cabdff1aSopenharmony_ci if (p_vectors[curidx*2 ] == 64.0f) { 886cabdff1aSopenharmony_ci int len = av_log2(c1); 887cabdff1aSopenharmony_ci v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1)); 888cabdff1aSopenharmony_ci put_bits(pb, len * 2 - 3, v_codes); 889cabdff1aSopenharmony_ci } 890cabdff1aSopenharmony_ci if (p_vectors[curidx*2+1] == 64.0f) { 891cabdff1aSopenharmony_ci int len = av_log2(c2); 892cabdff1aSopenharmony_ci v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1)); 893cabdff1aSopenharmony_ci put_bits(pb, len*2-3, v_codes); 894cabdff1aSopenharmony_ci } 895cabdff1aSopenharmony_ci 896cabdff1aSopenharmony_ci v_codes = (p_codes[curidx2] << count2) | sign2; 897cabdff1aSopenharmony_ci v_bits = p_bits[curidx2] + count2; 898cabdff1aSopenharmony_ci put_bits(pb, v_bits, v_codes); 899cabdff1aSopenharmony_ci 900cabdff1aSopenharmony_ci if (p_vectors[curidx2*2 ] == 64.0f) { 901cabdff1aSopenharmony_ci int len = av_log2(c3); 902cabdff1aSopenharmony_ci v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1)); 903cabdff1aSopenharmony_ci put_bits(pb, len* 2 - 3, v_codes); 904cabdff1aSopenharmony_ci } 905cabdff1aSopenharmony_ci if (p_vectors[curidx2*2+1] == 64.0f) { 906cabdff1aSopenharmony_ci int len = av_log2(c4); 907cabdff1aSopenharmony_ci v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1)); 908cabdff1aSopenharmony_ci put_bits(pb, len * 2 - 3, v_codes); 909cabdff1aSopenharmony_ci } 910cabdff1aSopenharmony_ci 911cabdff1aSopenharmony_ci if (out || energy) { 912cabdff1aSopenharmony_ci float e1, e2, e3, e4; 913cabdff1aSopenharmony_ci e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]); 914cabdff1aSopenharmony_ci e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]); 915cabdff1aSopenharmony_ci e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]); 916cabdff1aSopenharmony_ci e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]); 917cabdff1aSopenharmony_ci if (out) { 918cabdff1aSopenharmony_ci out[i+0] = e1; 919cabdff1aSopenharmony_ci out[i+1] = e2; 920cabdff1aSopenharmony_ci out[i+2] = e3; 921cabdff1aSopenharmony_ci out[i+3] = e4; 922cabdff1aSopenharmony_ci } 923cabdff1aSopenharmony_ci if (energy) 924cabdff1aSopenharmony_ci qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); 925cabdff1aSopenharmony_ci } 926cabdff1aSopenharmony_ci } 927cabdff1aSopenharmony_ci } 928cabdff1aSopenharmony_ci if (energy) 929cabdff1aSopenharmony_ci *energy = qenergy; 930cabdff1aSopenharmony_ci} 931cabdff1aSopenharmony_ci 932cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s, 933cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 934cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 935cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 936cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) { 937cabdff1aSopenharmony_ci av_assert0(0); 938cabdff1aSopenharmony_ci} 939cabdff1aSopenharmony_ci 940cabdff1aSopenharmony_cistatic void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s, 941cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 942cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 943cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 944cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) { 945cabdff1aSopenharmony_ci int i; 946cabdff1aSopenharmony_ci if (bits) 947cabdff1aSopenharmony_ci *bits = 0; 948cabdff1aSopenharmony_ci if (out) { 949cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 950cabdff1aSopenharmony_ci out[i ] = 0.0f; 951cabdff1aSopenharmony_ci out[i+1] = 0.0f; 952cabdff1aSopenharmony_ci out[i+2] = 0.0f; 953cabdff1aSopenharmony_ci out[i+3] = 0.0f; 954cabdff1aSopenharmony_ci } 955cabdff1aSopenharmony_ci } 956cabdff1aSopenharmony_ci if (energy) 957cabdff1aSopenharmony_ci *energy = 0.0f; 958cabdff1aSopenharmony_ci} 959cabdff1aSopenharmony_ci 960cabdff1aSopenharmony_cistatic void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, 961cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, float *out, 962cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 963cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 964cabdff1aSopenharmony_ci int *bits, float *energy, const float ROUNDING) = { 965cabdff1aSopenharmony_ci quantize_and_encode_band_cost_ZERO_mips, 966cabdff1aSopenharmony_ci quantize_and_encode_band_cost_SQUAD_mips, 967cabdff1aSopenharmony_ci quantize_and_encode_band_cost_SQUAD_mips, 968cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UQUAD_mips, 969cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UQUAD_mips, 970cabdff1aSopenharmony_ci quantize_and_encode_band_cost_SPAIR_mips, 971cabdff1aSopenharmony_ci quantize_and_encode_band_cost_SPAIR_mips, 972cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UPAIR7_mips, 973cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UPAIR7_mips, 974cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UPAIR12_mips, 975cabdff1aSopenharmony_ci quantize_and_encode_band_cost_UPAIR12_mips, 976cabdff1aSopenharmony_ci quantize_and_encode_band_cost_ESC_mips, 977cabdff1aSopenharmony_ci quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */ 978cabdff1aSopenharmony_ci quantize_and_encode_band_cost_ZERO_mips, 979cabdff1aSopenharmony_ci quantize_and_encode_band_cost_ZERO_mips, 980cabdff1aSopenharmony_ci quantize_and_encode_band_cost_ZERO_mips, 981cabdff1aSopenharmony_ci}; 982cabdff1aSopenharmony_ci 983cabdff1aSopenharmony_ci#define quantize_and_encode_band_cost( \ 984cabdff1aSopenharmony_ci s, pb, in, out, scaled, size, scale_idx, cb, \ 985cabdff1aSopenharmony_ci lambda, uplim, bits, energy, ROUNDING) \ 986cabdff1aSopenharmony_ci quantize_and_encode_band_cost_arr[cb]( \ 987cabdff1aSopenharmony_ci s, pb, in, out, scaled, size, scale_idx, cb, \ 988cabdff1aSopenharmony_ci lambda, uplim, bits, energy, ROUNDING) 989cabdff1aSopenharmony_ci 990cabdff1aSopenharmony_cistatic void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb, 991cabdff1aSopenharmony_ci const float *in, float *out, int size, int scale_idx, 992cabdff1aSopenharmony_ci int cb, const float lambda, int rtz) 993cabdff1aSopenharmony_ci{ 994cabdff1aSopenharmony_ci quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda, 995cabdff1aSopenharmony_ci INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD); 996cabdff1aSopenharmony_ci} 997cabdff1aSopenharmony_ci 998cabdff1aSopenharmony_ci/** 999cabdff1aSopenharmony_ci * Functions developed from template function and optimized for getting the number of bits 1000cabdff1aSopenharmony_ci */ 1001cabdff1aSopenharmony_cistatic float get_band_numbits_ZERO_mips(struct AACEncContext *s, 1002cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1003cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1004cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1005cabdff1aSopenharmony_ci int *bits) 1006cabdff1aSopenharmony_ci{ 1007cabdff1aSopenharmony_ci return 0; 1008cabdff1aSopenharmony_ci} 1009cabdff1aSopenharmony_ci 1010cabdff1aSopenharmony_cistatic float get_band_numbits_NONE_mips(struct AACEncContext *s, 1011cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1012cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1013cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1014cabdff1aSopenharmony_ci int *bits) 1015cabdff1aSopenharmony_ci{ 1016cabdff1aSopenharmony_ci av_assert0(0); 1017cabdff1aSopenharmony_ci return 0; 1018cabdff1aSopenharmony_ci} 1019cabdff1aSopenharmony_ci 1020cabdff1aSopenharmony_cistatic float get_band_numbits_SQUAD_mips(struct AACEncContext *s, 1021cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1022cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1023cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1024cabdff1aSopenharmony_ci int *bits) 1025cabdff1aSopenharmony_ci{ 1026cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1027cabdff1aSopenharmony_ci int i; 1028cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1029cabdff1aSopenharmony_ci int curbits = 0; 1030cabdff1aSopenharmony_ci 1031cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1032cabdff1aSopenharmony_ci 1033cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1034cabdff1aSopenharmony_ci int curidx; 1035cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 1036cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 1037cabdff1aSopenharmony_ci 1038cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1039cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1040cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1041cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1042cabdff1aSopenharmony_ci 1043cabdff1aSopenharmony_ci __asm__ volatile ( 1044cabdff1aSopenharmony_ci ".set push \n\t" 1045cabdff1aSopenharmony_ci ".set noreorder \n\t" 1046cabdff1aSopenharmony_ci 1047cabdff1aSopenharmony_ci "slt %[qc1], $zero, %[qc1] \n\t" 1048cabdff1aSopenharmony_ci "slt %[qc2], $zero, %[qc2] \n\t" 1049cabdff1aSopenharmony_ci "slt %[qc3], $zero, %[qc3] \n\t" 1050cabdff1aSopenharmony_ci "slt %[qc4], $zero, %[qc4] \n\t" 1051cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 1052cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 1053cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 1054cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 1055cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 1056cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 1057cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 1058cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 1059cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 1060cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 1061cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 1062cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 1063cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1064cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 1065cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 1066cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 1067cabdff1aSopenharmony_ci 1068cabdff1aSopenharmony_ci ".set pop \n\t" 1069cabdff1aSopenharmony_ci 1070cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1071cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1072cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1073cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 1074cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 1075cabdff1aSopenharmony_ci : "memory" 1076cabdff1aSopenharmony_ci ); 1077cabdff1aSopenharmony_ci 1078cabdff1aSopenharmony_ci curidx = qc1; 1079cabdff1aSopenharmony_ci curidx *= 3; 1080cabdff1aSopenharmony_ci curidx += qc2; 1081cabdff1aSopenharmony_ci curidx *= 3; 1082cabdff1aSopenharmony_ci curidx += qc3; 1083cabdff1aSopenharmony_ci curidx *= 3; 1084cabdff1aSopenharmony_ci curidx += qc4; 1085cabdff1aSopenharmony_ci curidx += 40; 1086cabdff1aSopenharmony_ci 1087cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1088cabdff1aSopenharmony_ci } 1089cabdff1aSopenharmony_ci return curbits; 1090cabdff1aSopenharmony_ci} 1091cabdff1aSopenharmony_ci 1092cabdff1aSopenharmony_cistatic float get_band_numbits_UQUAD_mips(struct AACEncContext *s, 1093cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1094cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1095cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1096cabdff1aSopenharmony_ci int *bits) 1097cabdff1aSopenharmony_ci{ 1098cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1099cabdff1aSopenharmony_ci int i; 1100cabdff1aSopenharmony_ci int curbits = 0; 1101cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1102cabdff1aSopenharmony_ci 1103cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1104cabdff1aSopenharmony_ci 1105cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1106cabdff1aSopenharmony_ci int curidx; 1107cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1108cabdff1aSopenharmony_ci 1109cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1110cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1111cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1112cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1113cabdff1aSopenharmony_ci 1114cabdff1aSopenharmony_ci __asm__ volatile ( 1115cabdff1aSopenharmony_ci ".set push \n\t" 1116cabdff1aSopenharmony_ci ".set noreorder \n\t" 1117cabdff1aSopenharmony_ci 1118cabdff1aSopenharmony_ci "ori %[t4], $zero, 2 \n\t" 1119cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1120cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1121cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1122cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1123cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1124cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1125cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1126cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1127cabdff1aSopenharmony_ci 1128cabdff1aSopenharmony_ci ".set pop \n\t" 1129cabdff1aSopenharmony_ci 1130cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1131cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1132cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1133cabdff1aSopenharmony_ci [t4]"=&r"(t4) 1134cabdff1aSopenharmony_ci ); 1135cabdff1aSopenharmony_ci 1136cabdff1aSopenharmony_ci curidx = qc1; 1137cabdff1aSopenharmony_ci curidx *= 3; 1138cabdff1aSopenharmony_ci curidx += qc2; 1139cabdff1aSopenharmony_ci curidx *= 3; 1140cabdff1aSopenharmony_ci curidx += qc3; 1141cabdff1aSopenharmony_ci curidx *= 3; 1142cabdff1aSopenharmony_ci curidx += qc4; 1143cabdff1aSopenharmony_ci 1144cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1145cabdff1aSopenharmony_ci curbits += uquad_sign_bits[curidx]; 1146cabdff1aSopenharmony_ci } 1147cabdff1aSopenharmony_ci return curbits; 1148cabdff1aSopenharmony_ci} 1149cabdff1aSopenharmony_ci 1150cabdff1aSopenharmony_cistatic float get_band_numbits_SPAIR_mips(struct AACEncContext *s, 1151cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1152cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1153cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1154cabdff1aSopenharmony_ci int *bits) 1155cabdff1aSopenharmony_ci{ 1156cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1157cabdff1aSopenharmony_ci int i; 1158cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1159cabdff1aSopenharmony_ci int curbits = 0; 1160cabdff1aSopenharmony_ci 1161cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; 1162cabdff1aSopenharmony_ci 1163cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1164cabdff1aSopenharmony_ci int curidx, curidx2; 1165cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 1166cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 1167cabdff1aSopenharmony_ci 1168cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1169cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1170cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1171cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1172cabdff1aSopenharmony_ci 1173cabdff1aSopenharmony_ci __asm__ volatile ( 1174cabdff1aSopenharmony_ci ".set push \n\t" 1175cabdff1aSopenharmony_ci ".set noreorder \n\t" 1176cabdff1aSopenharmony_ci 1177cabdff1aSopenharmony_ci "ori %[t4], $zero, 4 \n\t" 1178cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1179cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1180cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1181cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1182cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1183cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1184cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1185cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1186cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 1187cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 1188cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 1189cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 1190cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 1191cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 1192cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 1193cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 1194cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 1195cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 1196cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 1197cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 1198cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1199cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 1200cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 1201cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 1202cabdff1aSopenharmony_ci 1203cabdff1aSopenharmony_ci ".set pop \n\t" 1204cabdff1aSopenharmony_ci 1205cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1206cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1207cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1208cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 1209cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 1210cabdff1aSopenharmony_ci : "memory" 1211cabdff1aSopenharmony_ci ); 1212cabdff1aSopenharmony_ci 1213cabdff1aSopenharmony_ci curidx = 9 * qc1; 1214cabdff1aSopenharmony_ci curidx += qc2 + 40; 1215cabdff1aSopenharmony_ci 1216cabdff1aSopenharmony_ci curidx2 = 9 * qc3; 1217cabdff1aSopenharmony_ci curidx2 += qc4 + 40; 1218cabdff1aSopenharmony_ci 1219cabdff1aSopenharmony_ci curbits += p_bits[curidx] + p_bits[curidx2]; 1220cabdff1aSopenharmony_ci } 1221cabdff1aSopenharmony_ci return curbits; 1222cabdff1aSopenharmony_ci} 1223cabdff1aSopenharmony_ci 1224cabdff1aSopenharmony_cistatic float get_band_numbits_UPAIR7_mips(struct AACEncContext *s, 1225cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1226cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1227cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1228cabdff1aSopenharmony_ci int *bits) 1229cabdff1aSopenharmony_ci{ 1230cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1231cabdff1aSopenharmony_ci int i; 1232cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1233cabdff1aSopenharmony_ci int curbits = 0; 1234cabdff1aSopenharmony_ci 1235cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1236cabdff1aSopenharmony_ci 1237cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1238cabdff1aSopenharmony_ci int curidx, curidx2; 1239cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1240cabdff1aSopenharmony_ci 1241cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1242cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1243cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1244cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1245cabdff1aSopenharmony_ci 1246cabdff1aSopenharmony_ci __asm__ volatile ( 1247cabdff1aSopenharmony_ci ".set push \n\t" 1248cabdff1aSopenharmony_ci ".set noreorder \n\t" 1249cabdff1aSopenharmony_ci 1250cabdff1aSopenharmony_ci "ori %[t4], $zero, 7 \n\t" 1251cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1252cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1253cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1254cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1255cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1256cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1257cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1258cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1259cabdff1aSopenharmony_ci 1260cabdff1aSopenharmony_ci ".set pop \n\t" 1261cabdff1aSopenharmony_ci 1262cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1263cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1264cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1265cabdff1aSopenharmony_ci [t4]"=&r"(t4) 1266cabdff1aSopenharmony_ci ); 1267cabdff1aSopenharmony_ci 1268cabdff1aSopenharmony_ci curidx = 8 * qc1; 1269cabdff1aSopenharmony_ci curidx += qc2; 1270cabdff1aSopenharmony_ci 1271cabdff1aSopenharmony_ci curidx2 = 8 * qc3; 1272cabdff1aSopenharmony_ci curidx2 += qc4; 1273cabdff1aSopenharmony_ci 1274cabdff1aSopenharmony_ci curbits += p_bits[curidx] + 1275cabdff1aSopenharmony_ci upair7_sign_bits[curidx] + 1276cabdff1aSopenharmony_ci p_bits[curidx2] + 1277cabdff1aSopenharmony_ci upair7_sign_bits[curidx2]; 1278cabdff1aSopenharmony_ci } 1279cabdff1aSopenharmony_ci return curbits; 1280cabdff1aSopenharmony_ci} 1281cabdff1aSopenharmony_ci 1282cabdff1aSopenharmony_cistatic float get_band_numbits_UPAIR12_mips(struct AACEncContext *s, 1283cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1284cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1285cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1286cabdff1aSopenharmony_ci int *bits) 1287cabdff1aSopenharmony_ci{ 1288cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1289cabdff1aSopenharmony_ci int i; 1290cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1291cabdff1aSopenharmony_ci int curbits = 0; 1292cabdff1aSopenharmony_ci 1293cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1294cabdff1aSopenharmony_ci 1295cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1296cabdff1aSopenharmony_ci int curidx, curidx2; 1297cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1298cabdff1aSopenharmony_ci 1299cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1300cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1301cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1302cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1303cabdff1aSopenharmony_ci 1304cabdff1aSopenharmony_ci __asm__ volatile ( 1305cabdff1aSopenharmony_ci ".set push \n\t" 1306cabdff1aSopenharmony_ci ".set noreorder \n\t" 1307cabdff1aSopenharmony_ci 1308cabdff1aSopenharmony_ci "ori %[t4], $zero, 12 \n\t" 1309cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1310cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1311cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1312cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1313cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1314cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1315cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1316cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1317cabdff1aSopenharmony_ci 1318cabdff1aSopenharmony_ci ".set pop \n\t" 1319cabdff1aSopenharmony_ci 1320cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1321cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1322cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1323cabdff1aSopenharmony_ci [t4]"=&r"(t4) 1324cabdff1aSopenharmony_ci ); 1325cabdff1aSopenharmony_ci 1326cabdff1aSopenharmony_ci curidx = 13 * qc1; 1327cabdff1aSopenharmony_ci curidx += qc2; 1328cabdff1aSopenharmony_ci 1329cabdff1aSopenharmony_ci curidx2 = 13 * qc3; 1330cabdff1aSopenharmony_ci curidx2 += qc4; 1331cabdff1aSopenharmony_ci 1332cabdff1aSopenharmony_ci curbits += p_bits[curidx] + 1333cabdff1aSopenharmony_ci p_bits[curidx2] + 1334cabdff1aSopenharmony_ci upair12_sign_bits[curidx] + 1335cabdff1aSopenharmony_ci upair12_sign_bits[curidx2]; 1336cabdff1aSopenharmony_ci } 1337cabdff1aSopenharmony_ci return curbits; 1338cabdff1aSopenharmony_ci} 1339cabdff1aSopenharmony_ci 1340cabdff1aSopenharmony_cistatic float get_band_numbits_ESC_mips(struct AACEncContext *s, 1341cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1342cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1343cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1344cabdff1aSopenharmony_ci int *bits) 1345cabdff1aSopenharmony_ci{ 1346cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1347cabdff1aSopenharmony_ci int i; 1348cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1349cabdff1aSopenharmony_ci int curbits = 0; 1350cabdff1aSopenharmony_ci 1351cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; 1352cabdff1aSopenharmony_ci 1353cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1354cabdff1aSopenharmony_ci int curidx, curidx2; 1355cabdff1aSopenharmony_ci int cond0, cond1, cond2, cond3; 1356cabdff1aSopenharmony_ci int c1, c2, c3, c4; 1357cabdff1aSopenharmony_ci int t4, t5; 1358cabdff1aSopenharmony_ci 1359cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1360cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1361cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1362cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1363cabdff1aSopenharmony_ci 1364cabdff1aSopenharmony_ci __asm__ volatile ( 1365cabdff1aSopenharmony_ci ".set push \n\t" 1366cabdff1aSopenharmony_ci ".set noreorder \n\t" 1367cabdff1aSopenharmony_ci 1368cabdff1aSopenharmony_ci "ori %[t4], $zero, 15 \n\t" 1369cabdff1aSopenharmony_ci "ori %[t5], $zero, 16 \n\t" 1370cabdff1aSopenharmony_ci "shll_s.w %[c1], %[qc1], 18 \n\t" 1371cabdff1aSopenharmony_ci "shll_s.w %[c2], %[qc2], 18 \n\t" 1372cabdff1aSopenharmony_ci "shll_s.w %[c3], %[qc3], 18 \n\t" 1373cabdff1aSopenharmony_ci "shll_s.w %[c4], %[qc4], 18 \n\t" 1374cabdff1aSopenharmony_ci "srl %[c1], %[c1], 18 \n\t" 1375cabdff1aSopenharmony_ci "srl %[c2], %[c2], 18 \n\t" 1376cabdff1aSopenharmony_ci "srl %[c3], %[c3], 18 \n\t" 1377cabdff1aSopenharmony_ci "srl %[c4], %[c4], 18 \n\t" 1378cabdff1aSopenharmony_ci "slt %[cond0], %[t4], %[qc1] \n\t" 1379cabdff1aSopenharmony_ci "slt %[cond1], %[t4], %[qc2] \n\t" 1380cabdff1aSopenharmony_ci "slt %[cond2], %[t4], %[qc3] \n\t" 1381cabdff1aSopenharmony_ci "slt %[cond3], %[t4], %[qc4] \n\t" 1382cabdff1aSopenharmony_ci "movn %[qc1], %[t5], %[cond0] \n\t" 1383cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[cond1] \n\t" 1384cabdff1aSopenharmony_ci "movn %[qc3], %[t5], %[cond2] \n\t" 1385cabdff1aSopenharmony_ci "movn %[qc4], %[t5], %[cond3] \n\t" 1386cabdff1aSopenharmony_ci "ori %[t5], $zero, 31 \n\t" 1387cabdff1aSopenharmony_ci "clz %[c1], %[c1] \n\t" 1388cabdff1aSopenharmony_ci "clz %[c2], %[c2] \n\t" 1389cabdff1aSopenharmony_ci "clz %[c3], %[c3] \n\t" 1390cabdff1aSopenharmony_ci "clz %[c4], %[c4] \n\t" 1391cabdff1aSopenharmony_ci "subu %[c1], %[t5], %[c1] \n\t" 1392cabdff1aSopenharmony_ci "subu %[c2], %[t5], %[c2] \n\t" 1393cabdff1aSopenharmony_ci "subu %[c3], %[t5], %[c3] \n\t" 1394cabdff1aSopenharmony_ci "subu %[c4], %[t5], %[c4] \n\t" 1395cabdff1aSopenharmony_ci "sll %[c1], %[c1], 1 \n\t" 1396cabdff1aSopenharmony_ci "sll %[c2], %[c2], 1 \n\t" 1397cabdff1aSopenharmony_ci "sll %[c3], %[c3], 1 \n\t" 1398cabdff1aSopenharmony_ci "sll %[c4], %[c4], 1 \n\t" 1399cabdff1aSopenharmony_ci "addiu %[c1], %[c1], -3 \n\t" 1400cabdff1aSopenharmony_ci "addiu %[c2], %[c2], -3 \n\t" 1401cabdff1aSopenharmony_ci "addiu %[c3], %[c3], -3 \n\t" 1402cabdff1aSopenharmony_ci "addiu %[c4], %[c4], -3 \n\t" 1403cabdff1aSopenharmony_ci "subu %[cond0], $zero, %[cond0] \n\t" 1404cabdff1aSopenharmony_ci "subu %[cond1], $zero, %[cond1] \n\t" 1405cabdff1aSopenharmony_ci "subu %[cond2], $zero, %[cond2] \n\t" 1406cabdff1aSopenharmony_ci "subu %[cond3], $zero, %[cond3] \n\t" 1407cabdff1aSopenharmony_ci "and %[c1], %[c1], %[cond0] \n\t" 1408cabdff1aSopenharmony_ci "and %[c2], %[c2], %[cond1] \n\t" 1409cabdff1aSopenharmony_ci "and %[c3], %[c3], %[cond2] \n\t" 1410cabdff1aSopenharmony_ci "and %[c4], %[c4], %[cond3] \n\t" 1411cabdff1aSopenharmony_ci 1412cabdff1aSopenharmony_ci ".set pop \n\t" 1413cabdff1aSopenharmony_ci 1414cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1415cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1416cabdff1aSopenharmony_ci [cond0]"=&r"(cond0), [cond1]"=&r"(cond1), 1417cabdff1aSopenharmony_ci [cond2]"=&r"(cond2), [cond3]"=&r"(cond3), 1418cabdff1aSopenharmony_ci [c1]"=&r"(c1), [c2]"=&r"(c2), 1419cabdff1aSopenharmony_ci [c3]"=&r"(c3), [c4]"=&r"(c4), 1420cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5) 1421cabdff1aSopenharmony_ci ); 1422cabdff1aSopenharmony_ci 1423cabdff1aSopenharmony_ci curidx = 17 * qc1; 1424cabdff1aSopenharmony_ci curidx += qc2; 1425cabdff1aSopenharmony_ci 1426cabdff1aSopenharmony_ci curidx2 = 17 * qc3; 1427cabdff1aSopenharmony_ci curidx2 += qc4; 1428cabdff1aSopenharmony_ci 1429cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1430cabdff1aSopenharmony_ci curbits += esc_sign_bits[curidx]; 1431cabdff1aSopenharmony_ci curbits += p_bits[curidx2]; 1432cabdff1aSopenharmony_ci curbits += esc_sign_bits[curidx2]; 1433cabdff1aSopenharmony_ci 1434cabdff1aSopenharmony_ci curbits += c1; 1435cabdff1aSopenharmony_ci curbits += c2; 1436cabdff1aSopenharmony_ci curbits += c3; 1437cabdff1aSopenharmony_ci curbits += c4; 1438cabdff1aSopenharmony_ci } 1439cabdff1aSopenharmony_ci return curbits; 1440cabdff1aSopenharmony_ci} 1441cabdff1aSopenharmony_ci 1442cabdff1aSopenharmony_cistatic float (*const get_band_numbits_arr[])(struct AACEncContext *s, 1443cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1444cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1445cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1446cabdff1aSopenharmony_ci int *bits) = { 1447cabdff1aSopenharmony_ci get_band_numbits_ZERO_mips, 1448cabdff1aSopenharmony_ci get_band_numbits_SQUAD_mips, 1449cabdff1aSopenharmony_ci get_band_numbits_SQUAD_mips, 1450cabdff1aSopenharmony_ci get_band_numbits_UQUAD_mips, 1451cabdff1aSopenharmony_ci get_band_numbits_UQUAD_mips, 1452cabdff1aSopenharmony_ci get_band_numbits_SPAIR_mips, 1453cabdff1aSopenharmony_ci get_band_numbits_SPAIR_mips, 1454cabdff1aSopenharmony_ci get_band_numbits_UPAIR7_mips, 1455cabdff1aSopenharmony_ci get_band_numbits_UPAIR7_mips, 1456cabdff1aSopenharmony_ci get_band_numbits_UPAIR12_mips, 1457cabdff1aSopenharmony_ci get_band_numbits_UPAIR12_mips, 1458cabdff1aSopenharmony_ci get_band_numbits_ESC_mips, 1459cabdff1aSopenharmony_ci get_band_numbits_NONE_mips, /* cb 12 doesn't exist */ 1460cabdff1aSopenharmony_ci get_band_numbits_ZERO_mips, 1461cabdff1aSopenharmony_ci get_band_numbits_ZERO_mips, 1462cabdff1aSopenharmony_ci get_band_numbits_ZERO_mips, 1463cabdff1aSopenharmony_ci}; 1464cabdff1aSopenharmony_ci 1465cabdff1aSopenharmony_ci#define get_band_numbits( \ 1466cabdff1aSopenharmony_ci s, pb, in, scaled, size, scale_idx, cb, \ 1467cabdff1aSopenharmony_ci lambda, uplim, bits) \ 1468cabdff1aSopenharmony_ci get_band_numbits_arr[cb]( \ 1469cabdff1aSopenharmony_ci s, pb, in, scaled, size, scale_idx, cb, \ 1470cabdff1aSopenharmony_ci lambda, uplim, bits) 1471cabdff1aSopenharmony_ci 1472cabdff1aSopenharmony_cistatic float quantize_band_cost_bits(struct AACEncContext *s, const float *in, 1473cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1474cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1475cabdff1aSopenharmony_ci int *bits, float *energy, int rtz) 1476cabdff1aSopenharmony_ci{ 1477cabdff1aSopenharmony_ci return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits); 1478cabdff1aSopenharmony_ci} 1479cabdff1aSopenharmony_ci 1480cabdff1aSopenharmony_ci/** 1481cabdff1aSopenharmony_ci * Functions developed from template function and optimized for getting the band cost 1482cabdff1aSopenharmony_ci */ 1483cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 1484cabdff1aSopenharmony_cistatic float get_band_cost_ZERO_mips(struct AACEncContext *s, 1485cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1486cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1487cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1488cabdff1aSopenharmony_ci int *bits, float *energy) 1489cabdff1aSopenharmony_ci{ 1490cabdff1aSopenharmony_ci int i; 1491cabdff1aSopenharmony_ci float cost = 0; 1492cabdff1aSopenharmony_ci 1493cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1494cabdff1aSopenharmony_ci cost += in[i ] * in[i ]; 1495cabdff1aSopenharmony_ci cost += in[i+1] * in[i+1]; 1496cabdff1aSopenharmony_ci cost += in[i+2] * in[i+2]; 1497cabdff1aSopenharmony_ci cost += in[i+3] * in[i+3]; 1498cabdff1aSopenharmony_ci } 1499cabdff1aSopenharmony_ci if (bits) 1500cabdff1aSopenharmony_ci *bits = 0; 1501cabdff1aSopenharmony_ci if (energy) 1502cabdff1aSopenharmony_ci *energy = 0.0f; 1503cabdff1aSopenharmony_ci return cost * lambda; 1504cabdff1aSopenharmony_ci} 1505cabdff1aSopenharmony_ci 1506cabdff1aSopenharmony_cistatic float get_band_cost_NONE_mips(struct AACEncContext *s, 1507cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1508cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1509cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1510cabdff1aSopenharmony_ci int *bits, float *energy) 1511cabdff1aSopenharmony_ci{ 1512cabdff1aSopenharmony_ci av_assert0(0); 1513cabdff1aSopenharmony_ci return 0; 1514cabdff1aSopenharmony_ci} 1515cabdff1aSopenharmony_ci 1516cabdff1aSopenharmony_cistatic float get_band_cost_SQUAD_mips(struct AACEncContext *s, 1517cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1518cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1519cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1520cabdff1aSopenharmony_ci int *bits, float *energy) 1521cabdff1aSopenharmony_ci{ 1522cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1523cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 1524cabdff1aSopenharmony_ci int i; 1525cabdff1aSopenharmony_ci float cost = 0; 1526cabdff1aSopenharmony_ci float qenergy = 0.0f; 1527cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1528cabdff1aSopenharmony_ci int curbits = 0; 1529cabdff1aSopenharmony_ci 1530cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1531cabdff1aSopenharmony_ci float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; 1532cabdff1aSopenharmony_ci 1533cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1534cabdff1aSopenharmony_ci const float *vec; 1535cabdff1aSopenharmony_ci int curidx; 1536cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 1537cabdff1aSopenharmony_ci float *in_pos = (float *)&in[i]; 1538cabdff1aSopenharmony_ci float di0, di1, di2, di3; 1539cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 1540cabdff1aSopenharmony_ci 1541cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1542cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1543cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1544cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1545cabdff1aSopenharmony_ci 1546cabdff1aSopenharmony_ci __asm__ volatile ( 1547cabdff1aSopenharmony_ci ".set push \n\t" 1548cabdff1aSopenharmony_ci ".set noreorder \n\t" 1549cabdff1aSopenharmony_ci 1550cabdff1aSopenharmony_ci "slt %[qc1], $zero, %[qc1] \n\t" 1551cabdff1aSopenharmony_ci "slt %[qc2], $zero, %[qc2] \n\t" 1552cabdff1aSopenharmony_ci "slt %[qc3], $zero, %[qc3] \n\t" 1553cabdff1aSopenharmony_ci "slt %[qc4], $zero, %[qc4] \n\t" 1554cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 1555cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 1556cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 1557cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 1558cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 1559cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 1560cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 1561cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 1562cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 1563cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 1564cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 1565cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 1566cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1567cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 1568cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 1569cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 1570cabdff1aSopenharmony_ci 1571cabdff1aSopenharmony_ci ".set pop \n\t" 1572cabdff1aSopenharmony_ci 1573cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1574cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1575cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1576cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 1577cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 1578cabdff1aSopenharmony_ci : "memory" 1579cabdff1aSopenharmony_ci ); 1580cabdff1aSopenharmony_ci 1581cabdff1aSopenharmony_ci curidx = qc1; 1582cabdff1aSopenharmony_ci curidx *= 3; 1583cabdff1aSopenharmony_ci curidx += qc2; 1584cabdff1aSopenharmony_ci curidx *= 3; 1585cabdff1aSopenharmony_ci curidx += qc3; 1586cabdff1aSopenharmony_ci curidx *= 3; 1587cabdff1aSopenharmony_ci curidx += qc4; 1588cabdff1aSopenharmony_ci curidx += 40; 1589cabdff1aSopenharmony_ci 1590cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1591cabdff1aSopenharmony_ci vec = &p_codes[curidx*4]; 1592cabdff1aSopenharmony_ci 1593cabdff1aSopenharmony_ci qenergy += vec[0]*vec[0] + vec[1]*vec[1] 1594cabdff1aSopenharmony_ci + vec[2]*vec[2] + vec[3]*vec[3]; 1595cabdff1aSopenharmony_ci 1596cabdff1aSopenharmony_ci __asm__ volatile ( 1597cabdff1aSopenharmony_ci ".set push \n\t" 1598cabdff1aSopenharmony_ci ".set noreorder \n\t" 1599cabdff1aSopenharmony_ci 1600cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[in_pos]) \n\t" 1601cabdff1aSopenharmony_ci "lwc1 $f1, 0(%[vec]) \n\t" 1602cabdff1aSopenharmony_ci "lwc1 $f2, 4(%[in_pos]) \n\t" 1603cabdff1aSopenharmony_ci "lwc1 $f3, 4(%[vec]) \n\t" 1604cabdff1aSopenharmony_ci "lwc1 $f4, 8(%[in_pos]) \n\t" 1605cabdff1aSopenharmony_ci "lwc1 $f5, 8(%[vec]) \n\t" 1606cabdff1aSopenharmony_ci "lwc1 $f6, 12(%[in_pos]) \n\t" 1607cabdff1aSopenharmony_ci "lwc1 $f7, 12(%[vec]) \n\t" 1608cabdff1aSopenharmony_ci "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t" 1609cabdff1aSopenharmony_ci "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t" 1610cabdff1aSopenharmony_ci "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t" 1611cabdff1aSopenharmony_ci "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t" 1612cabdff1aSopenharmony_ci 1613cabdff1aSopenharmony_ci ".set pop \n\t" 1614cabdff1aSopenharmony_ci 1615cabdff1aSopenharmony_ci : [di0]"=&f"(di0), [di1]"=&f"(di1), 1616cabdff1aSopenharmony_ci [di2]"=&f"(di2), [di3]"=&f"(di3) 1617cabdff1aSopenharmony_ci : [in_pos]"r"(in_pos), [vec]"r"(vec), 1618cabdff1aSopenharmony_ci [IQ]"f"(IQ) 1619cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", 1620cabdff1aSopenharmony_ci "$f4", "$f5", "$f6", "$f7", 1621cabdff1aSopenharmony_ci "memory" 1622cabdff1aSopenharmony_ci ); 1623cabdff1aSopenharmony_ci 1624cabdff1aSopenharmony_ci cost += di0 * di0 + di1 * di1 1625cabdff1aSopenharmony_ci + di2 * di2 + di3 * di3; 1626cabdff1aSopenharmony_ci } 1627cabdff1aSopenharmony_ci 1628cabdff1aSopenharmony_ci if (bits) 1629cabdff1aSopenharmony_ci *bits = curbits; 1630cabdff1aSopenharmony_ci if (energy) 1631cabdff1aSopenharmony_ci *energy = qenergy * (IQ*IQ); 1632cabdff1aSopenharmony_ci return cost * lambda + curbits; 1633cabdff1aSopenharmony_ci} 1634cabdff1aSopenharmony_ci 1635cabdff1aSopenharmony_cistatic float get_band_cost_UQUAD_mips(struct AACEncContext *s, 1636cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1637cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1638cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1639cabdff1aSopenharmony_ci int *bits, float *energy) 1640cabdff1aSopenharmony_ci{ 1641cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1642cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 1643cabdff1aSopenharmony_ci int i; 1644cabdff1aSopenharmony_ci float cost = 0; 1645cabdff1aSopenharmony_ci float qenergy = 0.0f; 1646cabdff1aSopenharmony_ci int curbits = 0; 1647cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1648cabdff1aSopenharmony_ci 1649cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; 1650cabdff1aSopenharmony_ci float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; 1651cabdff1aSopenharmony_ci 1652cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1653cabdff1aSopenharmony_ci const float *vec; 1654cabdff1aSopenharmony_ci int curidx; 1655cabdff1aSopenharmony_ci float *in_pos = (float *)&in[i]; 1656cabdff1aSopenharmony_ci float di0, di1, di2, di3; 1657cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1658cabdff1aSopenharmony_ci 1659cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1660cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1661cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1662cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1663cabdff1aSopenharmony_ci 1664cabdff1aSopenharmony_ci __asm__ volatile ( 1665cabdff1aSopenharmony_ci ".set push \n\t" 1666cabdff1aSopenharmony_ci ".set noreorder \n\t" 1667cabdff1aSopenharmony_ci 1668cabdff1aSopenharmony_ci "ori %[t4], $zero, 2 \n\t" 1669cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1670cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1671cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1672cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1673cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1674cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1675cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1676cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1677cabdff1aSopenharmony_ci 1678cabdff1aSopenharmony_ci ".set pop \n\t" 1679cabdff1aSopenharmony_ci 1680cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1681cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1682cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1683cabdff1aSopenharmony_ci [t4]"=&r"(t4) 1684cabdff1aSopenharmony_ci ); 1685cabdff1aSopenharmony_ci 1686cabdff1aSopenharmony_ci curidx = qc1; 1687cabdff1aSopenharmony_ci curidx *= 3; 1688cabdff1aSopenharmony_ci curidx += qc2; 1689cabdff1aSopenharmony_ci curidx *= 3; 1690cabdff1aSopenharmony_ci curidx += qc3; 1691cabdff1aSopenharmony_ci curidx *= 3; 1692cabdff1aSopenharmony_ci curidx += qc4; 1693cabdff1aSopenharmony_ci 1694cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1695cabdff1aSopenharmony_ci curbits += uquad_sign_bits[curidx]; 1696cabdff1aSopenharmony_ci vec = &p_codes[curidx*4]; 1697cabdff1aSopenharmony_ci 1698cabdff1aSopenharmony_ci qenergy += vec[0]*vec[0] + vec[1]*vec[1] 1699cabdff1aSopenharmony_ci + vec[2]*vec[2] + vec[3]*vec[3]; 1700cabdff1aSopenharmony_ci 1701cabdff1aSopenharmony_ci __asm__ volatile ( 1702cabdff1aSopenharmony_ci ".set push \n\t" 1703cabdff1aSopenharmony_ci ".set noreorder \n\t" 1704cabdff1aSopenharmony_ci 1705cabdff1aSopenharmony_ci "lwc1 %[di0], 0(%[in_pos]) \n\t" 1706cabdff1aSopenharmony_ci "lwc1 %[di1], 4(%[in_pos]) \n\t" 1707cabdff1aSopenharmony_ci "lwc1 %[di2], 8(%[in_pos]) \n\t" 1708cabdff1aSopenharmony_ci "lwc1 %[di3], 12(%[in_pos]) \n\t" 1709cabdff1aSopenharmony_ci "abs.s %[di0], %[di0] \n\t" 1710cabdff1aSopenharmony_ci "abs.s %[di1], %[di1] \n\t" 1711cabdff1aSopenharmony_ci "abs.s %[di2], %[di2] \n\t" 1712cabdff1aSopenharmony_ci "abs.s %[di3], %[di3] \n\t" 1713cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[vec]) \n\t" 1714cabdff1aSopenharmony_ci "lwc1 $f1, 4(%[vec]) \n\t" 1715cabdff1aSopenharmony_ci "lwc1 $f2, 8(%[vec]) \n\t" 1716cabdff1aSopenharmony_ci "lwc1 $f3, 12(%[vec]) \n\t" 1717cabdff1aSopenharmony_ci "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" 1718cabdff1aSopenharmony_ci "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" 1719cabdff1aSopenharmony_ci "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" 1720cabdff1aSopenharmony_ci "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" 1721cabdff1aSopenharmony_ci 1722cabdff1aSopenharmony_ci ".set pop \n\t" 1723cabdff1aSopenharmony_ci 1724cabdff1aSopenharmony_ci : [di0]"=&f"(di0), [di1]"=&f"(di1), 1725cabdff1aSopenharmony_ci [di2]"=&f"(di2), [di3]"=&f"(di3) 1726cabdff1aSopenharmony_ci : [in_pos]"r"(in_pos), [vec]"r"(vec), 1727cabdff1aSopenharmony_ci [IQ]"f"(IQ) 1728cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", 1729cabdff1aSopenharmony_ci "memory" 1730cabdff1aSopenharmony_ci ); 1731cabdff1aSopenharmony_ci 1732cabdff1aSopenharmony_ci cost += di0 * di0 + di1 * di1 1733cabdff1aSopenharmony_ci + di2 * di2 + di3 * di3; 1734cabdff1aSopenharmony_ci } 1735cabdff1aSopenharmony_ci 1736cabdff1aSopenharmony_ci if (bits) 1737cabdff1aSopenharmony_ci *bits = curbits; 1738cabdff1aSopenharmony_ci if (energy) 1739cabdff1aSopenharmony_ci *energy = qenergy * (IQ*IQ); 1740cabdff1aSopenharmony_ci return cost * lambda + curbits; 1741cabdff1aSopenharmony_ci} 1742cabdff1aSopenharmony_ci 1743cabdff1aSopenharmony_cistatic float get_band_cost_SPAIR_mips(struct AACEncContext *s, 1744cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1745cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1746cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1747cabdff1aSopenharmony_ci int *bits, float *energy) 1748cabdff1aSopenharmony_ci{ 1749cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1750cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 1751cabdff1aSopenharmony_ci int i; 1752cabdff1aSopenharmony_ci float cost = 0; 1753cabdff1aSopenharmony_ci float qenergy = 0.0f; 1754cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1755cabdff1aSopenharmony_ci int curbits = 0; 1756cabdff1aSopenharmony_ci 1757cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1758cabdff1aSopenharmony_ci float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; 1759cabdff1aSopenharmony_ci 1760cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1761cabdff1aSopenharmony_ci const float *vec, *vec2; 1762cabdff1aSopenharmony_ci int curidx, curidx2; 1763cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 1764cabdff1aSopenharmony_ci float *in_pos = (float *)&in[i]; 1765cabdff1aSopenharmony_ci float di0, di1, di2, di3; 1766cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4, t5, t6, t7; 1767cabdff1aSopenharmony_ci 1768cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1769cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1770cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1771cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1772cabdff1aSopenharmony_ci 1773cabdff1aSopenharmony_ci __asm__ volatile ( 1774cabdff1aSopenharmony_ci ".set push \n\t" 1775cabdff1aSopenharmony_ci ".set noreorder \n\t" 1776cabdff1aSopenharmony_ci 1777cabdff1aSopenharmony_ci "ori %[t4], $zero, 4 \n\t" 1778cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1779cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1780cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1781cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1782cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1783cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1784cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1785cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1786cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 1787cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 1788cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 1789cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 1790cabdff1aSopenharmony_ci "srl %[t0], %[t0], 31 \n\t" 1791cabdff1aSopenharmony_ci "srl %[t1], %[t1], 31 \n\t" 1792cabdff1aSopenharmony_ci "srl %[t2], %[t2], 31 \n\t" 1793cabdff1aSopenharmony_ci "srl %[t3], %[t3], 31 \n\t" 1794cabdff1aSopenharmony_ci "subu %[t4], $zero, %[qc1] \n\t" 1795cabdff1aSopenharmony_ci "subu %[t5], $zero, %[qc2] \n\t" 1796cabdff1aSopenharmony_ci "subu %[t6], $zero, %[qc3] \n\t" 1797cabdff1aSopenharmony_ci "subu %[t7], $zero, %[qc4] \n\t" 1798cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1799cabdff1aSopenharmony_ci "movn %[qc2], %[t5], %[t1] \n\t" 1800cabdff1aSopenharmony_ci "movn %[qc3], %[t6], %[t2] \n\t" 1801cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[t3] \n\t" 1802cabdff1aSopenharmony_ci 1803cabdff1aSopenharmony_ci ".set pop \n\t" 1804cabdff1aSopenharmony_ci 1805cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1806cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1807cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1808cabdff1aSopenharmony_ci [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7) 1809cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 1810cabdff1aSopenharmony_ci : "memory" 1811cabdff1aSopenharmony_ci ); 1812cabdff1aSopenharmony_ci 1813cabdff1aSopenharmony_ci curidx = 9 * qc1; 1814cabdff1aSopenharmony_ci curidx += qc2 + 40; 1815cabdff1aSopenharmony_ci 1816cabdff1aSopenharmony_ci curidx2 = 9 * qc3; 1817cabdff1aSopenharmony_ci curidx2 += qc4 + 40; 1818cabdff1aSopenharmony_ci 1819cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1820cabdff1aSopenharmony_ci curbits += p_bits[curidx2]; 1821cabdff1aSopenharmony_ci 1822cabdff1aSopenharmony_ci vec = &p_codes[curidx*2]; 1823cabdff1aSopenharmony_ci vec2 = &p_codes[curidx2*2]; 1824cabdff1aSopenharmony_ci 1825cabdff1aSopenharmony_ci qenergy += vec[0]*vec[0] + vec[1]*vec[1] 1826cabdff1aSopenharmony_ci + vec2[0]*vec2[0] + vec2[1]*vec2[1]; 1827cabdff1aSopenharmony_ci 1828cabdff1aSopenharmony_ci __asm__ volatile ( 1829cabdff1aSopenharmony_ci ".set push \n\t" 1830cabdff1aSopenharmony_ci ".set noreorder \n\t" 1831cabdff1aSopenharmony_ci 1832cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[in_pos]) \n\t" 1833cabdff1aSopenharmony_ci "lwc1 $f1, 0(%[vec]) \n\t" 1834cabdff1aSopenharmony_ci "lwc1 $f2, 4(%[in_pos]) \n\t" 1835cabdff1aSopenharmony_ci "lwc1 $f3, 4(%[vec]) \n\t" 1836cabdff1aSopenharmony_ci "lwc1 $f4, 8(%[in_pos]) \n\t" 1837cabdff1aSopenharmony_ci "lwc1 $f5, 0(%[vec2]) \n\t" 1838cabdff1aSopenharmony_ci "lwc1 $f6, 12(%[in_pos]) \n\t" 1839cabdff1aSopenharmony_ci "lwc1 $f7, 4(%[vec2]) \n\t" 1840cabdff1aSopenharmony_ci "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t" 1841cabdff1aSopenharmony_ci "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t" 1842cabdff1aSopenharmony_ci "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t" 1843cabdff1aSopenharmony_ci "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t" 1844cabdff1aSopenharmony_ci 1845cabdff1aSopenharmony_ci ".set pop \n\t" 1846cabdff1aSopenharmony_ci 1847cabdff1aSopenharmony_ci : [di0]"=&f"(di0), [di1]"=&f"(di1), 1848cabdff1aSopenharmony_ci [di2]"=&f"(di2), [di3]"=&f"(di3) 1849cabdff1aSopenharmony_ci : [in_pos]"r"(in_pos), [vec]"r"(vec), 1850cabdff1aSopenharmony_ci [vec2]"r"(vec2), [IQ]"f"(IQ) 1851cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", 1852cabdff1aSopenharmony_ci "$f4", "$f5", "$f6", "$f7", 1853cabdff1aSopenharmony_ci "memory" 1854cabdff1aSopenharmony_ci ); 1855cabdff1aSopenharmony_ci 1856cabdff1aSopenharmony_ci cost += di0 * di0 + di1 * di1 1857cabdff1aSopenharmony_ci + di2 * di2 + di3 * di3; 1858cabdff1aSopenharmony_ci } 1859cabdff1aSopenharmony_ci 1860cabdff1aSopenharmony_ci if (bits) 1861cabdff1aSopenharmony_ci *bits = curbits; 1862cabdff1aSopenharmony_ci if (energy) 1863cabdff1aSopenharmony_ci *energy = qenergy * (IQ*IQ); 1864cabdff1aSopenharmony_ci return cost * lambda + curbits; 1865cabdff1aSopenharmony_ci} 1866cabdff1aSopenharmony_ci 1867cabdff1aSopenharmony_cistatic float get_band_cost_UPAIR7_mips(struct AACEncContext *s, 1868cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 1869cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 1870cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 1871cabdff1aSopenharmony_ci int *bits, float *energy) 1872cabdff1aSopenharmony_ci{ 1873cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 1874cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 1875cabdff1aSopenharmony_ci int i; 1876cabdff1aSopenharmony_ci float cost = 0; 1877cabdff1aSopenharmony_ci float qenergy = 0.0f; 1878cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 1879cabdff1aSopenharmony_ci int curbits = 0; 1880cabdff1aSopenharmony_ci 1881cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 1882cabdff1aSopenharmony_ci float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; 1883cabdff1aSopenharmony_ci 1884cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 1885cabdff1aSopenharmony_ci const float *vec, *vec2; 1886cabdff1aSopenharmony_ci int curidx, curidx2, sign1, count1, sign2, count2; 1887cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 1888cabdff1aSopenharmony_ci float *in_pos = (float *)&in[i]; 1889cabdff1aSopenharmony_ci float di0, di1, di2, di3; 1890cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1891cabdff1aSopenharmony_ci 1892cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 1893cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 1894cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 1895cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 1896cabdff1aSopenharmony_ci 1897cabdff1aSopenharmony_ci __asm__ volatile ( 1898cabdff1aSopenharmony_ci ".set push \n\t" 1899cabdff1aSopenharmony_ci ".set noreorder \n\t" 1900cabdff1aSopenharmony_ci 1901cabdff1aSopenharmony_ci "ori %[t4], $zero, 7 \n\t" 1902cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 1903cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 1904cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 1905cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 1906cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 1907cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 1908cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 1909cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 1910cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 1911cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 1912cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 1913cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 1914cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 1915cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 1916cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 1917cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 1918cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 1919cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 1920cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 1921cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 1922cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 1923cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 1924cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 1925cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 1926cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 1927cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 1928cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 1929cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 1930cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 1931cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 1932cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 1933cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 1934cabdff1aSopenharmony_ci 1935cabdff1aSopenharmony_ci ".set pop \n\t" 1936cabdff1aSopenharmony_ci 1937cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 1938cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 1939cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 1940cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 1941cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 1942cabdff1aSopenharmony_ci [t4]"=&r"(t4) 1943cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 1944cabdff1aSopenharmony_ci : "memory" 1945cabdff1aSopenharmony_ci ); 1946cabdff1aSopenharmony_ci 1947cabdff1aSopenharmony_ci curidx = 8 * qc1; 1948cabdff1aSopenharmony_ci curidx += qc2; 1949cabdff1aSopenharmony_ci 1950cabdff1aSopenharmony_ci curidx2 = 8 * qc3; 1951cabdff1aSopenharmony_ci curidx2 += qc4; 1952cabdff1aSopenharmony_ci 1953cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 1954cabdff1aSopenharmony_ci curbits += upair7_sign_bits[curidx]; 1955cabdff1aSopenharmony_ci vec = &p_codes[curidx*2]; 1956cabdff1aSopenharmony_ci 1957cabdff1aSopenharmony_ci curbits += p_bits[curidx2]; 1958cabdff1aSopenharmony_ci curbits += upair7_sign_bits[curidx2]; 1959cabdff1aSopenharmony_ci vec2 = &p_codes[curidx2*2]; 1960cabdff1aSopenharmony_ci 1961cabdff1aSopenharmony_ci qenergy += vec[0]*vec[0] + vec[1]*vec[1] 1962cabdff1aSopenharmony_ci + vec2[0]*vec2[0] + vec2[1]*vec2[1]; 1963cabdff1aSopenharmony_ci 1964cabdff1aSopenharmony_ci __asm__ volatile ( 1965cabdff1aSopenharmony_ci ".set push \n\t" 1966cabdff1aSopenharmony_ci ".set noreorder \n\t" 1967cabdff1aSopenharmony_ci 1968cabdff1aSopenharmony_ci "lwc1 %[di0], 0(%[in_pos]) \n\t" 1969cabdff1aSopenharmony_ci "lwc1 %[di1], 4(%[in_pos]) \n\t" 1970cabdff1aSopenharmony_ci "lwc1 %[di2], 8(%[in_pos]) \n\t" 1971cabdff1aSopenharmony_ci "lwc1 %[di3], 12(%[in_pos]) \n\t" 1972cabdff1aSopenharmony_ci "abs.s %[di0], %[di0] \n\t" 1973cabdff1aSopenharmony_ci "abs.s %[di1], %[di1] \n\t" 1974cabdff1aSopenharmony_ci "abs.s %[di2], %[di2] \n\t" 1975cabdff1aSopenharmony_ci "abs.s %[di3], %[di3] \n\t" 1976cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[vec]) \n\t" 1977cabdff1aSopenharmony_ci "lwc1 $f1, 4(%[vec]) \n\t" 1978cabdff1aSopenharmony_ci "lwc1 $f2, 0(%[vec2]) \n\t" 1979cabdff1aSopenharmony_ci "lwc1 $f3, 4(%[vec2]) \n\t" 1980cabdff1aSopenharmony_ci "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" 1981cabdff1aSopenharmony_ci "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" 1982cabdff1aSopenharmony_ci "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" 1983cabdff1aSopenharmony_ci "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" 1984cabdff1aSopenharmony_ci 1985cabdff1aSopenharmony_ci ".set pop \n\t" 1986cabdff1aSopenharmony_ci 1987cabdff1aSopenharmony_ci : [di0]"=&f"(di0), [di1]"=&f"(di1), 1988cabdff1aSopenharmony_ci [di2]"=&f"(di2), [di3]"=&f"(di3) 1989cabdff1aSopenharmony_ci : [in_pos]"r"(in_pos), [vec]"r"(vec), 1990cabdff1aSopenharmony_ci [vec2]"r"(vec2), [IQ]"f"(IQ) 1991cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", 1992cabdff1aSopenharmony_ci "memory" 1993cabdff1aSopenharmony_ci ); 1994cabdff1aSopenharmony_ci 1995cabdff1aSopenharmony_ci cost += di0 * di0 + di1 * di1 1996cabdff1aSopenharmony_ci + di2 * di2 + di3 * di3; 1997cabdff1aSopenharmony_ci } 1998cabdff1aSopenharmony_ci 1999cabdff1aSopenharmony_ci if (bits) 2000cabdff1aSopenharmony_ci *bits = curbits; 2001cabdff1aSopenharmony_ci if (energy) 2002cabdff1aSopenharmony_ci *energy = qenergy * (IQ*IQ); 2003cabdff1aSopenharmony_ci return cost * lambda + curbits; 2004cabdff1aSopenharmony_ci} 2005cabdff1aSopenharmony_ci 2006cabdff1aSopenharmony_cistatic float get_band_cost_UPAIR12_mips(struct AACEncContext *s, 2007cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 2008cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 2009cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 2010cabdff1aSopenharmony_ci int *bits, float *energy) 2011cabdff1aSopenharmony_ci{ 2012cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 2013cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 2014cabdff1aSopenharmony_ci int i; 2015cabdff1aSopenharmony_ci float cost = 0; 2016cabdff1aSopenharmony_ci float qenergy = 0.0f; 2017cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 2018cabdff1aSopenharmony_ci int curbits = 0; 2019cabdff1aSopenharmony_ci 2020cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; 2021cabdff1aSopenharmony_ci float *p_codes = (float *)ff_aac_codebook_vectors[cb-1]; 2022cabdff1aSopenharmony_ci 2023cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 2024cabdff1aSopenharmony_ci const float *vec, *vec2; 2025cabdff1aSopenharmony_ci int curidx, curidx2; 2026cabdff1aSopenharmony_ci int sign1, count1, sign2, count2; 2027cabdff1aSopenharmony_ci int *in_int = (int *)&in[i]; 2028cabdff1aSopenharmony_ci float *in_pos = (float *)&in[i]; 2029cabdff1aSopenharmony_ci float di0, di1, di2, di3; 2030cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 2031cabdff1aSopenharmony_ci 2032cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 2033cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 2034cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 2035cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 2036cabdff1aSopenharmony_ci 2037cabdff1aSopenharmony_ci __asm__ volatile ( 2038cabdff1aSopenharmony_ci ".set push \n\t" 2039cabdff1aSopenharmony_ci ".set noreorder \n\t" 2040cabdff1aSopenharmony_ci 2041cabdff1aSopenharmony_ci "ori %[t4], $zero, 12 \n\t" 2042cabdff1aSopenharmony_ci "ori %[sign1], $zero, 0 \n\t" 2043cabdff1aSopenharmony_ci "ori %[sign2], $zero, 0 \n\t" 2044cabdff1aSopenharmony_ci "slt %[t0], %[t4], %[qc1] \n\t" 2045cabdff1aSopenharmony_ci "slt %[t1], %[t4], %[qc2] \n\t" 2046cabdff1aSopenharmony_ci "slt %[t2], %[t4], %[qc3] \n\t" 2047cabdff1aSopenharmony_ci "slt %[t3], %[t4], %[qc4] \n\t" 2048cabdff1aSopenharmony_ci "movn %[qc1], %[t4], %[t0] \n\t" 2049cabdff1aSopenharmony_ci "movn %[qc2], %[t4], %[t1] \n\t" 2050cabdff1aSopenharmony_ci "movn %[qc3], %[t4], %[t2] \n\t" 2051cabdff1aSopenharmony_ci "movn %[qc4], %[t4], %[t3] \n\t" 2052cabdff1aSopenharmony_ci "lw %[t0], 0(%[in_int]) \n\t" 2053cabdff1aSopenharmony_ci "lw %[t1], 4(%[in_int]) \n\t" 2054cabdff1aSopenharmony_ci "lw %[t2], 8(%[in_int]) \n\t" 2055cabdff1aSopenharmony_ci "lw %[t3], 12(%[in_int]) \n\t" 2056cabdff1aSopenharmony_ci "slt %[t0], %[t0], $zero \n\t" 2057cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc1] \n\t" 2058cabdff1aSopenharmony_ci "slt %[t2], %[t2], $zero \n\t" 2059cabdff1aSopenharmony_ci "movn %[sign2], %[t2], %[qc3] \n\t" 2060cabdff1aSopenharmony_ci "slt %[t1], %[t1], $zero \n\t" 2061cabdff1aSopenharmony_ci "sll %[t0], %[sign1], 1 \n\t" 2062cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t1] \n\t" 2063cabdff1aSopenharmony_ci "movn %[sign1], %[t0], %[qc2] \n\t" 2064cabdff1aSopenharmony_ci "slt %[t3], %[t3], $zero \n\t" 2065cabdff1aSopenharmony_ci "sll %[t0], %[sign2], 1 \n\t" 2066cabdff1aSopenharmony_ci "or %[t0], %[t0], %[t3] \n\t" 2067cabdff1aSopenharmony_ci "movn %[sign2], %[t0], %[qc4] \n\t" 2068cabdff1aSopenharmony_ci "slt %[count1], $zero, %[qc1] \n\t" 2069cabdff1aSopenharmony_ci "slt %[t1], $zero, %[qc2] \n\t" 2070cabdff1aSopenharmony_ci "slt %[count2], $zero, %[qc3] \n\t" 2071cabdff1aSopenharmony_ci "slt %[t2], $zero, %[qc4] \n\t" 2072cabdff1aSopenharmony_ci "addu %[count1], %[count1], %[t1] \n\t" 2073cabdff1aSopenharmony_ci "addu %[count2], %[count2], %[t2] \n\t" 2074cabdff1aSopenharmony_ci 2075cabdff1aSopenharmony_ci ".set pop \n\t" 2076cabdff1aSopenharmony_ci 2077cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 2078cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 2079cabdff1aSopenharmony_ci [sign1]"=&r"(sign1), [count1]"=&r"(count1), 2080cabdff1aSopenharmony_ci [sign2]"=&r"(sign2), [count2]"=&r"(count2), 2081cabdff1aSopenharmony_ci [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3), 2082cabdff1aSopenharmony_ci [t4]"=&r"(t4) 2083cabdff1aSopenharmony_ci : [in_int]"r"(in_int) 2084cabdff1aSopenharmony_ci : "memory" 2085cabdff1aSopenharmony_ci ); 2086cabdff1aSopenharmony_ci 2087cabdff1aSopenharmony_ci curidx = 13 * qc1; 2088cabdff1aSopenharmony_ci curidx += qc2; 2089cabdff1aSopenharmony_ci 2090cabdff1aSopenharmony_ci curidx2 = 13 * qc3; 2091cabdff1aSopenharmony_ci curidx2 += qc4; 2092cabdff1aSopenharmony_ci 2093cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 2094cabdff1aSopenharmony_ci curbits += p_bits[curidx2]; 2095cabdff1aSopenharmony_ci curbits += upair12_sign_bits[curidx]; 2096cabdff1aSopenharmony_ci curbits += upair12_sign_bits[curidx2]; 2097cabdff1aSopenharmony_ci vec = &p_codes[curidx*2]; 2098cabdff1aSopenharmony_ci vec2 = &p_codes[curidx2*2]; 2099cabdff1aSopenharmony_ci 2100cabdff1aSopenharmony_ci qenergy += vec[0]*vec[0] + vec[1]*vec[1] 2101cabdff1aSopenharmony_ci + vec2[0]*vec2[0] + vec2[1]*vec2[1]; 2102cabdff1aSopenharmony_ci 2103cabdff1aSopenharmony_ci __asm__ volatile ( 2104cabdff1aSopenharmony_ci ".set push \n\t" 2105cabdff1aSopenharmony_ci ".set noreorder \n\t" 2106cabdff1aSopenharmony_ci 2107cabdff1aSopenharmony_ci "lwc1 %[di0], 0(%[in_pos]) \n\t" 2108cabdff1aSopenharmony_ci "lwc1 %[di1], 4(%[in_pos]) \n\t" 2109cabdff1aSopenharmony_ci "lwc1 %[di2], 8(%[in_pos]) \n\t" 2110cabdff1aSopenharmony_ci "lwc1 %[di3], 12(%[in_pos]) \n\t" 2111cabdff1aSopenharmony_ci "abs.s %[di0], %[di0] \n\t" 2112cabdff1aSopenharmony_ci "abs.s %[di1], %[di1] \n\t" 2113cabdff1aSopenharmony_ci "abs.s %[di2], %[di2] \n\t" 2114cabdff1aSopenharmony_ci "abs.s %[di3], %[di3] \n\t" 2115cabdff1aSopenharmony_ci "lwc1 $f0, 0(%[vec]) \n\t" 2116cabdff1aSopenharmony_ci "lwc1 $f1, 4(%[vec]) \n\t" 2117cabdff1aSopenharmony_ci "lwc1 $f2, 0(%[vec2]) \n\t" 2118cabdff1aSopenharmony_ci "lwc1 $f3, 4(%[vec2]) \n\t" 2119cabdff1aSopenharmony_ci "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t" 2120cabdff1aSopenharmony_ci "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t" 2121cabdff1aSopenharmony_ci "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t" 2122cabdff1aSopenharmony_ci "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t" 2123cabdff1aSopenharmony_ci 2124cabdff1aSopenharmony_ci ".set pop \n\t" 2125cabdff1aSopenharmony_ci 2126cabdff1aSopenharmony_ci : [di0]"=&f"(di0), [di1]"=&f"(di1), 2127cabdff1aSopenharmony_ci [di2]"=&f"(di2), [di3]"=&f"(di3) 2128cabdff1aSopenharmony_ci : [in_pos]"r"(in_pos), [vec]"r"(vec), 2129cabdff1aSopenharmony_ci [vec2]"r"(vec2), [IQ]"f"(IQ) 2130cabdff1aSopenharmony_ci : "$f0", "$f1", "$f2", "$f3", 2131cabdff1aSopenharmony_ci "memory" 2132cabdff1aSopenharmony_ci ); 2133cabdff1aSopenharmony_ci 2134cabdff1aSopenharmony_ci cost += di0 * di0 + di1 * di1 2135cabdff1aSopenharmony_ci + di2 * di2 + di3 * di3; 2136cabdff1aSopenharmony_ci } 2137cabdff1aSopenharmony_ci 2138cabdff1aSopenharmony_ci if (bits) 2139cabdff1aSopenharmony_ci *bits = curbits; 2140cabdff1aSopenharmony_ci if (energy) 2141cabdff1aSopenharmony_ci *energy = qenergy * (IQ*IQ); 2142cabdff1aSopenharmony_ci return cost * lambda + curbits; 2143cabdff1aSopenharmony_ci} 2144cabdff1aSopenharmony_ci 2145cabdff1aSopenharmony_cistatic float get_band_cost_ESC_mips(struct AACEncContext *s, 2146cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 2147cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 2148cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 2149cabdff1aSopenharmony_ci int *bits, float *energy) 2150cabdff1aSopenharmony_ci{ 2151cabdff1aSopenharmony_ci const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; 2152cabdff1aSopenharmony_ci const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; 2153cabdff1aSopenharmony_ci const float CLIPPED_ESCAPE = 165140.0f * IQ; 2154cabdff1aSopenharmony_ci int i; 2155cabdff1aSopenharmony_ci float cost = 0; 2156cabdff1aSopenharmony_ci float qenergy = 0.0f; 2157cabdff1aSopenharmony_ci int qc1, qc2, qc3, qc4; 2158cabdff1aSopenharmony_ci int curbits = 0; 2159cabdff1aSopenharmony_ci 2160cabdff1aSopenharmony_ci uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1]; 2161cabdff1aSopenharmony_ci float *p_codes = (float* )ff_aac_codebook_vectors[cb-1]; 2162cabdff1aSopenharmony_ci 2163cabdff1aSopenharmony_ci for (i = 0; i < size; i += 4) { 2164cabdff1aSopenharmony_ci const float *vec, *vec2; 2165cabdff1aSopenharmony_ci int curidx, curidx2; 2166cabdff1aSopenharmony_ci float t1, t2, t3, t4, V; 2167cabdff1aSopenharmony_ci float di1, di2, di3, di4; 2168cabdff1aSopenharmony_ci int cond0, cond1, cond2, cond3; 2169cabdff1aSopenharmony_ci int c1, c2, c3, c4; 2170cabdff1aSopenharmony_ci int t6, t7; 2171cabdff1aSopenharmony_ci 2172cabdff1aSopenharmony_ci qc1 = scaled[i ] * Q34 + ROUND_STANDARD; 2173cabdff1aSopenharmony_ci qc2 = scaled[i+1] * Q34 + ROUND_STANDARD; 2174cabdff1aSopenharmony_ci qc3 = scaled[i+2] * Q34 + ROUND_STANDARD; 2175cabdff1aSopenharmony_ci qc4 = scaled[i+3] * Q34 + ROUND_STANDARD; 2176cabdff1aSopenharmony_ci 2177cabdff1aSopenharmony_ci __asm__ volatile ( 2178cabdff1aSopenharmony_ci ".set push \n\t" 2179cabdff1aSopenharmony_ci ".set noreorder \n\t" 2180cabdff1aSopenharmony_ci 2181cabdff1aSopenharmony_ci "ori %[t6], $zero, 15 \n\t" 2182cabdff1aSopenharmony_ci "ori %[t7], $zero, 16 \n\t" 2183cabdff1aSopenharmony_ci "shll_s.w %[c1], %[qc1], 18 \n\t" 2184cabdff1aSopenharmony_ci "shll_s.w %[c2], %[qc2], 18 \n\t" 2185cabdff1aSopenharmony_ci "shll_s.w %[c3], %[qc3], 18 \n\t" 2186cabdff1aSopenharmony_ci "shll_s.w %[c4], %[qc4], 18 \n\t" 2187cabdff1aSopenharmony_ci "srl %[c1], %[c1], 18 \n\t" 2188cabdff1aSopenharmony_ci "srl %[c2], %[c2], 18 \n\t" 2189cabdff1aSopenharmony_ci "srl %[c3], %[c3], 18 \n\t" 2190cabdff1aSopenharmony_ci "srl %[c4], %[c4], 18 \n\t" 2191cabdff1aSopenharmony_ci "slt %[cond0], %[t6], %[qc1] \n\t" 2192cabdff1aSopenharmony_ci "slt %[cond1], %[t6], %[qc2] \n\t" 2193cabdff1aSopenharmony_ci "slt %[cond2], %[t6], %[qc3] \n\t" 2194cabdff1aSopenharmony_ci "slt %[cond3], %[t6], %[qc4] \n\t" 2195cabdff1aSopenharmony_ci "movn %[qc1], %[t7], %[cond0] \n\t" 2196cabdff1aSopenharmony_ci "movn %[qc2], %[t7], %[cond1] \n\t" 2197cabdff1aSopenharmony_ci "movn %[qc3], %[t7], %[cond2] \n\t" 2198cabdff1aSopenharmony_ci "movn %[qc4], %[t7], %[cond3] \n\t" 2199cabdff1aSopenharmony_ci 2200cabdff1aSopenharmony_ci ".set pop \n\t" 2201cabdff1aSopenharmony_ci 2202cabdff1aSopenharmony_ci : [qc1]"+r"(qc1), [qc2]"+r"(qc2), 2203cabdff1aSopenharmony_ci [qc3]"+r"(qc3), [qc4]"+r"(qc4), 2204cabdff1aSopenharmony_ci [cond0]"=&r"(cond0), [cond1]"=&r"(cond1), 2205cabdff1aSopenharmony_ci [cond2]"=&r"(cond2), [cond3]"=&r"(cond3), 2206cabdff1aSopenharmony_ci [c1]"=&r"(c1), [c2]"=&r"(c2), 2207cabdff1aSopenharmony_ci [c3]"=&r"(c3), [c4]"=&r"(c4), 2208cabdff1aSopenharmony_ci [t6]"=&r"(t6), [t7]"=&r"(t7) 2209cabdff1aSopenharmony_ci ); 2210cabdff1aSopenharmony_ci 2211cabdff1aSopenharmony_ci curidx = 17 * qc1; 2212cabdff1aSopenharmony_ci curidx += qc2; 2213cabdff1aSopenharmony_ci 2214cabdff1aSopenharmony_ci curidx2 = 17 * qc3; 2215cabdff1aSopenharmony_ci curidx2 += qc4; 2216cabdff1aSopenharmony_ci 2217cabdff1aSopenharmony_ci curbits += p_bits[curidx]; 2218cabdff1aSopenharmony_ci curbits += esc_sign_bits[curidx]; 2219cabdff1aSopenharmony_ci vec = &p_codes[curidx*2]; 2220cabdff1aSopenharmony_ci 2221cabdff1aSopenharmony_ci curbits += p_bits[curidx2]; 2222cabdff1aSopenharmony_ci curbits += esc_sign_bits[curidx2]; 2223cabdff1aSopenharmony_ci vec2 = &p_codes[curidx2*2]; 2224cabdff1aSopenharmony_ci 2225cabdff1aSopenharmony_ci curbits += (av_log2(c1) * 2 - 3) & (-cond0); 2226cabdff1aSopenharmony_ci curbits += (av_log2(c2) * 2 - 3) & (-cond1); 2227cabdff1aSopenharmony_ci curbits += (av_log2(c3) * 2 - 3) & (-cond2); 2228cabdff1aSopenharmony_ci curbits += (av_log2(c4) * 2 - 3) & (-cond3); 2229cabdff1aSopenharmony_ci 2230cabdff1aSopenharmony_ci t1 = fabsf(in[i ]); 2231cabdff1aSopenharmony_ci t2 = fabsf(in[i+1]); 2232cabdff1aSopenharmony_ci t3 = fabsf(in[i+2]); 2233cabdff1aSopenharmony_ci t4 = fabsf(in[i+3]); 2234cabdff1aSopenharmony_ci 2235cabdff1aSopenharmony_ci if (cond0) { 2236cabdff1aSopenharmony_ci if (t1 >= CLIPPED_ESCAPE) { 2237cabdff1aSopenharmony_ci di1 = t1 - CLIPPED_ESCAPE; 2238cabdff1aSopenharmony_ci qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; 2239cabdff1aSopenharmony_ci } else { 2240cabdff1aSopenharmony_ci di1 = t1 - (V = c1 * cbrtf(c1) * IQ); 2241cabdff1aSopenharmony_ci qenergy += V*V; 2242cabdff1aSopenharmony_ci } 2243cabdff1aSopenharmony_ci } else { 2244cabdff1aSopenharmony_ci di1 = t1 - (V = vec[0] * IQ); 2245cabdff1aSopenharmony_ci qenergy += V*V; 2246cabdff1aSopenharmony_ci } 2247cabdff1aSopenharmony_ci 2248cabdff1aSopenharmony_ci if (cond1) { 2249cabdff1aSopenharmony_ci if (t2 >= CLIPPED_ESCAPE) { 2250cabdff1aSopenharmony_ci di2 = t2 - CLIPPED_ESCAPE; 2251cabdff1aSopenharmony_ci qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; 2252cabdff1aSopenharmony_ci } else { 2253cabdff1aSopenharmony_ci di2 = t2 - (V = c2 * cbrtf(c2) * IQ); 2254cabdff1aSopenharmony_ci qenergy += V*V; 2255cabdff1aSopenharmony_ci } 2256cabdff1aSopenharmony_ci } else { 2257cabdff1aSopenharmony_ci di2 = t2 - (V = vec[1] * IQ); 2258cabdff1aSopenharmony_ci qenergy += V*V; 2259cabdff1aSopenharmony_ci } 2260cabdff1aSopenharmony_ci 2261cabdff1aSopenharmony_ci if (cond2) { 2262cabdff1aSopenharmony_ci if (t3 >= CLIPPED_ESCAPE) { 2263cabdff1aSopenharmony_ci di3 = t3 - CLIPPED_ESCAPE; 2264cabdff1aSopenharmony_ci qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; 2265cabdff1aSopenharmony_ci } else { 2266cabdff1aSopenharmony_ci di3 = t3 - (V = c3 * cbrtf(c3) * IQ); 2267cabdff1aSopenharmony_ci qenergy += V*V; 2268cabdff1aSopenharmony_ci } 2269cabdff1aSopenharmony_ci } else { 2270cabdff1aSopenharmony_ci di3 = t3 - (V = vec2[0] * IQ); 2271cabdff1aSopenharmony_ci qenergy += V*V; 2272cabdff1aSopenharmony_ci } 2273cabdff1aSopenharmony_ci 2274cabdff1aSopenharmony_ci if (cond3) { 2275cabdff1aSopenharmony_ci if (t4 >= CLIPPED_ESCAPE) { 2276cabdff1aSopenharmony_ci di4 = t4 - CLIPPED_ESCAPE; 2277cabdff1aSopenharmony_ci qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; 2278cabdff1aSopenharmony_ci } else { 2279cabdff1aSopenharmony_ci di4 = t4 - (V = c4 * cbrtf(c4) * IQ); 2280cabdff1aSopenharmony_ci qenergy += V*V; 2281cabdff1aSopenharmony_ci } 2282cabdff1aSopenharmony_ci } else { 2283cabdff1aSopenharmony_ci di4 = t4 - (V = vec2[1]*IQ); 2284cabdff1aSopenharmony_ci qenergy += V*V; 2285cabdff1aSopenharmony_ci } 2286cabdff1aSopenharmony_ci 2287cabdff1aSopenharmony_ci cost += di1 * di1 + di2 * di2 2288cabdff1aSopenharmony_ci + di3 * di3 + di4 * di4; 2289cabdff1aSopenharmony_ci } 2290cabdff1aSopenharmony_ci 2291cabdff1aSopenharmony_ci if (bits) 2292cabdff1aSopenharmony_ci *bits = curbits; 2293cabdff1aSopenharmony_ci return cost * lambda + curbits; 2294cabdff1aSopenharmony_ci} 2295cabdff1aSopenharmony_ci 2296cabdff1aSopenharmony_cistatic float (*const get_band_cost_arr[])(struct AACEncContext *s, 2297cabdff1aSopenharmony_ci PutBitContext *pb, const float *in, 2298cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 2299cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 2300cabdff1aSopenharmony_ci int *bits, float *energy) = { 2301cabdff1aSopenharmony_ci get_band_cost_ZERO_mips, 2302cabdff1aSopenharmony_ci get_band_cost_SQUAD_mips, 2303cabdff1aSopenharmony_ci get_band_cost_SQUAD_mips, 2304cabdff1aSopenharmony_ci get_band_cost_UQUAD_mips, 2305cabdff1aSopenharmony_ci get_band_cost_UQUAD_mips, 2306cabdff1aSopenharmony_ci get_band_cost_SPAIR_mips, 2307cabdff1aSopenharmony_ci get_band_cost_SPAIR_mips, 2308cabdff1aSopenharmony_ci get_band_cost_UPAIR7_mips, 2309cabdff1aSopenharmony_ci get_band_cost_UPAIR7_mips, 2310cabdff1aSopenharmony_ci get_band_cost_UPAIR12_mips, 2311cabdff1aSopenharmony_ci get_band_cost_UPAIR12_mips, 2312cabdff1aSopenharmony_ci get_band_cost_ESC_mips, 2313cabdff1aSopenharmony_ci get_band_cost_NONE_mips, /* cb 12 doesn't exist */ 2314cabdff1aSopenharmony_ci get_band_cost_ZERO_mips, 2315cabdff1aSopenharmony_ci get_band_cost_ZERO_mips, 2316cabdff1aSopenharmony_ci get_band_cost_ZERO_mips, 2317cabdff1aSopenharmony_ci}; 2318cabdff1aSopenharmony_ci 2319cabdff1aSopenharmony_ci#define get_band_cost( \ 2320cabdff1aSopenharmony_ci s, pb, in, scaled, size, scale_idx, cb, \ 2321cabdff1aSopenharmony_ci lambda, uplim, bits, energy) \ 2322cabdff1aSopenharmony_ci get_band_cost_arr[cb]( \ 2323cabdff1aSopenharmony_ci s, pb, in, scaled, size, scale_idx, cb, \ 2324cabdff1aSopenharmony_ci lambda, uplim, bits, energy) 2325cabdff1aSopenharmony_ci 2326cabdff1aSopenharmony_cistatic float quantize_band_cost(struct AACEncContext *s, const float *in, 2327cabdff1aSopenharmony_ci const float *scaled, int size, int scale_idx, 2328cabdff1aSopenharmony_ci int cb, const float lambda, const float uplim, 2329cabdff1aSopenharmony_ci int *bits, float *energy, int rtz) 2330cabdff1aSopenharmony_ci{ 2331cabdff1aSopenharmony_ci return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy); 2332cabdff1aSopenharmony_ci} 2333cabdff1aSopenharmony_ci 2334cabdff1aSopenharmony_ci#include "libavcodec/aacenc_quantization_misc.h" 2335cabdff1aSopenharmony_ci 2336cabdff1aSopenharmony_ci#include "libavcodec/aaccoder_twoloop.h" 2337cabdff1aSopenharmony_ci 2338cabdff1aSopenharmony_cistatic void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe) 2339cabdff1aSopenharmony_ci{ 2340cabdff1aSopenharmony_ci int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side; 2341cabdff1aSopenharmony_ci uint8_t nextband0[128], nextband1[128]; 2342cabdff1aSopenharmony_ci float M[128], S[128]; 2343cabdff1aSopenharmony_ci float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; 2344cabdff1aSopenharmony_ci const float lambda = s->lambda; 2345cabdff1aSopenharmony_ci const float mslambda = FFMIN(1.0f, lambda / 120.f); 2346cabdff1aSopenharmony_ci SingleChannelElement *sce0 = &cpe->ch[0]; 2347cabdff1aSopenharmony_ci SingleChannelElement *sce1 = &cpe->ch[1]; 2348cabdff1aSopenharmony_ci if (!cpe->common_window) 2349cabdff1aSopenharmony_ci return; 2350cabdff1aSopenharmony_ci 2351cabdff1aSopenharmony_ci /** Scout out next nonzero bands */ 2352cabdff1aSopenharmony_ci ff_init_nextband_map(sce0, nextband0); 2353cabdff1aSopenharmony_ci ff_init_nextband_map(sce1, nextband1); 2354cabdff1aSopenharmony_ci 2355cabdff1aSopenharmony_ci prev_mid = sce0->sf_idx[0]; 2356cabdff1aSopenharmony_ci prev_side = sce1->sf_idx[0]; 2357cabdff1aSopenharmony_ci for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { 2358cabdff1aSopenharmony_ci start = 0; 2359cabdff1aSopenharmony_ci for (g = 0; g < sce0->ics.num_swb; g++) { 2360cabdff1aSopenharmony_ci float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; 2361cabdff1aSopenharmony_ci if (!cpe->is_mask[w*16+g]) 2362cabdff1aSopenharmony_ci cpe->ms_mask[w*16+g] = 0; 2363cabdff1aSopenharmony_ci if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) { 2364cabdff1aSopenharmony_ci float Mmax = 0.0f, Smax = 0.0f; 2365cabdff1aSopenharmony_ci 2366cabdff1aSopenharmony_ci /* Must compute mid/side SF and book for the whole window group */ 2367cabdff1aSopenharmony_ci for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 2368cabdff1aSopenharmony_ci for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { 2369cabdff1aSopenharmony_ci M[i] = (sce0->coeffs[start+(w+w2)*128+i] 2370cabdff1aSopenharmony_ci + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; 2371cabdff1aSopenharmony_ci S[i] = M[i] 2372cabdff1aSopenharmony_ci - sce1->coeffs[start+(w+w2)*128+i]; 2373cabdff1aSopenharmony_ci } 2374cabdff1aSopenharmony_ci abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); 2375cabdff1aSopenharmony_ci abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); 2376cabdff1aSopenharmony_ci for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) { 2377cabdff1aSopenharmony_ci Mmax = FFMAX(Mmax, M34[i]); 2378cabdff1aSopenharmony_ci Smax = FFMAX(Smax, S34[i]); 2379cabdff1aSopenharmony_ci } 2380cabdff1aSopenharmony_ci } 2381cabdff1aSopenharmony_ci 2382cabdff1aSopenharmony_ci for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { 2383cabdff1aSopenharmony_ci float dist1 = 0.0f, dist2 = 0.0f; 2384cabdff1aSopenharmony_ci int B0 = 0, B1 = 0; 2385cabdff1aSopenharmony_ci int minidx; 2386cabdff1aSopenharmony_ci int mididx, sididx; 2387cabdff1aSopenharmony_ci int midcb, sidcb; 2388cabdff1aSopenharmony_ci 2389cabdff1aSopenharmony_ci minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); 2390cabdff1aSopenharmony_ci mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512); 2391cabdff1aSopenharmony_ci sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512); 2392cabdff1aSopenharmony_ci if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT 2393cabdff1aSopenharmony_ci && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g) 2394cabdff1aSopenharmony_ci || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) { 2395cabdff1aSopenharmony_ci /* scalefactor range violation, bad stuff, will decrease quality unacceptably */ 2396cabdff1aSopenharmony_ci continue; 2397cabdff1aSopenharmony_ci } 2398cabdff1aSopenharmony_ci 2399cabdff1aSopenharmony_ci midcb = find_min_book(Mmax, mididx); 2400cabdff1aSopenharmony_ci sidcb = find_min_book(Smax, sididx); 2401cabdff1aSopenharmony_ci 2402cabdff1aSopenharmony_ci /* No CB can be zero */ 2403cabdff1aSopenharmony_ci midcb = FFMAX(1,midcb); 2404cabdff1aSopenharmony_ci sidcb = FFMAX(1,sidcb); 2405cabdff1aSopenharmony_ci 2406cabdff1aSopenharmony_ci for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 2407cabdff1aSopenharmony_ci FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; 2408cabdff1aSopenharmony_ci FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; 2409cabdff1aSopenharmony_ci float minthr = FFMIN(band0->threshold, band1->threshold); 2410cabdff1aSopenharmony_ci int b1,b2,b3,b4; 2411cabdff1aSopenharmony_ci for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { 2412cabdff1aSopenharmony_ci M[i] = (sce0->coeffs[start+(w+w2)*128+i] 2413cabdff1aSopenharmony_ci + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; 2414cabdff1aSopenharmony_ci S[i] = M[i] 2415cabdff1aSopenharmony_ci - sce1->coeffs[start+(w+w2)*128+i]; 2416cabdff1aSopenharmony_ci } 2417cabdff1aSopenharmony_ci 2418cabdff1aSopenharmony_ci abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); 2419cabdff1aSopenharmony_ci abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); 2420cabdff1aSopenharmony_ci abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); 2421cabdff1aSopenharmony_ci abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); 2422cabdff1aSopenharmony_ci dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], 2423cabdff1aSopenharmony_ci L34, 2424cabdff1aSopenharmony_ci sce0->ics.swb_sizes[g], 2425cabdff1aSopenharmony_ci sce0->sf_idx[w*16+g], 2426cabdff1aSopenharmony_ci sce0->band_type[w*16+g], 2427cabdff1aSopenharmony_ci lambda / band0->threshold, INFINITY, &b1, NULL, 0); 2428cabdff1aSopenharmony_ci dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], 2429cabdff1aSopenharmony_ci R34, 2430cabdff1aSopenharmony_ci sce1->ics.swb_sizes[g], 2431cabdff1aSopenharmony_ci sce1->sf_idx[w*16+g], 2432cabdff1aSopenharmony_ci sce1->band_type[w*16+g], 2433cabdff1aSopenharmony_ci lambda / band1->threshold, INFINITY, &b2, NULL, 0); 2434cabdff1aSopenharmony_ci dist2 += quantize_band_cost(s, M, 2435cabdff1aSopenharmony_ci M34, 2436cabdff1aSopenharmony_ci sce0->ics.swb_sizes[g], 2437cabdff1aSopenharmony_ci mididx, 2438cabdff1aSopenharmony_ci midcb, 2439cabdff1aSopenharmony_ci lambda / minthr, INFINITY, &b3, NULL, 0); 2440cabdff1aSopenharmony_ci dist2 += quantize_band_cost(s, S, 2441cabdff1aSopenharmony_ci S34, 2442cabdff1aSopenharmony_ci sce1->ics.swb_sizes[g], 2443cabdff1aSopenharmony_ci sididx, 2444cabdff1aSopenharmony_ci sidcb, 2445cabdff1aSopenharmony_ci mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); 2446cabdff1aSopenharmony_ci B0 += b1+b2; 2447cabdff1aSopenharmony_ci B1 += b3+b4; 2448cabdff1aSopenharmony_ci dist1 -= b1+b2; 2449cabdff1aSopenharmony_ci dist2 -= b3+b4; 2450cabdff1aSopenharmony_ci } 2451cabdff1aSopenharmony_ci cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; 2452cabdff1aSopenharmony_ci if (cpe->ms_mask[w*16+g]) { 2453cabdff1aSopenharmony_ci if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) { 2454cabdff1aSopenharmony_ci sce0->sf_idx[w*16+g] = mididx; 2455cabdff1aSopenharmony_ci sce1->sf_idx[w*16+g] = sididx; 2456cabdff1aSopenharmony_ci sce0->band_type[w*16+g] = midcb; 2457cabdff1aSopenharmony_ci sce1->band_type[w*16+g] = sidcb; 2458cabdff1aSopenharmony_ci } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) { 2459cabdff1aSopenharmony_ci /* ms_mask unneeded, and it confuses some decoders */ 2460cabdff1aSopenharmony_ci cpe->ms_mask[w*16+g] = 0; 2461cabdff1aSopenharmony_ci } 2462cabdff1aSopenharmony_ci break; 2463cabdff1aSopenharmony_ci } else if (B1 > B0) { 2464cabdff1aSopenharmony_ci /* More boost won't fix this */ 2465cabdff1aSopenharmony_ci break; 2466cabdff1aSopenharmony_ci } 2467cabdff1aSopenharmony_ci } 2468cabdff1aSopenharmony_ci } 2469cabdff1aSopenharmony_ci if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT) 2470cabdff1aSopenharmony_ci prev_mid = sce0->sf_idx[w*16+g]; 2471cabdff1aSopenharmony_ci if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT) 2472cabdff1aSopenharmony_ci prev_side = sce1->sf_idx[w*16+g]; 2473cabdff1aSopenharmony_ci start += sce0->ics.swb_sizes[g]; 2474cabdff1aSopenharmony_ci } 2475cabdff1aSopenharmony_ci } 2476cabdff1aSopenharmony_ci} 2477cabdff1aSopenharmony_ci#endif /*HAVE_MIPSFPU */ 2478cabdff1aSopenharmony_ci 2479cabdff1aSopenharmony_ci#include "libavcodec/aaccoder_trellis.h" 2480cabdff1aSopenharmony_ci 2481cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */ 2482cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 2483cabdff1aSopenharmony_ci 2484cabdff1aSopenharmony_civoid ff_aac_coder_init_mips(AACEncContext *c) { 2485cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 2486cabdff1aSopenharmony_ci#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 2487cabdff1aSopenharmony_ci AACCoefficientsEncoder *e = c->coder; 2488cabdff1aSopenharmony_ci int option = c->options.coder; 2489cabdff1aSopenharmony_ci 2490cabdff1aSopenharmony_ci if (option == 2) { 2491cabdff1aSopenharmony_ci e->quantize_and_encode_band = quantize_and_encode_band_mips; 2492cabdff1aSopenharmony_ci e->encode_window_bands_info = codebook_trellis_rate; 2493cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 2494cabdff1aSopenharmony_ci e->search_for_quantizers = search_for_quantizers_twoloop; 2495cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 2496cabdff1aSopenharmony_ci } 2497cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 2498cabdff1aSopenharmony_ci e->search_for_ms = search_for_ms_mips; 2499cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 2500cabdff1aSopenharmony_ci#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */ 2501cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 2502cabdff1aSopenharmony_ci} 2503