1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2012 3cabdff1aSopenharmony_ci * MIPS Technologies, Inc., California. 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * Redistribution and use in source and binary forms, with or without 6cabdff1aSopenharmony_ci * modification, are permitted provided that the following conditions 7cabdff1aSopenharmony_ci * are met: 8cabdff1aSopenharmony_ci * 1. Redistributions of source code must retain the above copyright 9cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer. 10cabdff1aSopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 11cabdff1aSopenharmony_ci * notice, this list of conditions and the following disclaimer in the 12cabdff1aSopenharmony_ci * documentation and/or other materials provided with the distribution. 13cabdff1aSopenharmony_ci * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14cabdff1aSopenharmony_ci * contributors may be used to endorse or promote products derived from 15cabdff1aSopenharmony_ci * this software without specific prior written permission. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18cabdff1aSopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19cabdff1aSopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20cabdff1aSopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21cabdff1aSopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22cabdff1aSopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23cabdff1aSopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24cabdff1aSopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25cabdff1aSopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26cabdff1aSopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27cabdff1aSopenharmony_ci * SUCH DAMAGE. 28cabdff1aSopenharmony_ci * 29cabdff1aSopenharmony_ci * Authors: Darko Laus (darko@mips.com) 30cabdff1aSopenharmony_ci * Djordje Pesut (djordje@mips.com) 31cabdff1aSopenharmony_ci * Mirjana Vulin (mvulin@mips.com) 32cabdff1aSopenharmony_ci * 33cabdff1aSopenharmony_ci * This file is part of FFmpeg. 34cabdff1aSopenharmony_ci * 35cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 36cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 37cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 38cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 39cabdff1aSopenharmony_ci * 40cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 41cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 42cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 43cabdff1aSopenharmony_ci * Lesser General Public License for more details. 44cabdff1aSopenharmony_ci * 45cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 46cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 47cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 48cabdff1aSopenharmony_ci */ 49cabdff1aSopenharmony_ci 50cabdff1aSopenharmony_ci/** 51cabdff1aSopenharmony_ci * @file 52cabdff1aSopenharmony_ci * Reference: libavcodec/aacdec.c 53cabdff1aSopenharmony_ci */ 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 56cabdff1aSopenharmony_ci#include "libavcodec/aac.h" 57cabdff1aSopenharmony_ci#include "aacdec_mips.h" 58cabdff1aSopenharmony_ci#include "libavcodec/aactab.h" 59cabdff1aSopenharmony_ci#include "libavcodec/sinewin.h" 60cabdff1aSopenharmony_ci#include "libavutil/mips/asmdefs.h" 61cabdff1aSopenharmony_ci 62cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 63cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 64cabdff1aSopenharmony_cistatic av_always_inline void float_copy(float *dst, const float *src, int count) 65cabdff1aSopenharmony_ci{ 66cabdff1aSopenharmony_ci // Copy 'count' floats from src to dst 67cabdff1aSopenharmony_ci const float *loop_end = src + count; 68cabdff1aSopenharmony_ci int temp[8]; 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_ci // count must be a multiple of 8 71cabdff1aSopenharmony_ci av_assert2(count % 8 == 0); 72cabdff1aSopenharmony_ci 73cabdff1aSopenharmony_ci // loop unrolled 8 times 74cabdff1aSopenharmony_ci __asm__ volatile ( 75cabdff1aSopenharmony_ci ".set push \n\t" 76cabdff1aSopenharmony_ci ".set noreorder \n\t" 77cabdff1aSopenharmony_ci "1: \n\t" 78cabdff1aSopenharmony_ci "lw %[temp0], 0(%[src]) \n\t" 79cabdff1aSopenharmony_ci "lw %[temp1], 4(%[src]) \n\t" 80cabdff1aSopenharmony_ci "lw %[temp2], 8(%[src]) \n\t" 81cabdff1aSopenharmony_ci "lw %[temp3], 12(%[src]) \n\t" 82cabdff1aSopenharmony_ci "lw %[temp4], 16(%[src]) \n\t" 83cabdff1aSopenharmony_ci "lw %[temp5], 20(%[src]) \n\t" 84cabdff1aSopenharmony_ci "lw %[temp6], 24(%[src]) \n\t" 85cabdff1aSopenharmony_ci "lw %[temp7], 28(%[src]) \n\t" 86cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 32 \n\t" 87cabdff1aSopenharmony_ci "sw %[temp0], 0(%[dst]) \n\t" 88cabdff1aSopenharmony_ci "sw %[temp1], 4(%[dst]) \n\t" 89cabdff1aSopenharmony_ci "sw %[temp2], 8(%[dst]) \n\t" 90cabdff1aSopenharmony_ci "sw %[temp3], 12(%[dst]) \n\t" 91cabdff1aSopenharmony_ci "sw %[temp4], 16(%[dst]) \n\t" 92cabdff1aSopenharmony_ci "sw %[temp5], 20(%[dst]) \n\t" 93cabdff1aSopenharmony_ci "sw %[temp6], 24(%[dst]) \n\t" 94cabdff1aSopenharmony_ci "sw %[temp7], 28(%[dst]) \n\t" 95cabdff1aSopenharmony_ci "bne %[src], %[loop_end], 1b \n\t" 96cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 32 \n\t" 97cabdff1aSopenharmony_ci ".set pop \n\t" 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_ci : [temp0]"=&r"(temp[0]), [temp1]"=&r"(temp[1]), 100cabdff1aSopenharmony_ci [temp2]"=&r"(temp[2]), [temp3]"=&r"(temp[3]), 101cabdff1aSopenharmony_ci [temp4]"=&r"(temp[4]), [temp5]"=&r"(temp[5]), 102cabdff1aSopenharmony_ci [temp6]"=&r"(temp[6]), [temp7]"=&r"(temp[7]), 103cabdff1aSopenharmony_ci [src]"+r"(src), [dst]"+r"(dst) 104cabdff1aSopenharmony_ci : [loop_end]"r"(loop_end) 105cabdff1aSopenharmony_ci : "memory" 106cabdff1aSopenharmony_ci ); 107cabdff1aSopenharmony_ci} 108cabdff1aSopenharmony_ci 109cabdff1aSopenharmony_cistatic av_always_inline int lcg_random(unsigned previous_val) 110cabdff1aSopenharmony_ci{ 111cabdff1aSopenharmony_ci union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; 112cabdff1aSopenharmony_ci return v.s; 113cabdff1aSopenharmony_ci} 114cabdff1aSopenharmony_ci 115cabdff1aSopenharmony_cistatic void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) 116cabdff1aSopenharmony_ci{ 117cabdff1aSopenharmony_ci IndividualChannelStream *ics = &sce->ics; 118cabdff1aSopenharmony_ci float *in = sce->coeffs; 119cabdff1aSopenharmony_ci float *out = sce->ret; 120cabdff1aSopenharmony_ci float *saved = sce->saved; 121cabdff1aSopenharmony_ci const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 122cabdff1aSopenharmony_ci const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 123cabdff1aSopenharmony_ci const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 124cabdff1aSopenharmony_ci float *buf = ac->buf_mdct; 125cabdff1aSopenharmony_ci int i; 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_ci if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 128cabdff1aSopenharmony_ci for (i = 0; i < 1024; i += 128) 129cabdff1aSopenharmony_ci ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i); 130cabdff1aSopenharmony_ci } else 131cabdff1aSopenharmony_ci ac->mdct.imdct_half(&ac->mdct, buf, in); 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_ci /* window overlapping 134cabdff1aSopenharmony_ci * NOTE: To simplify the overlapping code, all 'meaningless' short to long 135cabdff1aSopenharmony_ci * and long to short transitions are considered to be short to short 136cabdff1aSopenharmony_ci * transitions. This leaves just two cases (long to long and short to short) 137cabdff1aSopenharmony_ci * with a little special sauce for EIGHT_SHORT_SEQUENCE. 138cabdff1aSopenharmony_ci */ 139cabdff1aSopenharmony_ci if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 140cabdff1aSopenharmony_ci (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 141cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512); 142cabdff1aSopenharmony_ci } else { 143cabdff1aSopenharmony_ci float_copy(out, saved, 448); 144cabdff1aSopenharmony_ci 145cabdff1aSopenharmony_ci if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 146cabdff1aSopenharmony_ci { 147cabdff1aSopenharmony_ci float wi; 148cabdff1aSopenharmony_ci float wj; 149cabdff1aSopenharmony_ci int i; 150cabdff1aSopenharmony_ci float temp0, temp1, temp2, temp3; 151cabdff1aSopenharmony_ci float *dst0 = out + 448 + 0*128; 152cabdff1aSopenharmony_ci float *dst1 = dst0 + 64 + 63; 153cabdff1aSopenharmony_ci float *dst2 = saved + 63; 154cabdff1aSopenharmony_ci float *win0 = (float*)swindow; 155cabdff1aSopenharmony_ci float *win1 = win0 + 64 + 63; 156cabdff1aSopenharmony_ci float *win0_prev = (float*)swindow_prev; 157cabdff1aSopenharmony_ci float *win1_prev = win0_prev + 64 + 63; 158cabdff1aSopenharmony_ci float *src0_prev = saved + 448; 159cabdff1aSopenharmony_ci float *src1_prev = buf + 0*128 + 63; 160cabdff1aSopenharmony_ci float *src0 = buf + 0*128 + 64; 161cabdff1aSopenharmony_ci float *src1 = buf + 1*128 + 63; 162cabdff1aSopenharmony_ci 163cabdff1aSopenharmony_ci for(i = 0; i < 64; i++) 164cabdff1aSopenharmony_ci { 165cabdff1aSopenharmony_ci temp0 = src0_prev[0]; 166cabdff1aSopenharmony_ci temp1 = src1_prev[0]; 167cabdff1aSopenharmony_ci wi = *win0_prev; 168cabdff1aSopenharmony_ci wj = *win1_prev; 169cabdff1aSopenharmony_ci temp2 = src0[0]; 170cabdff1aSopenharmony_ci temp3 = src1[0]; 171cabdff1aSopenharmony_ci dst0[0] = temp0 * wj - temp1 * wi; 172cabdff1aSopenharmony_ci dst1[0] = temp0 * wi + temp1 * wj; 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_ci wi = *win0; 175cabdff1aSopenharmony_ci wj = *win1; 176cabdff1aSopenharmony_ci 177cabdff1aSopenharmony_ci temp0 = src0[128]; 178cabdff1aSopenharmony_ci temp1 = src1[128]; 179cabdff1aSopenharmony_ci dst0[128] = temp2 * wj - temp3 * wi; 180cabdff1aSopenharmony_ci dst1[128] = temp2 * wi + temp3 * wj; 181cabdff1aSopenharmony_ci 182cabdff1aSopenharmony_ci temp2 = src0[256]; 183cabdff1aSopenharmony_ci temp3 = src1[256]; 184cabdff1aSopenharmony_ci dst0[256] = temp0 * wj - temp1 * wi; 185cabdff1aSopenharmony_ci dst1[256] = temp0 * wi + temp1 * wj; 186cabdff1aSopenharmony_ci dst0[384] = temp2 * wj - temp3 * wi; 187cabdff1aSopenharmony_ci dst1[384] = temp2 * wi + temp3 * wj; 188cabdff1aSopenharmony_ci 189cabdff1aSopenharmony_ci temp0 = src0[384]; 190cabdff1aSopenharmony_ci temp1 = src1[384]; 191cabdff1aSopenharmony_ci dst0[512] = temp0 * wj - temp1 * wi; 192cabdff1aSopenharmony_ci dst2[0] = temp0 * wi + temp1 * wj; 193cabdff1aSopenharmony_ci 194cabdff1aSopenharmony_ci src0++; 195cabdff1aSopenharmony_ci src1--; 196cabdff1aSopenharmony_ci src0_prev++; 197cabdff1aSopenharmony_ci src1_prev--; 198cabdff1aSopenharmony_ci win0++; 199cabdff1aSopenharmony_ci win1--; 200cabdff1aSopenharmony_ci win0_prev++; 201cabdff1aSopenharmony_ci win1_prev--; 202cabdff1aSopenharmony_ci dst0++; 203cabdff1aSopenharmony_ci dst1--; 204cabdff1aSopenharmony_ci dst2--; 205cabdff1aSopenharmony_ci } 206cabdff1aSopenharmony_ci } 207cabdff1aSopenharmony_ci } else { 208cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); 209cabdff1aSopenharmony_ci float_copy(out + 576, buf + 64, 448); 210cabdff1aSopenharmony_ci } 211cabdff1aSopenharmony_ci } 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci // buffer update 214cabdff1aSopenharmony_ci if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 215cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); 216cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); 217cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); 218cabdff1aSopenharmony_ci float_copy(saved + 448, buf + 7*128 + 64, 64); 219cabdff1aSopenharmony_ci } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 220cabdff1aSopenharmony_ci float_copy(saved, buf + 512, 448); 221cabdff1aSopenharmony_ci float_copy(saved + 448, buf + 7*128 + 64, 64); 222cabdff1aSopenharmony_ci } else { // LONG_STOP or ONLY_LONG 223cabdff1aSopenharmony_ci float_copy(saved, buf + 512, 512); 224cabdff1aSopenharmony_ci } 225cabdff1aSopenharmony_ci} 226cabdff1aSopenharmony_ci 227cabdff1aSopenharmony_cistatic void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce) 228cabdff1aSopenharmony_ci{ 229cabdff1aSopenharmony_ci const LongTermPrediction *ltp = &sce->ics.ltp; 230cabdff1aSopenharmony_ci const uint16_t *offsets = sce->ics.swb_offset; 231cabdff1aSopenharmony_ci int i, sfb; 232cabdff1aSopenharmony_ci int j, k; 233cabdff1aSopenharmony_ci 234cabdff1aSopenharmony_ci if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 235cabdff1aSopenharmony_ci float *predTime = sce->ret; 236cabdff1aSopenharmony_ci float *predFreq = ac->buf_mdct; 237cabdff1aSopenharmony_ci float *p_predTime; 238cabdff1aSopenharmony_ci int16_t num_samples = 2048; 239cabdff1aSopenharmony_ci 240cabdff1aSopenharmony_ci if (ltp->lag < 1024) 241cabdff1aSopenharmony_ci num_samples = ltp->lag + 1024; 242cabdff1aSopenharmony_ci j = (2048 - num_samples) >> 2; 243cabdff1aSopenharmony_ci k = (2048 - num_samples) & 3; 244cabdff1aSopenharmony_ci p_predTime = &predTime[num_samples]; 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci for (i = 0; i < num_samples; i++) 247cabdff1aSopenharmony_ci predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef; 248cabdff1aSopenharmony_ci for (i = 0; i < j; i++) { 249cabdff1aSopenharmony_ci 250cabdff1aSopenharmony_ci /* loop unrolled 4 times */ 251cabdff1aSopenharmony_ci __asm__ volatile ( 252cabdff1aSopenharmony_ci "sw $0, 0(%[p_predTime]) \n\t" 253cabdff1aSopenharmony_ci "sw $0, 4(%[p_predTime]) \n\t" 254cabdff1aSopenharmony_ci "sw $0, 8(%[p_predTime]) \n\t" 255cabdff1aSopenharmony_ci "sw $0, 12(%[p_predTime]) \n\t" 256cabdff1aSopenharmony_ci PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t" 257cabdff1aSopenharmony_ci 258cabdff1aSopenharmony_ci : [p_predTime]"+r"(p_predTime) 259cabdff1aSopenharmony_ci : 260cabdff1aSopenharmony_ci : "memory" 261cabdff1aSopenharmony_ci ); 262cabdff1aSopenharmony_ci } 263cabdff1aSopenharmony_ci for (i = 0; i < k; i++) { 264cabdff1aSopenharmony_ci 265cabdff1aSopenharmony_ci __asm__ volatile ( 266cabdff1aSopenharmony_ci "sw $0, 0(%[p_predTime]) \n\t" 267cabdff1aSopenharmony_ci PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t" 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_ci : [p_predTime]"+r"(p_predTime) 270cabdff1aSopenharmony_ci : 271cabdff1aSopenharmony_ci : "memory" 272cabdff1aSopenharmony_ci ); 273cabdff1aSopenharmony_ci } 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_ci ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics); 276cabdff1aSopenharmony_ci 277cabdff1aSopenharmony_ci if (sce->tns.present) 278cabdff1aSopenharmony_ci ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0); 279cabdff1aSopenharmony_ci 280cabdff1aSopenharmony_ci for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++) 281cabdff1aSopenharmony_ci if (ltp->used[sfb]) 282cabdff1aSopenharmony_ci for (i = offsets[sfb]; i < offsets[sfb + 1]; i++) 283cabdff1aSopenharmony_ci sce->coeffs[i] += predFreq[i]; 284cabdff1aSopenharmony_ci } 285cabdff1aSopenharmony_ci} 286cabdff1aSopenharmony_ci 287cabdff1aSopenharmony_cistatic av_always_inline void fmul_and_reverse(float *dst, const float *src0, const float *src1, int count) 288cabdff1aSopenharmony_ci{ 289cabdff1aSopenharmony_ci /* Multiply 'count' floats in src0 by src1 and store the results in dst in reverse */ 290cabdff1aSopenharmony_ci /* This should be equivalent to a normal fmul, followed by reversing dst */ 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_ci // count must be a multiple of 4 293cabdff1aSopenharmony_ci av_assert2(count % 4 == 0); 294cabdff1aSopenharmony_ci 295cabdff1aSopenharmony_ci // move src0 and src1 to the last element of their arrays 296cabdff1aSopenharmony_ci src0 += count - 1; 297cabdff1aSopenharmony_ci src1 += count - 1; 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_ci for (; count > 0; count -= 4){ 300cabdff1aSopenharmony_ci float temp[12]; 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci /* loop unrolled 4 times */ 303cabdff1aSopenharmony_ci __asm__ volatile ( 304cabdff1aSopenharmony_ci "lwc1 %[temp0], 0(%[ptr2]) \n\t" 305cabdff1aSopenharmony_ci "lwc1 %[temp1], -4(%[ptr2]) \n\t" 306cabdff1aSopenharmony_ci "lwc1 %[temp2], -8(%[ptr2]) \n\t" 307cabdff1aSopenharmony_ci "lwc1 %[temp3], -12(%[ptr2]) \n\t" 308cabdff1aSopenharmony_ci "lwc1 %[temp4], 0(%[ptr3]) \n\t" 309cabdff1aSopenharmony_ci "lwc1 %[temp5], -4(%[ptr3]) \n\t" 310cabdff1aSopenharmony_ci "lwc1 %[temp6], -8(%[ptr3]) \n\t" 311cabdff1aSopenharmony_ci "lwc1 %[temp7], -12(%[ptr3]) \n\t" 312cabdff1aSopenharmony_ci "mul.s %[temp8], %[temp0], %[temp4] \n\t" 313cabdff1aSopenharmony_ci "mul.s %[temp9], %[temp1], %[temp5] \n\t" 314cabdff1aSopenharmony_ci "mul.s %[temp10], %[temp2], %[temp6] \n\t" 315cabdff1aSopenharmony_ci "mul.s %[temp11], %[temp3], %[temp7] \n\t" 316cabdff1aSopenharmony_ci "swc1 %[temp8], 0(%[ptr1]) \n\t" 317cabdff1aSopenharmony_ci "swc1 %[temp9], 4(%[ptr1]) \n\t" 318cabdff1aSopenharmony_ci "swc1 %[temp10], 8(%[ptr1]) \n\t" 319cabdff1aSopenharmony_ci "swc1 %[temp11], 12(%[ptr1]) \n\t" 320cabdff1aSopenharmony_ci PTR_ADDIU "%[ptr1], %[ptr1], 16 \n\t" 321cabdff1aSopenharmony_ci PTR_ADDIU "%[ptr2], %[ptr2], -16 \n\t" 322cabdff1aSopenharmony_ci PTR_ADDIU "%[ptr3], %[ptr3], -16 \n\t" 323cabdff1aSopenharmony_ci 324cabdff1aSopenharmony_ci : [temp0]"=&f"(temp[0]), [temp1]"=&f"(temp[1]), 325cabdff1aSopenharmony_ci [temp2]"=&f"(temp[2]), [temp3]"=&f"(temp[3]), 326cabdff1aSopenharmony_ci [temp4]"=&f"(temp[4]), [temp5]"=&f"(temp[5]), 327cabdff1aSopenharmony_ci [temp6]"=&f"(temp[6]), [temp7]"=&f"(temp[7]), 328cabdff1aSopenharmony_ci [temp8]"=&f"(temp[8]), [temp9]"=&f"(temp[9]), 329cabdff1aSopenharmony_ci [temp10]"=&f"(temp[10]), [temp11]"=&f"(temp[11]), 330cabdff1aSopenharmony_ci [ptr1]"+r"(dst), [ptr2]"+r"(src0), [ptr3]"+r"(src1) 331cabdff1aSopenharmony_ci : 332cabdff1aSopenharmony_ci : "memory" 333cabdff1aSopenharmony_ci ); 334cabdff1aSopenharmony_ci } 335cabdff1aSopenharmony_ci} 336cabdff1aSopenharmony_ci 337cabdff1aSopenharmony_cistatic void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) 338cabdff1aSopenharmony_ci{ 339cabdff1aSopenharmony_ci IndividualChannelStream *ics = &sce->ics; 340cabdff1aSopenharmony_ci float *saved = sce->saved; 341cabdff1aSopenharmony_ci float *saved_ltp = sce->coeffs; 342cabdff1aSopenharmony_ci const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; 343cabdff1aSopenharmony_ci const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 344cabdff1aSopenharmony_ci uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 345cabdff1aSopenharmony_ci 346cabdff1aSopenharmony_ci if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 347cabdff1aSopenharmony_ci float *p_saved_ltp = saved_ltp + 576; 348cabdff1aSopenharmony_ci float *loop_end1 = p_saved_ltp + 448; 349cabdff1aSopenharmony_ci 350cabdff1aSopenharmony_ci float_copy(saved_ltp, saved, 512); 351cabdff1aSopenharmony_ci 352cabdff1aSopenharmony_ci /* loop unrolled 8 times */ 353cabdff1aSopenharmony_ci __asm__ volatile ( 354cabdff1aSopenharmony_ci "1: \n\t" 355cabdff1aSopenharmony_ci "sw $0, 0(%[p_saved_ltp]) \n\t" 356cabdff1aSopenharmony_ci "sw $0, 4(%[p_saved_ltp]) \n\t" 357cabdff1aSopenharmony_ci "sw $0, 8(%[p_saved_ltp]) \n\t" 358cabdff1aSopenharmony_ci "sw $0, 12(%[p_saved_ltp]) \n\t" 359cabdff1aSopenharmony_ci "sw $0, 16(%[p_saved_ltp]) \n\t" 360cabdff1aSopenharmony_ci "sw $0, 20(%[p_saved_ltp]) \n\t" 361cabdff1aSopenharmony_ci "sw $0, 24(%[p_saved_ltp]) \n\t" 362cabdff1aSopenharmony_ci "sw $0, 28(%[p_saved_ltp]) \n\t" 363cabdff1aSopenharmony_ci PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t" 364cabdff1aSopenharmony_ci "bne %[p_saved_ltp], %[loop_end1], 1b \n\t" 365cabdff1aSopenharmony_ci 366cabdff1aSopenharmony_ci : [p_saved_ltp]"+r"(p_saved_ltp) 367cabdff1aSopenharmony_ci : [loop_end1]"r"(loop_end1) 368cabdff1aSopenharmony_ci : "memory" 369cabdff1aSopenharmony_ci ); 370cabdff1aSopenharmony_ci 371cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 372cabdff1aSopenharmony_ci fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 960, swindow, 64); 373cabdff1aSopenharmony_ci } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 374cabdff1aSopenharmony_ci float *buff0 = saved; 375cabdff1aSopenharmony_ci float *buff1 = saved_ltp; 376cabdff1aSopenharmony_ci float *loop_end = saved + 448; 377cabdff1aSopenharmony_ci 378cabdff1aSopenharmony_ci /* loop unrolled 8 times */ 379cabdff1aSopenharmony_ci __asm__ volatile ( 380cabdff1aSopenharmony_ci ".set push \n\t" 381cabdff1aSopenharmony_ci ".set noreorder \n\t" 382cabdff1aSopenharmony_ci "1: \n\t" 383cabdff1aSopenharmony_ci "lw %[temp0], 0(%[src]) \n\t" 384cabdff1aSopenharmony_ci "lw %[temp1], 4(%[src]) \n\t" 385cabdff1aSopenharmony_ci "lw %[temp2], 8(%[src]) \n\t" 386cabdff1aSopenharmony_ci "lw %[temp3], 12(%[src]) \n\t" 387cabdff1aSopenharmony_ci "lw %[temp4], 16(%[src]) \n\t" 388cabdff1aSopenharmony_ci "lw %[temp5], 20(%[src]) \n\t" 389cabdff1aSopenharmony_ci "lw %[temp6], 24(%[src]) \n\t" 390cabdff1aSopenharmony_ci "lw %[temp7], 28(%[src]) \n\t" 391cabdff1aSopenharmony_ci PTR_ADDIU "%[src], %[src], 32 \n\t" 392cabdff1aSopenharmony_ci "sw %[temp0], 0(%[dst]) \n\t" 393cabdff1aSopenharmony_ci "sw %[temp1], 4(%[dst]) \n\t" 394cabdff1aSopenharmony_ci "sw %[temp2], 8(%[dst]) \n\t" 395cabdff1aSopenharmony_ci "sw %[temp3], 12(%[dst]) \n\t" 396cabdff1aSopenharmony_ci "sw %[temp4], 16(%[dst]) \n\t" 397cabdff1aSopenharmony_ci "sw %[temp5], 20(%[dst]) \n\t" 398cabdff1aSopenharmony_ci "sw %[temp6], 24(%[dst]) \n\t" 399cabdff1aSopenharmony_ci "sw %[temp7], 28(%[dst]) \n\t" 400cabdff1aSopenharmony_ci "sw $0, 2304(%[dst]) \n\t" 401cabdff1aSopenharmony_ci "sw $0, 2308(%[dst]) \n\t" 402cabdff1aSopenharmony_ci "sw $0, 2312(%[dst]) \n\t" 403cabdff1aSopenharmony_ci "sw $0, 2316(%[dst]) \n\t" 404cabdff1aSopenharmony_ci "sw $0, 2320(%[dst]) \n\t" 405cabdff1aSopenharmony_ci "sw $0, 2324(%[dst]) \n\t" 406cabdff1aSopenharmony_ci "sw $0, 2328(%[dst]) \n\t" 407cabdff1aSopenharmony_ci "sw $0, 2332(%[dst]) \n\t" 408cabdff1aSopenharmony_ci "bne %[src], %[loop_end], 1b \n\t" 409cabdff1aSopenharmony_ci PTR_ADDIU "%[dst], %[dst], 32 \n\t" 410cabdff1aSopenharmony_ci ".set pop \n\t" 411cabdff1aSopenharmony_ci 412cabdff1aSopenharmony_ci : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 413cabdff1aSopenharmony_ci [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 414cabdff1aSopenharmony_ci [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 415cabdff1aSopenharmony_ci [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 416cabdff1aSopenharmony_ci [src]"+r"(buff0), [dst]"+r"(buff1) 417cabdff1aSopenharmony_ci : [loop_end]"r"(loop_end) 418cabdff1aSopenharmony_ci : "memory" 419cabdff1aSopenharmony_ci ); 420cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 421cabdff1aSopenharmony_ci fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 960, swindow, 64); 422cabdff1aSopenharmony_ci } else { // LONG_STOP or ONLY_LONG 423cabdff1aSopenharmony_ci ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); 424cabdff1aSopenharmony_ci fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 512, lwindow, 512); 425cabdff1aSopenharmony_ci } 426cabdff1aSopenharmony_ci 427cabdff1aSopenharmony_ci float_copy(sce->ltp_state, sce->ltp_state + 1024, 1024); 428cabdff1aSopenharmony_ci float_copy(sce->ltp_state + 1024, sce->ret, 1024); 429cabdff1aSopenharmony_ci float_copy(sce->ltp_state + 2048, saved_ltp, 1024); 430cabdff1aSopenharmony_ci} 431cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 432cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 433cabdff1aSopenharmony_ci 434cabdff1aSopenharmony_civoid ff_aacdec_init_mips(AACContext *c) 435cabdff1aSopenharmony_ci{ 436cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 437cabdff1aSopenharmony_ci#if HAVE_MIPSFPU 438cabdff1aSopenharmony_ci c->imdct_and_windowing = imdct_and_windowing_mips; 439cabdff1aSopenharmony_ci c->apply_ltp = apply_ltp_mips; 440cabdff1aSopenharmony_ci c->update_ltp = update_ltp_mips; 441cabdff1aSopenharmony_ci#endif /* HAVE_MIPSFPU */ 442cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 443cabdff1aSopenharmony_ci} 444