1/* 2 * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Note: Rounding-to-nearest used unless otherwise stated 21 * 22 */ 23#include <stdint.h> 24 25#include "config.h" 26#include "libavutil/attributes.h" 27#include "aacpsdsp.h" 28 29static void ps_add_squares_c(INTFLOAT *dst, const INTFLOAT (*src)[2], int n) 30{ 31 int i; 32 for (i = 0; i < n; i++) 33 dst[i] += (UINTFLOAT)AAC_MADD28(src[i][0], src[i][0], src[i][1], src[i][1]); 34} 35 36static void ps_mul_pair_single_c(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1, 37 int n) 38{ 39 int i; 40 for (i = 0; i < n; i++) { 41 dst[i][0] = AAC_MUL16(src0[i][0], src1[i]); 42 dst[i][1] = AAC_MUL16(src0[i][1], src1[i]); 43 } 44} 45 46static void ps_hybrid_analysis_c(INTFLOAT (*out)[2], INTFLOAT (*in)[2], 47 const INTFLOAT (*filter)[8][2], 48 ptrdiff_t stride, int n) 49{ 50 int i, j; 51 52 for (i = 0; i < n; i++) { 53 INT64FLOAT sum_re = (INT64FLOAT)filter[i][6][0] * in[6][0]; 54 INT64FLOAT sum_im = (INT64FLOAT)filter[i][6][0] * in[6][1]; 55 56 for (j = 0; j < 6; j++) { 57 INT64FLOAT in0_re = in[j][0]; 58 INT64FLOAT in0_im = in[j][1]; 59 INT64FLOAT in1_re = in[12-j][0]; 60 INT64FLOAT in1_im = in[12-j][1]; 61 sum_re += (INT64FLOAT)filter[i][j][0] * (in0_re + in1_re) - 62 (INT64FLOAT)filter[i][j][1] * (in0_im - in1_im); 63 sum_im += (INT64FLOAT)filter[i][j][0] * (in0_im + in1_im) + 64 (INT64FLOAT)filter[i][j][1] * (in0_re - in1_re); 65 } 66#if USE_FIXED 67 out[i * stride][0] = (int)((sum_re + 0x40000000) >> 31); 68 out[i * stride][1] = (int)((sum_im + 0x40000000) >> 31); 69#else 70 out[i * stride][0] = sum_re; 71 out[i * stride][1] = sum_im; 72#endif /* USE_FIXED */ 73 } 74} 75 76static void ps_hybrid_analysis_ileave_c(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64], 77 int i, int len) 78{ 79 int j; 80 81 for (; i < 64; i++) { 82 for (j = 0; j < len; j++) { 83 out[i][j][0] = L[0][j][i]; 84 out[i][j][1] = L[1][j][i]; 85 } 86 } 87} 88 89static void ps_hybrid_synthesis_deint_c(INTFLOAT out[2][38][64], 90 INTFLOAT (*in)[32][2], 91 int i, int len) 92{ 93 int n; 94 95 for (; i < 64; i++) { 96 for (n = 0; n < len; n++) { 97 out[0][n][i] = in[i][n][0]; 98 out[1][n][i] = in[i][n][1]; 99 } 100 } 101} 102 103static void ps_decorrelate_c(INTFLOAT (*out)[2], INTFLOAT (*delay)[2], 104 INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2], 105 const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2], 106 const INTFLOAT *transient_gain, 107 INTFLOAT g_decay_slope, 108 int len) 109{ 110 static const INTFLOAT a[] = { Q31(0.65143905753106f), 111 Q31(0.56471812200776f), 112 Q31(0.48954165955695f) }; 113 INTFLOAT ag[PS_AP_LINKS]; 114 int m, n; 115 116 for (m = 0; m < PS_AP_LINKS; m++) 117 ag[m] = AAC_MUL30(a[m], g_decay_slope); 118 119 for (n = 0; n < len; n++) { 120 INTFLOAT in_re = AAC_MSUB30(delay[n][0], phi_fract[0], delay[n][1], phi_fract[1]); 121 INTFLOAT in_im = AAC_MADD30(delay[n][0], phi_fract[1], delay[n][1], phi_fract[0]); 122 for (m = 0; m < PS_AP_LINKS; m++) { 123 INTFLOAT a_re = AAC_MUL31(ag[m], in_re); 124 INTFLOAT a_im = AAC_MUL31(ag[m], in_im); 125 INTFLOAT link_delay_re = ap_delay[m][n+2-m][0]; 126 INTFLOAT link_delay_im = ap_delay[m][n+2-m][1]; 127 INTFLOAT fractional_delay_re = Q_fract[m][0]; 128 INTFLOAT fractional_delay_im = Q_fract[m][1]; 129 INTFLOAT apd_re = in_re; 130 INTFLOAT apd_im = in_im; 131 in_re = AAC_MSUB30(link_delay_re, fractional_delay_re, 132 link_delay_im, fractional_delay_im); 133 in_re -= (UINTFLOAT)a_re; 134 in_im = AAC_MADD30(link_delay_re, fractional_delay_im, 135 link_delay_im, fractional_delay_re); 136 in_im -= (UINTFLOAT)a_im; 137 ap_delay[m][n+5][0] = apd_re + (UINTFLOAT)AAC_MUL31(ag[m], in_re); 138 ap_delay[m][n+5][1] = apd_im + (UINTFLOAT)AAC_MUL31(ag[m], in_im); 139 } 140 out[n][0] = AAC_MUL16(transient_gain[n], in_re); 141 out[n][1] = AAC_MUL16(transient_gain[n], in_im); 142 } 143} 144 145static void ps_stereo_interpolate_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2], 146 INTFLOAT h[2][4], INTFLOAT h_step[2][4], 147 int len) 148{ 149 INTFLOAT h0 = h[0][0]; 150 INTFLOAT h1 = h[0][1]; 151 INTFLOAT h2 = h[0][2]; 152 INTFLOAT h3 = h[0][3]; 153 UINTFLOAT hs0 = h_step[0][0]; 154 UINTFLOAT hs1 = h_step[0][1]; 155 UINTFLOAT hs2 = h_step[0][2]; 156 UINTFLOAT hs3 = h_step[0][3]; 157 int n; 158 159 for (n = 0; n < len; n++) { 160 //l is s, r is d 161 INTFLOAT l_re = l[n][0]; 162 INTFLOAT l_im = l[n][1]; 163 INTFLOAT r_re = r[n][0]; 164 INTFLOAT r_im = r[n][1]; 165 h0 += hs0; 166 h1 += hs1; 167 h2 += hs2; 168 h3 += hs3; 169 l[n][0] = AAC_MADD30(h0, l_re, h2, r_re); 170 l[n][1] = AAC_MADD30(h0, l_im, h2, r_im); 171 r[n][0] = AAC_MADD30(h1, l_re, h3, r_re); 172 r[n][1] = AAC_MADD30(h1, l_im, h3, r_im); 173 } 174} 175 176static void ps_stereo_interpolate_ipdopd_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2], 177 INTFLOAT h[2][4], INTFLOAT h_step[2][4], 178 int len) 179{ 180 INTFLOAT h00 = h[0][0], h10 = h[1][0]; 181 INTFLOAT h01 = h[0][1], h11 = h[1][1]; 182 INTFLOAT h02 = h[0][2], h12 = h[1][2]; 183 INTFLOAT h03 = h[0][3], h13 = h[1][3]; 184 UINTFLOAT hs00 = h_step[0][0], hs10 = h_step[1][0]; 185 UINTFLOAT hs01 = h_step[0][1], hs11 = h_step[1][1]; 186 UINTFLOAT hs02 = h_step[0][2], hs12 = h_step[1][2]; 187 UINTFLOAT hs03 = h_step[0][3], hs13 = h_step[1][3]; 188 int n; 189 190 for (n = 0; n < len; n++) { 191 //l is s, r is d 192 INTFLOAT l_re = l[n][0]; 193 INTFLOAT l_im = l[n][1]; 194 INTFLOAT r_re = r[n][0]; 195 INTFLOAT r_im = r[n][1]; 196 h00 += hs00; 197 h01 += hs01; 198 h02 += hs02; 199 h03 += hs03; 200 h10 += hs10; 201 h11 += hs11; 202 h12 += hs12; 203 h13 += hs13; 204 205 l[n][0] = AAC_MSUB30_V8(h00, l_re, h02, r_re, h10, l_im, h12, r_im); 206 l[n][1] = AAC_MADD30_V8(h00, l_im, h02, r_im, h10, l_re, h12, r_re); 207 r[n][0] = AAC_MSUB30_V8(h01, l_re, h03, r_re, h11, l_im, h13, r_im); 208 r[n][1] = AAC_MADD30_V8(h01, l_im, h03, r_im, h11, l_re, h13, r_re); 209 } 210} 211 212av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s) 213{ 214 s->add_squares = ps_add_squares_c; 215 s->mul_pair_single = ps_mul_pair_single_c; 216 s->hybrid_analysis = ps_hybrid_analysis_c; 217 s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c; 218 s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c; 219 s->decorrelate = ps_decorrelate_c; 220 s->stereo_interpolate[0] = ps_stereo_interpolate_c; 221 s->stereo_interpolate[1] = ps_stereo_interpolate_ipdopd_c; 222 223#if !USE_FIXED 224#if ARCH_ARM 225 ff_psdsp_init_arm(s); 226#elif ARCH_AARCH64 227 ff_psdsp_init_aarch64(s); 228#elif ARCH_MIPS 229 ff_psdsp_init_mips(s); 230#elif ARCH_X86 231 ff_psdsp_init_x86(s); 232#endif 233#endif /* !USE_FIXED */ 234} 235