1/* 2 * Copyright (C) 2016 foo86 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavutil/mem_internal.h" 22 23#include "dcadsp.h" 24#include "dcamath.h" 25 26static void decode_hf_c(int32_t **dst, 27 const int32_t *vq_index, 28 const int8_t hf_vq[1024][32], 29 int32_t scale_factors[32][2], 30 ptrdiff_t sb_start, ptrdiff_t sb_end, 31 ptrdiff_t ofs, ptrdiff_t len) 32{ 33 int i, j; 34 35 for (i = sb_start; i < sb_end; i++) { 36 const int8_t *coeff = hf_vq[vq_index[i]]; 37 int32_t scale = scale_factors[i][0]; 38 for (j = 0; j < len; j++) 39 dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4); 40 } 41} 42 43static void decode_joint_c(int32_t **dst, int32_t **src, 44 const int32_t *scale_factors, 45 ptrdiff_t sb_start, ptrdiff_t sb_end, 46 ptrdiff_t ofs, ptrdiff_t len) 47{ 48 int i, j; 49 50 for (i = sb_start; i < sb_end; i++) { 51 int32_t scale = scale_factors[i]; 52 for (j = 0; j < len; j++) 53 dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale)); 54 } 55} 56 57static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples, 58 const float *filter_coeff, ptrdiff_t npcmblocks, 59 int dec_select) 60{ 61 // Select decimation factor 62 int factor = 64 << dec_select; 63 int ncoeffs = 8 >> dec_select; 64 int nlfesamples = npcmblocks >> (dec_select + 1); 65 int i, j, k; 66 67 for (i = 0; i < nlfesamples; i++) { 68 // One decimated sample generates 64 or 128 interpolated ones 69 for (j = 0; j < factor / 2; j++) { 70 float a = 0; 71 float b = 0; 72 73 for (k = 0; k < ncoeffs; k++) { 74 a += filter_coeff[ j * ncoeffs + k] * lfe_samples[-k]; 75 b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k]; 76 } 77 78 pcm_samples[ j] = a; 79 pcm_samples[factor / 2 + j] = b; 80 } 81 82 lfe_samples++; 83 pcm_samples += factor; 84 } 85} 86 87static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples, 88 const float *filter_coeff, ptrdiff_t npcmblocks) 89{ 90 lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0); 91} 92 93static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples, 94 const float *filter_coeff, ptrdiff_t npcmblocks) 95{ 96 lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1); 97} 98 99static void lfe_x96_float_c(float *dst, const float *src, 100 float *hist, ptrdiff_t len) 101{ 102 float prev = *hist; 103 int i; 104 105 for (i = 0; i < len; i++) { 106 float a = 0.25f * src[i] + 0.75f * prev; 107 float b = 0.75f * src[i] + 0.25f * prev; 108 prev = src[i]; 109 *dst++ = a; 110 *dst++ = b; 111 } 112 113 *hist = prev; 114} 115 116static void sub_qmf32_float_c(SynthFilterContext *synth, 117 FFTContext *imdct, 118 float *pcm_samples, 119 int32_t **subband_samples_lo, 120 int32_t **subband_samples_hi, 121 float *hist1, int *offset, float *hist2, 122 const float *filter_coeff, ptrdiff_t npcmblocks, 123 float scale) 124{ 125 LOCAL_ALIGNED_32(float, input, [32]); 126 int i, j; 127 128 for (j = 0; j < npcmblocks; j++) { 129 // Load in one sample from each subband 130 for (i = 0; i < 32; i++) { 131 if ((i - 1) & 2) 132 input[i] = -subband_samples_lo[i][j]; 133 else 134 input[i] = subband_samples_lo[i][j]; 135 } 136 137 // One subband sample generates 32 interpolated ones 138 synth->synth_filter_float(imdct, hist1, offset, 139 hist2, filter_coeff, 140 pcm_samples, input, scale); 141 pcm_samples += 32; 142 } 143} 144 145static void sub_qmf64_float_c(SynthFilterContext *synth, 146 FFTContext *imdct, 147 float *pcm_samples, 148 int32_t **subband_samples_lo, 149 int32_t **subband_samples_hi, 150 float *hist1, int *offset, float *hist2, 151 const float *filter_coeff, ptrdiff_t npcmblocks, 152 float scale) 153{ 154 LOCAL_ALIGNED_32(float, input, [64]); 155 int i, j; 156 157 if (!subband_samples_hi) 158 memset(&input[32], 0, sizeof(input[0]) * 32); 159 160 for (j = 0; j < npcmblocks; j++) { 161 // Load in one sample from each subband 162 if (subband_samples_hi) { 163 // Full 64 subbands, first 32 are residual coded 164 for (i = 0; i < 32; i++) { 165 if ((i - 1) & 2) 166 input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j]; 167 else 168 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; 169 } 170 for (i = 32; i < 64; i++) { 171 if ((i - 1) & 2) 172 input[i] = -subband_samples_hi[i][j]; 173 else 174 input[i] = subband_samples_hi[i][j]; 175 } 176 } else { 177 // Only first 32 subbands 178 for (i = 0; i < 32; i++) { 179 if ((i - 1) & 2) 180 input[i] = -subband_samples_lo[i][j]; 181 else 182 input[i] = subband_samples_lo[i][j]; 183 } 184 } 185 186 // One subband sample generates 64 interpolated ones 187 synth->synth_filter_float_64(imdct, hist1, offset, 188 hist2, filter_coeff, 189 pcm_samples, input, scale); 190 pcm_samples += 64; 191 } 192} 193 194static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples, 195 const int32_t *filter_coeff, ptrdiff_t npcmblocks) 196{ 197 // Select decimation factor 198 int nlfesamples = npcmblocks >> 1; 199 int i, j, k; 200 201 for (i = 0; i < nlfesamples; i++) { 202 // One decimated sample generates 64 interpolated ones 203 for (j = 0; j < 32; j++) { 204 int64_t a = 0; 205 int64_t b = 0; 206 207 for (k = 0; k < 8; k++) { 208 a += (int64_t)filter_coeff[ j * 8 + k] * lfe_samples[-k]; 209 b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k]; 210 } 211 212 pcm_samples[ j] = clip23(norm23(a)); 213 pcm_samples[32 + j] = clip23(norm23(b)); 214 } 215 216 lfe_samples++; 217 pcm_samples += 64; 218 } 219} 220 221static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src, 222 int32_t *hist, ptrdiff_t len) 223{ 224 int32_t prev = *hist; 225 int i; 226 227 for (i = 0; i < len; i++) { 228 int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev; 229 int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev; 230 prev = src[i]; 231 *dst++ = clip23(norm23(a)); 232 *dst++ = clip23(norm23(b)); 233 } 234 235 *hist = prev; 236} 237 238static void sub_qmf32_fixed_c(SynthFilterContext *synth, 239 DCADCTContext *imdct, 240 int32_t *pcm_samples, 241 int32_t **subband_samples_lo, 242 int32_t **subband_samples_hi, 243 int32_t *hist1, int *offset, int32_t *hist2, 244 const int32_t *filter_coeff, ptrdiff_t npcmblocks) 245{ 246 LOCAL_ALIGNED_32(int32_t, input, [32]); 247 int i, j; 248 249 for (j = 0; j < npcmblocks; j++) { 250 // Load in one sample from each subband 251 for (i = 0; i < 32; i++) 252 input[i] = subband_samples_lo[i][j]; 253 254 // One subband sample generates 32 interpolated ones 255 synth->synth_filter_fixed(imdct, hist1, offset, 256 hist2, filter_coeff, 257 pcm_samples, input); 258 pcm_samples += 32; 259 } 260} 261 262static void sub_qmf64_fixed_c(SynthFilterContext *synth, 263 DCADCTContext *imdct, 264 int32_t *pcm_samples, 265 int32_t **subband_samples_lo, 266 int32_t **subband_samples_hi, 267 int32_t *hist1, int *offset, int32_t *hist2, 268 const int32_t *filter_coeff, ptrdiff_t npcmblocks) 269{ 270 LOCAL_ALIGNED_32(int32_t, input, [64]); 271 int i, j; 272 273 if (!subband_samples_hi) 274 memset(&input[32], 0, sizeof(input[0]) * 32); 275 276 for (j = 0; j < npcmblocks; j++) { 277 // Load in one sample from each subband 278 if (subband_samples_hi) { 279 // Full 64 subbands, first 32 are residual coded 280 for (i = 0; i < 32; i++) 281 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; 282 for (i = 32; i < 64; i++) 283 input[i] = subband_samples_hi[i][j]; 284 } else { 285 // Only first 32 subbands 286 for (i = 0; i < 32; i++) 287 input[i] = subband_samples_lo[i][j]; 288 } 289 290 // One subband sample generates 64 interpolated ones 291 synth->synth_filter_fixed_64(imdct, hist1, offset, 292 hist2, filter_coeff, 293 pcm_samples, input); 294 pcm_samples += 64; 295 } 296} 297 298static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len) 299{ 300 int i; 301 302 for (i = 0; i < len; i++) 303 dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3); 304} 305 306static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2, 307 const int32_t *src, ptrdiff_t len) 308{ 309 int i; 310 311 for (i = 0; i < len; i++) { 312 int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */); 313 dst1[i] -= cs; 314 dst2[i] -= cs; 315 } 316} 317 318static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len) 319{ 320 int i; 321 322 for (i = 0; i < len; i++) 323 dst[i] -= (unsigned)mul15(src[i], coeff); 324} 325 326static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len) 327{ 328 int i; 329 330 for (i = 0; i < len; i++) 331 dst[i] += (unsigned)mul15(src[i], coeff); 332} 333 334static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len) 335{ 336 int i; 337 338 for (i = 0; i < len; i++) 339 dst[i] = mul15(dst[i], scale); 340} 341 342static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len) 343{ 344 int i; 345 346 for (i = 0; i < len; i++) 347 dst[i] = mul16(dst[i], scale_inv); 348} 349 350static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len) 351{ 352 int i; 353 354 for (i = 0; i < len; i++) 355 dst[i] -= mul22(src[i], coeff); 356} 357 358static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len) 359{ 360 int i; 361 362 for (i = 0; i < len; i++) 363 dst[i] -= mul23(src[i], coeff); 364} 365 366static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1, 367 const int32_t *coeff, ptrdiff_t len) 368{ 369 int i; 370 371 filter0(src0, src1, coeff[0], len); 372 filter0(src1, src0, coeff[1], len); 373 filter0(src0, src1, coeff[2], len); 374 filter0(src1, src0, coeff[3], len); 375 376 for (i = 0; i < 8; i++, src0--) { 377 filter1(src0, src1, coeff[i + 4], len); 378 filter1(src1, src0, coeff[i + 12], len); 379 filter1(src0, src1, coeff[i + 4], len); 380 } 381 382 for (i = 0; i < len; i++) { 383 *dst++ = *src1++; 384 *dst++ = *++src0; 385 } 386} 387 388static void lbr_bank_c(float output[32][4], float **input, 389 const float *coeff, ptrdiff_t ofs, ptrdiff_t len) 390{ 391 float SW0 = coeff[0]; 392 float SW1 = coeff[1]; 393 float SW2 = coeff[2]; 394 float SW3 = coeff[3]; 395 396 float C1 = coeff[4]; 397 float C2 = coeff[5]; 398 float C3 = coeff[6]; 399 float C4 = coeff[7]; 400 401 float AL1 = coeff[8]; 402 float AL2 = coeff[9]; 403 404 int i; 405 406 // Short window and 8 point forward MDCT 407 for (i = 0; i < len; i++) { 408 float *src = input[i] + ofs; 409 410 float a = src[-4] * SW0 - src[-1] * SW3; 411 float b = src[-3] * SW1 - src[-2] * SW2; 412 float c = src[ 2] * SW1 + src[ 1] * SW2; 413 float d = src[ 3] * SW0 + src[ 0] * SW3; 414 415 output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d; 416 output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c; 417 output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a; 418 output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c; 419 } 420 421 // Aliasing cancellation for high frequencies 422 for (i = 12; i < len - 1; i++) { 423 float a = output[i ][3] * AL1; 424 float b = output[i+1][0] * AL1; 425 output[i ][3] += b - a; 426 output[i+1][0] -= b + a; 427 a = output[i ][2] * AL2; 428 b = output[i+1][1] * AL2; 429 output[i ][2] += b - a; 430 output[i+1][1] -= b + a; 431 } 432} 433 434static void lfe_iir_c(float *output, const float *input, 435 const float iir[5][4], float hist[5][2], 436 ptrdiff_t factor) 437{ 438 float res, tmp; 439 int i, j, k; 440 441 for (i = 0; i < 64; i++) { 442 res = *input++; 443 444 for (j = 0; j < factor; j++) { 445 for (k = 0; k < 5; k++) { 446 tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res; 447 res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp; 448 449 hist[k][0] = hist[k][1]; 450 hist[k][1] = tmp; 451 } 452 453 *output++ = res; 454 res = 0; 455 } 456 } 457} 458 459av_cold void ff_dcadsp_init(DCADSPContext *s) 460{ 461 s->decode_hf = decode_hf_c; 462 s->decode_joint = decode_joint_c; 463 464 s->lfe_fir_float[0] = lfe_fir0_float_c; 465 s->lfe_fir_float[1] = lfe_fir1_float_c; 466 s->lfe_x96_float = lfe_x96_float_c; 467 s->sub_qmf_float[0] = sub_qmf32_float_c; 468 s->sub_qmf_float[1] = sub_qmf64_float_c; 469 470 s->lfe_fir_fixed = lfe_fir_fixed_c; 471 s->lfe_x96_fixed = lfe_x96_fixed_c; 472 s->sub_qmf_fixed[0] = sub_qmf32_fixed_c; 473 s->sub_qmf_fixed[1] = sub_qmf64_fixed_c; 474 475 s->decor = decor_c; 476 477 s->dmix_sub_xch = dmix_sub_xch_c; 478 s->dmix_sub = dmix_sub_c; 479 s->dmix_add = dmix_add_c; 480 s->dmix_scale = dmix_scale_c; 481 s->dmix_scale_inv = dmix_scale_inv_c; 482 483 s->assemble_freq_bands = assemble_freq_bands_c; 484 485 s->lbr_bank = lbr_bank_c; 486 s->lfe_iir = lfe_iir_c; 487 488#if ARCH_X86 489 ff_dcadsp_init_x86(s); 490#endif 491} 492