1/* 2 * AAC decoder 3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) 4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) 5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com> 6 * 7 * AAC LATM decoder 8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz> 9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net> 10 * 11 * This file is part of FFmpeg. 12 * 13 * FFmpeg is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU Lesser General Public 15 * License as published by the Free Software Foundation; either 16 * version 2.1 of the License, or (at your option) any later version. 17 * 18 * FFmpeg is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 * Lesser General Public License for more details. 22 * 23 * You should have received a copy of the GNU Lesser General Public 24 * License along with FFmpeg; if not, write to the Free Software 25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 26 */ 27 28/** 29 * @file 30 * AAC decoder 31 * @author Oded Shimon ( ods15 ods15 dyndns org ) 32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) 33 */ 34 35#define FFT_FLOAT 1 36#define USE_FIXED 0 37 38#include "libavutil/float_dsp.h" 39#include "libavutil/opt.h" 40#include "avcodec.h" 41#include "codec_internal.h" 42#include "get_bits.h" 43#include "fft.h" 44#include "mdct15.h" 45#include "lpc.h" 46#include "kbdwin.h" 47#include "sinewin.h" 48 49#include "aac.h" 50#include "aactab.h" 51#include "aacdectab.h" 52#include "adts_header.h" 53#include "cbrt_data.h" 54#include "sbr.h" 55#include "aacsbr.h" 56#include "mpeg4audio.h" 57#include "profiles.h" 58#include "libavutil/intfloat.h" 59 60#include <errno.h> 61#include <math.h> 62#include <stdint.h> 63#include <string.h> 64 65#if ARCH_ARM 66# include "arm/aac.h" 67#elif ARCH_MIPS 68# include "mips/aacdec_mips.h" 69#endif 70 71DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120]; 72DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960]; 73DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960]; 74DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120]; 75 76static av_always_inline void reset_predict_state(PredictorState *ps) 77{ 78 ps->r0 = 0.0f; 79 ps->r1 = 0.0f; 80 ps->cor0 = 0.0f; 81 ps->cor1 = 0.0f; 82 ps->var0 = 1.0f; 83 ps->var1 = 1.0f; 84} 85 86#ifndef VMUL2 87static inline float *VMUL2(float *dst, const float *v, unsigned idx, 88 const float *scale) 89{ 90 float s = *scale; 91 *dst++ = v[idx & 15] * s; 92 *dst++ = v[idx>>4 & 15] * s; 93 return dst; 94} 95#endif 96 97#ifndef VMUL4 98static inline float *VMUL4(float *dst, const float *v, unsigned idx, 99 const float *scale) 100{ 101 float s = *scale; 102 *dst++ = v[idx & 3] * s; 103 *dst++ = v[idx>>2 & 3] * s; 104 *dst++ = v[idx>>4 & 3] * s; 105 *dst++ = v[idx>>6 & 3] * s; 106 return dst; 107} 108#endif 109 110#ifndef VMUL2S 111static inline float *VMUL2S(float *dst, const float *v, unsigned idx, 112 unsigned sign, const float *scale) 113{ 114 union av_intfloat32 s0, s1; 115 116 s0.f = s1.f = *scale; 117 s0.i ^= sign >> 1 << 31; 118 s1.i ^= sign << 31; 119 120 *dst++ = v[idx & 15] * s0.f; 121 *dst++ = v[idx>>4 & 15] * s1.f; 122 123 return dst; 124} 125#endif 126 127#ifndef VMUL4S 128static inline float *VMUL4S(float *dst, const float *v, unsigned idx, 129 unsigned sign, const float *scale) 130{ 131 unsigned nz = idx >> 12; 132 union av_intfloat32 s = { .f = *scale }; 133 union av_intfloat32 t; 134 135 t.i = s.i ^ (sign & 1U<<31); 136 *dst++ = v[idx & 3] * t.f; 137 138 sign <<= nz & 1; nz >>= 1; 139 t.i = s.i ^ (sign & 1U<<31); 140 *dst++ = v[idx>>2 & 3] * t.f; 141 142 sign <<= nz & 1; nz >>= 1; 143 t.i = s.i ^ (sign & 1U<<31); 144 *dst++ = v[idx>>4 & 3] * t.f; 145 146 sign <<= nz & 1; 147 t.i = s.i ^ (sign & 1U<<31); 148 *dst++ = v[idx>>6 & 3] * t.f; 149 150 return dst; 151} 152#endif 153 154static av_always_inline float flt16_round(float pf) 155{ 156 union av_intfloat32 tmp; 157 tmp.f = pf; 158 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U; 159 return tmp.f; 160} 161 162static av_always_inline float flt16_even(float pf) 163{ 164 union av_intfloat32 tmp; 165 tmp.f = pf; 166 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U; 167 return tmp.f; 168} 169 170static av_always_inline float flt16_trunc(float pf) 171{ 172 union av_intfloat32 pun; 173 pun.f = pf; 174 pun.i &= 0xFFFF0000U; 175 return pun.f; 176} 177 178static av_always_inline void predict(PredictorState *ps, float *coef, 179 int output_enable) 180{ 181 const float a = 0.953125; // 61.0 / 64 182 const float alpha = 0.90625; // 29.0 / 32 183 float e0, e1; 184 float pv; 185 float k1, k2; 186 float r0 = ps->r0, r1 = ps->r1; 187 float cor0 = ps->cor0, cor1 = ps->cor1; 188 float var0 = ps->var0, var1 = ps->var1; 189 190 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0; 191 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0; 192 193 pv = flt16_round(k1 * r0 + k2 * r1); 194 if (output_enable) 195 *coef += pv; 196 197 e0 = *coef; 198 e1 = e0 - k1 * r0; 199 200 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1); 201 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1)); 202 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0); 203 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0)); 204 205 ps->r1 = flt16_trunc(a * (r0 - k1 * e0)); 206 ps->r0 = flt16_trunc(a * e0); 207} 208 209/** 210 * Apply dependent channel coupling (applied before IMDCT). 211 * 212 * @param index index into coupling gain array 213 */ 214static void apply_dependent_coupling(AACContext *ac, 215 SingleChannelElement *target, 216 ChannelElement *cce, int index) 217{ 218 IndividualChannelStream *ics = &cce->ch[0].ics; 219 const uint16_t *offsets = ics->swb_offset; 220 float *dest = target->coeffs; 221 const float *src = cce->ch[0].coeffs; 222 int g, i, group, k, idx = 0; 223 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { 224 av_log(ac->avctx, AV_LOG_ERROR, 225 "Dependent coupling is not supported together with LTP\n"); 226 return; 227 } 228 for (g = 0; g < ics->num_window_groups; g++) { 229 for (i = 0; i < ics->max_sfb; i++, idx++) { 230 if (cce->ch[0].band_type[idx] != ZERO_BT) { 231 const float gain = cce->coup.gain[index][idx]; 232 for (group = 0; group < ics->group_len[g]; group++) { 233 for (k = offsets[i]; k < offsets[i + 1]; k++) { 234 // FIXME: SIMDify 235 dest[group * 128 + k] += gain * src[group * 128 + k]; 236 } 237 } 238 } 239 } 240 dest += ics->group_len[g] * 128; 241 src += ics->group_len[g] * 128; 242 } 243} 244 245/** 246 * Apply independent channel coupling (applied after IMDCT). 247 * 248 * @param index index into coupling gain array 249 */ 250static void apply_independent_coupling(AACContext *ac, 251 SingleChannelElement *target, 252 ChannelElement *cce, int index) 253{ 254 const float gain = cce->coup.gain[index][0]; 255 const float *src = cce->ch[0].ret; 256 float *dest = target->ret; 257 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1); 258 259 ac->fdsp->vector_fmac_scalar(dest, src, gain, len); 260} 261 262#include "aacdec_template.c" 263 264#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word 265 266struct LATMContext { 267 AACContext aac_ctx; ///< containing AACContext 268 int initialized; ///< initialized after a valid extradata was seen 269 270 // parser data 271 int audio_mux_version_A; ///< LATM syntax version 272 int frame_length_type; ///< 0/1 variable/fixed frame length 273 int frame_length; ///< frame length for fixed frame length 274}; 275 276static inline uint32_t latm_get_value(GetBitContext *b) 277{ 278 int length = get_bits(b, 2); 279 280 return get_bits_long(b, (length+1)*8); 281} 282 283static int latm_decode_audio_specific_config(struct LATMContext *latmctx, 284 GetBitContext *gb, int asclen) 285{ 286 AACContext *ac = &latmctx->aac_ctx; 287 AVCodecContext *avctx = ac->avctx; 288 MPEG4AudioConfig m4ac = { 0 }; 289 GetBitContext gbc; 290 int config_start_bit = get_bits_count(gb); 291 int sync_extension = 0; 292 int bits_consumed, esize, i; 293 294 if (asclen > 0) { 295 sync_extension = 1; 296 asclen = FFMIN(asclen, get_bits_left(gb)); 297 init_get_bits(&gbc, gb->buffer, config_start_bit + asclen); 298 skip_bits_long(&gbc, config_start_bit); 299 } else if (asclen == 0) { 300 gbc = *gb; 301 } else { 302 return AVERROR_INVALIDDATA; 303 } 304 305 if (get_bits_left(gb) <= 0) 306 return AVERROR_INVALIDDATA; 307 308 bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac, 309 &gbc, config_start_bit, 310 sync_extension); 311 312 if (bits_consumed < config_start_bit) 313 return AVERROR_INVALIDDATA; 314 bits_consumed -= config_start_bit; 315 316 if (asclen == 0) 317 asclen = bits_consumed; 318 319 if (!latmctx->initialized || 320 ac->oc[1].m4ac.sample_rate != m4ac.sample_rate || 321 ac->oc[1].m4ac.chan_config != m4ac.chan_config) { 322 323 if (latmctx->initialized) { 324 av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config); 325 } else { 326 av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n"); 327 } 328 latmctx->initialized = 0; 329 330 esize = (asclen + 7) / 8; 331 332 if (avctx->extradata_size < esize) { 333 av_free(avctx->extradata); 334 avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE); 335 if (!avctx->extradata) 336 return AVERROR(ENOMEM); 337 } 338 339 avctx->extradata_size = esize; 340 gbc = *gb; 341 for (i = 0; i < esize; i++) { 342 avctx->extradata[i] = get_bits(&gbc, 8); 343 } 344 memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE); 345 } 346 skip_bits_long(gb, asclen); 347 348 return 0; 349} 350 351static int read_stream_mux_config(struct LATMContext *latmctx, 352 GetBitContext *gb) 353{ 354 int ret, audio_mux_version = get_bits(gb, 1); 355 356 latmctx->audio_mux_version_A = 0; 357 if (audio_mux_version) 358 latmctx->audio_mux_version_A = get_bits(gb, 1); 359 360 if (!latmctx->audio_mux_version_A) { 361 362 if (audio_mux_version) 363 latm_get_value(gb); // taraFullness 364 365 skip_bits(gb, 1); // allStreamSameTimeFraming 366 skip_bits(gb, 6); // numSubFrames 367 // numPrograms 368 if (get_bits(gb, 4)) { // numPrograms 369 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs"); 370 return AVERROR_PATCHWELCOME; 371 } 372 373 // for each program (which there is only one in DVB) 374 375 // for each layer (which there is only one in DVB) 376 if (get_bits(gb, 3)) { // numLayer 377 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers"); 378 return AVERROR_PATCHWELCOME; 379 } 380 381 // for all but first stream: use_same_config = get_bits(gb, 1); 382 if (!audio_mux_version) { 383 if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0) 384 return ret; 385 } else { 386 int ascLen = latm_get_value(gb); 387 if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0) 388 return ret; 389 } 390 391 latmctx->frame_length_type = get_bits(gb, 3); 392 switch (latmctx->frame_length_type) { 393 case 0: 394 skip_bits(gb, 8); // latmBufferFullness 395 break; 396 case 1: 397 latmctx->frame_length = get_bits(gb, 9); 398 break; 399 case 3: 400 case 4: 401 case 5: 402 skip_bits(gb, 6); // CELP frame length table index 403 break; 404 case 6: 405 case 7: 406 skip_bits(gb, 1); // HVXC frame length table index 407 break; 408 } 409 410 if (get_bits(gb, 1)) { // other data 411 if (audio_mux_version) { 412 latm_get_value(gb); // other_data_bits 413 } else { 414 int esc; 415 do { 416 if (get_bits_left(gb) < 9) 417 return AVERROR_INVALIDDATA; 418 esc = get_bits(gb, 1); 419 skip_bits(gb, 8); 420 } while (esc); 421 } 422 } 423 424 if (get_bits(gb, 1)) // crc present 425 skip_bits(gb, 8); // config_crc 426 } 427 428 return 0; 429} 430 431static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb) 432{ 433 uint8_t tmp; 434 435 if (ctx->frame_length_type == 0) { 436 int mux_slot_length = 0; 437 do { 438 if (get_bits_left(gb) < 8) 439 return AVERROR_INVALIDDATA; 440 tmp = get_bits(gb, 8); 441 mux_slot_length += tmp; 442 } while (tmp == 255); 443 return mux_slot_length; 444 } else if (ctx->frame_length_type == 1) { 445 return ctx->frame_length; 446 } else if (ctx->frame_length_type == 3 || 447 ctx->frame_length_type == 5 || 448 ctx->frame_length_type == 7) { 449 skip_bits(gb, 2); // mux_slot_length_coded 450 } 451 return 0; 452} 453 454static int read_audio_mux_element(struct LATMContext *latmctx, 455 GetBitContext *gb) 456{ 457 int err; 458 uint8_t use_same_mux = get_bits(gb, 1); 459 if (!use_same_mux) { 460 if ((err = read_stream_mux_config(latmctx, gb)) < 0) 461 return err; 462 } else if (!latmctx->aac_ctx.avctx->extradata) { 463 av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG, 464 "no decoder config found\n"); 465 return 1; 466 } 467 if (latmctx->audio_mux_version_A == 0) { 468 int mux_slot_length_bytes = read_payload_length_info(latmctx, gb); 469 if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) { 470 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n"); 471 return AVERROR_INVALIDDATA; 472 } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) { 473 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, 474 "frame length mismatch %d << %d\n", 475 mux_slot_length_bytes * 8, get_bits_left(gb)); 476 return AVERROR_INVALIDDATA; 477 } 478 } 479 return 0; 480} 481 482 483static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out, 484 int *got_frame_ptr, AVPacket *avpkt) 485{ 486 struct LATMContext *latmctx = avctx->priv_data; 487 int muxlength, err; 488 GetBitContext gb; 489 490 if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0) 491 return err; 492 493 // check for LOAS sync word 494 if (get_bits(&gb, 11) != LOAS_SYNC_WORD) 495 return AVERROR_INVALIDDATA; 496 497 muxlength = get_bits(&gb, 13) + 3; 498 // not enough data, the parser should have sorted this out 499 if (muxlength > avpkt->size) 500 return AVERROR_INVALIDDATA; 501 502 if ((err = read_audio_mux_element(latmctx, &gb))) 503 return (err < 0) ? err : avpkt->size; 504 505 if (!latmctx->initialized) { 506 if (!avctx->extradata) { 507 *got_frame_ptr = 0; 508 return avpkt->size; 509 } else { 510 push_output_configuration(&latmctx->aac_ctx); 511 if ((err = decode_audio_specific_config( 512 &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac, 513 avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) { 514 pop_output_configuration(&latmctx->aac_ctx); 515 return err; 516 } 517 latmctx->initialized = 1; 518 } 519 } 520 521 if (show_bits(&gb, 12) == 0xfff) { 522 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, 523 "ADTS header detected, probably as result of configuration " 524 "misparsing\n"); 525 return AVERROR_INVALIDDATA; 526 } 527 528 switch (latmctx->aac_ctx.oc[1].m4ac.object_type) { 529 case AOT_ER_AAC_LC: 530 case AOT_ER_AAC_LTP: 531 case AOT_ER_AAC_LD: 532 case AOT_ER_AAC_ELD: 533 err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb); 534 break; 535 default: 536 err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt); 537 } 538 if (err < 0) 539 return err; 540 541 return muxlength; 542} 543 544static av_cold int latm_decode_init(AVCodecContext *avctx) 545{ 546 struct LATMContext *latmctx = avctx->priv_data; 547 int ret = aac_decode_init(avctx); 548 549 if (avctx->extradata_size > 0) 550 latmctx->initialized = !ret; 551 552 return ret; 553} 554 555const FFCodec ff_aac_decoder = { 556 .p.name = "aac", 557 .p.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"), 558 .p.type = AVMEDIA_TYPE_AUDIO, 559 .p.id = AV_CODEC_ID_AAC, 560 .priv_data_size = sizeof(AACContext), 561 .init = aac_decode_init, 562 .close = aac_decode_close, 563 FF_CODEC_DECODE_CB(aac_decode_frame), 564 .p.sample_fmts = (const enum AVSampleFormat[]) { 565 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE 566 }, 567 .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1, 568 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 569#if FF_API_OLD_CHANNEL_LAYOUT 570 .p.channel_layouts = aac_channel_layout, 571#endif 572 .p.ch_layouts = aac_ch_layout, 573 .flush = flush, 574 .p.priv_class = &aac_decoder_class, 575 .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles), 576}; 577 578/* 579 Note: This decoder filter is intended to decode LATM streams transferred 580 in MPEG transport streams which only contain one program. 581 To do a more complex LATM demuxing a separate LATM demuxer should be used. 582*/ 583const FFCodec ff_aac_latm_decoder = { 584 .p.name = "aac_latm", 585 .p.long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"), 586 .p.type = AVMEDIA_TYPE_AUDIO, 587 .p.id = AV_CODEC_ID_AAC_LATM, 588 .priv_data_size = sizeof(struct LATMContext), 589 .init = latm_decode_init, 590 .close = aac_decode_close, 591 FF_CODEC_DECODE_CB(latm_decode_frame), 592 .p.sample_fmts = (const enum AVSampleFormat[]) { 593 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE 594 }, 595 .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1, 596 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 597#if FF_API_OLD_CHANNEL_LAYOUT 598 .p.channel_layouts = aac_channel_layout, 599#endif 600 .p.ch_layouts = aac_ch_layout, 601 .flush = flush, 602 .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles), 603}; 604