/*
 * AAC decoder
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
 *
 * AAC LATM decoder
 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
 * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
 *
 * AAC decoder fixed-point implementation
 * Copyright (c) 2013
 *      MIPS Technologies, Inc., California.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * AAC decoder fixed-point implementation
 * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
 * @author Nedeljko Babic ( nedeljko.babic imgtec com )
 */

/*
 * supported tools
 *
 * Support?                     Name
 * N (code in SoC repo)         gain control
 * Y                            block switching
 * Y                            window shapes - standard
 * N                            window shapes - Low Delay
 * Y                            filterbank - standard
 * N (code in SoC repo)         filterbank - Scalable Sample Rate
 * Y                            Temporal Noise Shaping
 * Y                            Long Term Prediction
 * Y                            intensity stereo
 * Y                            channel coupling
 * Y                            frequency domain prediction
 * Y                            Perceptual Noise Substitution
 * Y                            Mid/Side stereo
 * N                            Scalable Inverse AAC Quantization
 * N                            Frequency Selective Switch
 * N                            upsampling filter
 * Y                            quantization & coding - AAC
 * N                            quantization & coding - TwinVQ
 * N                            quantization & coding - BSAC
 * N                            AAC Error Resilience tools
 * N                            Error Resilience payload syntax
 * N                            Error Protection tool
 * N                            CELP
 * N                            Silence Compression
 * N                            HVXC
 * N                            HVXC 4kbits/s VR
 * N                            Structured Audio tools
 * N                            Structured Audio Sample Bank Format
 * N                            MIDI
 * N                            Harmonic and Individual Lines plus Noise
 * N                            Text-To-Speech Interface
 * Y                            Spectral Band Replication
 * Y (not in this code)         Layer-1
 * Y (not in this code)         Layer-2
 * Y (not in this code)         Layer-3
 * N                            SinuSoidal Coding (Transient, Sinusoid, Noise)
 * Y                            Parametric Stereo
 * N                            Direct Stream Transfer
 * Y  (not in fixed point code) Enhanced AAC Low Delay (ER AAC ELD)
 *
 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
           Parametric Stereo.
90 */ 91 92#include "libavutil/channel_layout.h" 93#include "libavutil/thread.h" 94#include "internal.h" 95 96static VLC vlc_scalefactors; 97static VLC vlc_spectral[11]; 98 99static int output_configure(AACContext *ac, 100 uint8_t layout_map[MAX_ELEM_ID*4][3], int tags, 101 enum OCStatus oc_type, int get_new_frame); 102 103#define overread_err "Input buffer exhausted before END element found\n" 104 105static int count_channels(uint8_t (*layout)[3], int tags) 106{ 107 int i, sum = 0; 108 for (i = 0; i < tags; i++) { 109 int syn_ele = layout[i][0]; 110 int pos = layout[i][2]; 111 sum += (1 + (syn_ele == TYPE_CPE)) * 112 (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC); 113 } 114 return sum; 115} 116 117/** 118 * Check for the channel element in the current channel position configuration. 119 * If it exists, make sure the appropriate element is allocated and map the 120 * channel order to match the internal FFmpeg channel layout. 121 * 122 * @param che_pos current channel position configuration 123 * @param type channel element type 124 * @param id channel element id 125 * @param channels count of the number of channels in the configuration 126 * 127 * @return Returns error status. 
0 - OK, !0 - error 128 */ 129static av_cold int che_configure(AACContext *ac, 130 enum ChannelPosition che_pos, 131 int type, int id, int *channels) 132{ 133 if (*channels >= MAX_CHANNELS) 134 return AVERROR_INVALIDDATA; 135 if (che_pos) { 136 if (!ac->che[type][id]) { 137 if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement)))) 138 return AVERROR(ENOMEM); 139 AAC_RENAME(ff_aac_sbr_ctx_init)(ac, &ac->che[type][id]->sbr, type); 140 } 141 if (type != TYPE_CCE) { 142 if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) { 143 av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n"); 144 return AVERROR_INVALIDDATA; 145 } 146 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0]; 147 if (type == TYPE_CPE || 148 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) { 149 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1]; 150 } 151 } 152 } else { 153 if (ac->che[type][id]) 154 AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][id]->sbr); 155 av_freep(&ac->che[type][id]); 156 } 157 return 0; 158} 159 160static int frame_configure_elements(AVCodecContext *avctx) 161{ 162 AACContext *ac = avctx->priv_data; 163 int type, id, ch, ret; 164 165 /* set channel pointers to internal buffers by default */ 166 for (type = 0; type < 4; type++) { 167 for (id = 0; id < MAX_ELEM_ID; id++) { 168 ChannelElement *che = ac->che[type][id]; 169 if (che) { 170 che->ch[0].ret = che->ch[0].ret_buf; 171 che->ch[1].ret = che->ch[1].ret_buf; 172 } 173 } 174 } 175 176 /* get output buffer */ 177 av_frame_unref(ac->frame); 178 if (!avctx->ch_layout.nb_channels) 179 return 1; 180 181 ac->frame->nb_samples = 2048; 182 if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0) 183 return ret; 184 185 /* map output channel pointers to AVFrame data */ 186 for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { 187 if (ac->output_element[ch]) 188 ac->output_element[ch]->ret = (INTFLOAT *)ac->frame->extended_data[ch]; 189 } 190 191 return 0; 192} 193 
194struct elem_to_channel { 195 uint64_t av_position; 196 uint8_t syn_ele; 197 uint8_t elem_id; 198 uint8_t aac_position; 199}; 200 201static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID], 202 uint8_t (*layout_map)[3], int offset, uint64_t left, 203 uint64_t right, int pos, uint64_t *layout) 204{ 205 if (layout_map[offset][0] == TYPE_CPE) { 206 e2c_vec[offset] = (struct elem_to_channel) { 207 .av_position = left | right, 208 .syn_ele = TYPE_CPE, 209 .elem_id = layout_map[offset][1], 210 .aac_position = pos 211 }; 212 if (e2c_vec[offset].av_position != UINT64_MAX) 213 *layout |= e2c_vec[offset].av_position; 214 215 return 1; 216 } else { 217 e2c_vec[offset] = (struct elem_to_channel) { 218 .av_position = left, 219 .syn_ele = TYPE_SCE, 220 .elem_id = layout_map[offset][1], 221 .aac_position = pos 222 }; 223 e2c_vec[offset + 1] = (struct elem_to_channel) { 224 .av_position = right, 225 .syn_ele = TYPE_SCE, 226 .elem_id = layout_map[offset + 1][1], 227 .aac_position = pos 228 }; 229 if (left != UINT64_MAX) 230 *layout |= left; 231 232 if (right != UINT64_MAX) 233 *layout |= right; 234 235 return 2; 236 } 237} 238 239static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, 240 int *current) 241{ 242 int num_pos_channels = 0; 243 int first_cpe = 0; 244 int sce_parity = 0; 245 int i; 246 for (i = *current; i < tags; i++) { 247 if (layout_map[i][2] != pos) 248 break; 249 if (layout_map[i][0] == TYPE_CPE) { 250 if (sce_parity) { 251 if (pos == AAC_CHANNEL_FRONT && !first_cpe) { 252 sce_parity = 0; 253 } else { 254 return -1; 255 } 256 } 257 num_pos_channels += 2; 258 first_cpe = 1; 259 } else { 260 num_pos_channels++; 261 sce_parity ^= 1; 262 } 263 } 264 if (sce_parity && 265 ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE)) 266 return -1; 267 *current = i; 268 return num_pos_channels; 269} 270 271#define PREFIX_FOR_22POINT2 
(AV_CH_LAYOUT_7POINT1_WIDE_BACK|AV_CH_BACK_CENTER|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT|AV_CH_LOW_FREQUENCY_2) 272static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags) 273{ 274 int i, n, total_non_cc_elements; 275 struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } }; 276 int num_front_channels, num_side_channels, num_back_channels; 277 uint64_t layout = 0; 278 279 if (FF_ARRAY_ELEMS(e2c_vec) < tags) 280 return 0; 281 282 i = 0; 283 num_front_channels = 284 count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i); 285 if (num_front_channels < 0) 286 return 0; 287 num_side_channels = 288 count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i); 289 if (num_side_channels < 0) 290 return 0; 291 num_back_channels = 292 count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i); 293 if (num_back_channels < 0) 294 return 0; 295 296 if (num_side_channels == 0 && num_back_channels >= 4) { 297 num_side_channels = 2; 298 num_back_channels -= 2; 299 } 300 301 i = 0; 302 if (num_front_channels & 1) { 303 e2c_vec[i] = (struct elem_to_channel) { 304 .av_position = AV_CH_FRONT_CENTER, 305 .syn_ele = TYPE_SCE, 306 .elem_id = layout_map[i][1], 307 .aac_position = AAC_CHANNEL_FRONT 308 }; 309 layout |= e2c_vec[i].av_position; 310 i++; 311 num_front_channels--; 312 } 313 if (num_front_channels >= 4) { 314 i += assign_pair(e2c_vec, layout_map, i, 315 AV_CH_FRONT_LEFT_OF_CENTER, 316 AV_CH_FRONT_RIGHT_OF_CENTER, 317 AAC_CHANNEL_FRONT, &layout); 318 num_front_channels -= 2; 319 } 320 if (num_front_channels >= 2) { 321 i += assign_pair(e2c_vec, layout_map, i, 322 AV_CH_FRONT_LEFT, 323 AV_CH_FRONT_RIGHT, 324 AAC_CHANNEL_FRONT, &layout); 325 num_front_channels -= 2; 326 } 327 while (num_front_channels >= 2) { 328 i += assign_pair(e2c_vec, layout_map, i, 329 UINT64_MAX, 330 UINT64_MAX, 331 AAC_CHANNEL_FRONT, &layout); 332 num_front_channels -= 2; 333 } 334 335 if (num_side_channels >= 2) { 336 i += assign_pair(e2c_vec, layout_map, i, 337 AV_CH_SIDE_LEFT, 338 
AV_CH_SIDE_RIGHT, 339 AAC_CHANNEL_FRONT, &layout); 340 num_side_channels -= 2; 341 } 342 while (num_side_channels >= 2) { 343 i += assign_pair(e2c_vec, layout_map, i, 344 UINT64_MAX, 345 UINT64_MAX, 346 AAC_CHANNEL_SIDE, &layout); 347 num_side_channels -= 2; 348 } 349 350 while (num_back_channels >= 4) { 351 i += assign_pair(e2c_vec, layout_map, i, 352 UINT64_MAX, 353 UINT64_MAX, 354 AAC_CHANNEL_BACK, &layout); 355 num_back_channels -= 2; 356 } 357 if (num_back_channels >= 2) { 358 i += assign_pair(e2c_vec, layout_map, i, 359 AV_CH_BACK_LEFT, 360 AV_CH_BACK_RIGHT, 361 AAC_CHANNEL_BACK, &layout); 362 num_back_channels -= 2; 363 } 364 if (num_back_channels) { 365 e2c_vec[i] = (struct elem_to_channel) { 366 .av_position = AV_CH_BACK_CENTER, 367 .syn_ele = TYPE_SCE, 368 .elem_id = layout_map[i][1], 369 .aac_position = AAC_CHANNEL_BACK 370 }; 371 layout |= e2c_vec[i].av_position; 372 i++; 373 num_back_channels--; 374 } 375 376 if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { 377 e2c_vec[i] = (struct elem_to_channel) { 378 .av_position = AV_CH_LOW_FREQUENCY, 379 .syn_ele = TYPE_LFE, 380 .elem_id = layout_map[i][1], 381 .aac_position = AAC_CHANNEL_LFE 382 }; 383 layout |= e2c_vec[i].av_position; 384 i++; 385 } 386 if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { 387 e2c_vec[i] = (struct elem_to_channel) { 388 .av_position = AV_CH_LOW_FREQUENCY_2, 389 .syn_ele = TYPE_LFE, 390 .elem_id = layout_map[i][1], 391 .aac_position = AAC_CHANNEL_LFE 392 }; 393 layout |= e2c_vec[i].av_position; 394 i++; 395 } 396 while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { 397 e2c_vec[i] = (struct elem_to_channel) { 398 .av_position = UINT64_MAX, 399 .syn_ele = TYPE_LFE, 400 .elem_id = layout_map[i][1], 401 .aac_position = AAC_CHANNEL_LFE 402 }; 403 i++; 404 } 405 406 // The previous checks would end up at 8 at this point for 22.2 407 if (layout == PREFIX_FOR_22POINT2 && tags == 16 && i == 8) { 408 const uint8_t (*reference_layout_map)[3] = aac_channel_layout_map[12]; 409 
for (int j = 0; j < tags; j++) { 410 if (layout_map[j][0] != reference_layout_map[j][0] || 411 layout_map[j][2] != reference_layout_map[j][2]) 412 goto end_of_layout_definition; 413 } 414 415 e2c_vec[i] = (struct elem_to_channel) { 416 .av_position = AV_CH_TOP_FRONT_CENTER, 417 .syn_ele = layout_map[i][0], 418 .elem_id = layout_map[i][1], 419 .aac_position = layout_map[i][2] 420 }; layout |= e2c_vec[i].av_position; i++; 421 i += assign_pair(e2c_vec, layout_map, i, 422 AV_CH_TOP_FRONT_LEFT, 423 AV_CH_TOP_FRONT_RIGHT, 424 AAC_CHANNEL_FRONT, 425 &layout); 426 i += assign_pair(e2c_vec, layout_map, i, 427 AV_CH_TOP_SIDE_LEFT, 428 AV_CH_TOP_SIDE_RIGHT, 429 AAC_CHANNEL_SIDE, 430 &layout); 431 e2c_vec[i] = (struct elem_to_channel) { 432 .av_position = AV_CH_TOP_CENTER, 433 .syn_ele = layout_map[i][0], 434 .elem_id = layout_map[i][1], 435 .aac_position = layout_map[i][2] 436 }; layout |= e2c_vec[i].av_position; i++; 437 i += assign_pair(e2c_vec, layout_map, i, 438 AV_CH_TOP_BACK_LEFT, 439 AV_CH_TOP_BACK_RIGHT, 440 AAC_CHANNEL_BACK, 441 &layout); 442 e2c_vec[i] = (struct elem_to_channel) { 443 .av_position = AV_CH_TOP_BACK_CENTER, 444 .syn_ele = layout_map[i][0], 445 .elem_id = layout_map[i][1], 446 .aac_position = layout_map[i][2] 447 }; layout |= e2c_vec[i].av_position; i++; 448 e2c_vec[i] = (struct elem_to_channel) { 449 .av_position = AV_CH_BOTTOM_FRONT_CENTER, 450 .syn_ele = layout_map[i][0], 451 .elem_id = layout_map[i][1], 452 .aac_position = layout_map[i][2] 453 }; layout |= e2c_vec[i].av_position; i++; 454 i += assign_pair(e2c_vec, layout_map, i, 455 AV_CH_BOTTOM_FRONT_LEFT, 456 AV_CH_BOTTOM_FRONT_RIGHT, 457 AAC_CHANNEL_FRONT, 458 &layout); 459 } 460 461end_of_layout_definition: 462 463 total_non_cc_elements = n = i; 464 465 if (layout == AV_CH_LAYOUT_22POINT2) { 466 // For 22.2 reorder the result as needed 467 FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[0]); // FL & FR first (final), FC third 468 FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[1]); // FC 
second (final), FLc & FRc third 469 FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[2]); // LFE1 third (final), FLc & FRc seventh 470 FFSWAP(struct elem_to_channel, e2c_vec[4], e2c_vec[3]); // BL & BR fourth (final), SiL & SiR fifth 471 FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[4]); // FLc & FRc fifth (final), SiL & SiR seventh 472 FFSWAP(struct elem_to_channel, e2c_vec[7], e2c_vec[6]); // LFE2 seventh (final), SiL & SiR eight (final) 473 FFSWAP(struct elem_to_channel, e2c_vec[9], e2c_vec[8]); // TpFL & TpFR ninth (final), TFC tenth (final) 474 FFSWAP(struct elem_to_channel, e2c_vec[11], e2c_vec[10]); // TC eleventh (final), TpSiL & TpSiR twelth 475 FFSWAP(struct elem_to_channel, e2c_vec[12], e2c_vec[11]); // TpBL & TpBR twelth (final), TpSiL & TpSiR thirteenth (final) 476 } else { 477 // For everything else, utilize the AV channel position define as a 478 // stable sort. 479 do { 480 int next_n = 0; 481 for (i = 1; i < n; i++) 482 if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) { 483 FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]); 484 next_n = i; 485 } 486 n = next_n; 487 } while (n > 0); 488 489 } 490 491 for (i = 0; i < total_non_cc_elements; i++) { 492 layout_map[i][0] = e2c_vec[i].syn_ele; 493 layout_map[i][1] = e2c_vec[i].elem_id; 494 layout_map[i][2] = e2c_vec[i].aac_position; 495 } 496 497 return layout; 498} 499 500/** 501 * Save current output configuration if and only if it has been locked. 502 */ 503static int push_output_configuration(AACContext *ac) { 504 int pushed = 0; 505 506 if (ac->oc[1].status == OC_LOCKED || ac->oc[0].status == OC_NONE) { 507 ac->oc[0] = ac->oc[1]; 508 pushed = 1; 509 } 510 ac->oc[1].status = OC_NONE; 511 return pushed; 512} 513 514/** 515 * Restore the previous output configuration if and only if the current 516 * configuration is unlocked. 
517 */ 518static void pop_output_configuration(AACContext *ac) { 519 if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) { 520 ac->oc[1] = ac->oc[0]; 521 ac->avctx->ch_layout = ac->oc[1].ch_layout; 522 output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, 523 ac->oc[1].status, 0); 524 } 525} 526 527/** 528 * Configure output channel order based on the current program 529 * configuration element. 530 * 531 * @return Returns error status. 0 - OK, !0 - error 532 */ 533static int output_configure(AACContext *ac, 534 uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags, 535 enum OCStatus oc_type, int get_new_frame) 536{ 537 AVCodecContext *avctx = ac->avctx; 538 int i, channels = 0, ret; 539 uint64_t layout = 0; 540 uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }}; 541 uint8_t type_counts[TYPE_END] = { 0 }; 542 543 if (ac->oc[1].layout_map != layout_map) { 544 memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0])); 545 ac->oc[1].layout_map_tags = tags; 546 } 547 for (i = 0; i < tags; i++) { 548 int type = layout_map[i][0]; 549 int id = layout_map[i][1]; 550 id_map[type][id] = type_counts[type]++; 551 if (id_map[type][id] >= MAX_ELEM_ID) { 552 avpriv_request_sample(ac->avctx, "Too large remapped id"); 553 return AVERROR_PATCHWELCOME; 554 } 555 } 556 // Try to sniff a reasonable channel order, otherwise output the 557 // channels in the order the PCE declared them. 
558#if FF_API_OLD_CHANNEL_LAYOUT 559FF_DISABLE_DEPRECATION_WARNINGS 560 if (avctx->request_channel_layout == AV_CH_LAYOUT_NATIVE) 561 ac->output_channel_order = CHANNEL_ORDER_CODED; 562FF_ENABLE_DEPRECATION_WARNINGS 563#endif 564 565 if (ac->output_channel_order == CHANNEL_ORDER_DEFAULT) 566 layout = sniff_channel_order(layout_map, tags); 567 for (i = 0; i < tags; i++) { 568 int type = layout_map[i][0]; 569 int id = layout_map[i][1]; 570 int iid = id_map[type][id]; 571 int position = layout_map[i][2]; 572 // Allocate or free elements depending on if they are in the 573 // current program configuration. 574 ret = che_configure(ac, position, type, iid, &channels); 575 if (ret < 0) 576 return ret; 577 ac->tag_che_map[type][id] = ac->che[type][iid]; 578 } 579 if (ac->oc[1].m4ac.ps == 1 && channels == 2) { 580 if (layout == AV_CH_FRONT_CENTER) { 581 layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT; 582 } else { 583 layout = 0; 584 } 585 } 586 587 av_channel_layout_uninit(&ac->oc[1].ch_layout); 588 if (layout) 589 av_channel_layout_from_mask(&ac->oc[1].ch_layout, layout); 590 else { 591 ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; 592 ac->oc[1].ch_layout.nb_channels = channels; 593 } 594 595 av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout); 596 ac->oc[1].status = oc_type; 597 598 if (get_new_frame) { 599 if ((ret = frame_configure_elements(ac->avctx)) < 0) 600 return ret; 601 } 602 603 return 0; 604} 605 606static void flush(AVCodecContext *avctx) 607{ 608 AACContext *ac= avctx->priv_data; 609 int type, i, j; 610 611 for (type = 3; type >= 0; type--) { 612 for (i = 0; i < MAX_ELEM_ID; i++) { 613 ChannelElement *che = ac->che[type][i]; 614 if (che) { 615 for (j = 0; j <= 1; j++) { 616 memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved)); 617 } 618 } 619 } 620 } 621} 622 623/** 624 * Set up channel positions based on a default channel configuration 625 * as specified in table 1.17. 626 * 627 * @return Returns error status. 
0 - OK, !0 - error 628 */ 629static int set_default_channel_config(AACContext *ac, AVCodecContext *avctx, 630 uint8_t (*layout_map)[3], 631 int *tags, 632 int channel_config) 633{ 634 if (channel_config < 1 || (channel_config > 7 && channel_config < 11) || 635 channel_config > 13) { 636 av_log(avctx, AV_LOG_ERROR, 637 "invalid default channel configuration (%d)\n", 638 channel_config); 639 return AVERROR_INVALIDDATA; 640 } 641 *tags = tags_per_config[channel_config]; 642 memcpy(layout_map, aac_channel_layout_map[channel_config - 1], 643 *tags * sizeof(*layout_map)); 644 645 /* 646 * AAC specification has 7.1(wide) as a default layout for 8-channel streams. 647 * However, at least Nero AAC encoder encodes 7.1 streams using the default 648 * channel config 7, mapping the side channels of the original audio stream 649 * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD 650 * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding 651 * the incorrect streams as if they were correct (and as the encoder intended). 652 * 653 * As actual intended 7.1(wide) streams are very rare, default to assuming a 654 * 7.1 layout was intended. 655 */ 656 if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { 657 layout_map[2][2] = AAC_CHANNEL_SIDE; 658 659 if (!ac || !ac->warned_71_wide++) { 660 av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout" 661 " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode" 662 " according to the specification instead.\n", FF_COMPLIANCE_STRICT); 663 } 664 } 665 666 return 0; 667} 668 669static ChannelElement *get_che(AACContext *ac, int type, int elem_id) 670{ 671 /* For PCE based channel configurations map the channels solely based 672 * on tags. */ 673 if (!ac->oc[1].m4ac.chan_config) { 674 return ac->tag_che_map[type][elem_id]; 675 } 676 // Allow single CPE stereo files to be signalled with mono configuration. 
677 if (!ac->tags_mapped && type == TYPE_CPE && 678 ac->oc[1].m4ac.chan_config == 1) { 679 uint8_t layout_map[MAX_ELEM_ID*4][3]; 680 int layout_map_tags; 681 push_output_configuration(ac); 682 683 av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n"); 684 685 if (set_default_channel_config(ac, ac->avctx, layout_map, 686 &layout_map_tags, 2) < 0) 687 return NULL; 688 if (output_configure(ac, layout_map, layout_map_tags, 689 OC_TRIAL_FRAME, 1) < 0) 690 return NULL; 691 692 ac->oc[1].m4ac.chan_config = 2; 693 ac->oc[1].m4ac.ps = 0; 694 } 695 // And vice-versa 696 if (!ac->tags_mapped && type == TYPE_SCE && 697 ac->oc[1].m4ac.chan_config == 2) { 698 uint8_t layout_map[MAX_ELEM_ID * 4][3]; 699 int layout_map_tags; 700 push_output_configuration(ac); 701 702 av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n"); 703 704 layout_map_tags = 2; 705 layout_map[0][0] = layout_map[1][0] = TYPE_SCE; 706 layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT; 707 layout_map[0][1] = 0; 708 layout_map[1][1] = 1; 709 if (output_configure(ac, layout_map, layout_map_tags, 710 OC_TRIAL_FRAME, 1) < 0) 711 return NULL; 712 713 if (ac->oc[1].m4ac.sbr) 714 ac->oc[1].m4ac.ps = -1; 715 } 716 /* For indexed channel configurations map the channels solely based 717 * on position. 
*/ 718 switch (ac->oc[1].m4ac.chan_config) { 719 case 13: 720 if (ac->tags_mapped > 3 && ((type == TYPE_CPE && elem_id < 8) || 721 (type == TYPE_SCE && elem_id < 6) || 722 (type == TYPE_LFE && elem_id < 2))) { 723 ac->tags_mapped++; 724 return ac->tag_che_map[type][elem_id] = ac->che[type][elem_id]; 725 } 726 case 12: 727 case 7: 728 if (ac->tags_mapped == 3 && type == TYPE_CPE) { 729 ac->tags_mapped++; 730 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2]; 731 } 732 case 11: 733 if (ac->tags_mapped == 3 && type == TYPE_SCE) { 734 ac->tags_mapped++; 735 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1]; 736 } 737 case 6: 738 /* Some streams incorrectly code 5.1 audio as 739 * SCE[0] CPE[0] CPE[1] SCE[1] 740 * instead of 741 * SCE[0] CPE[0] CPE[1] LFE[0]. 742 * If we seem to have encountered such a stream, transfer 743 * the LFE[0] element to the SCE[1]'s mapping */ 744 if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) { 745 if (!ac->warned_remapping_once && (type != TYPE_LFE || elem_id != 0)) { 746 av_log(ac->avctx, AV_LOG_WARNING, 747 "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n", 748 type == TYPE_SCE ? "SCE" : "LFE", elem_id); 749 ac->warned_remapping_once++; 750 } 751 ac->tags_mapped++; 752 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0]; 753 } 754 case 5: 755 if (ac->tags_mapped == 2 && type == TYPE_CPE) { 756 ac->tags_mapped++; 757 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1]; 758 } 759 case 4: 760 /* Some streams incorrectly code 4.0 audio as 761 * SCE[0] CPE[0] LFE[0] 762 * instead of 763 * SCE[0] CPE[0] SCE[1]. 
764 * If we seem to have encountered such a stream, transfer 765 * the SCE[1] element to the LFE[0]'s mapping */ 766 if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) { 767 if (!ac->warned_remapping_once && (type != TYPE_SCE || elem_id != 1)) { 768 av_log(ac->avctx, AV_LOG_WARNING, 769 "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n", 770 type == TYPE_SCE ? "SCE" : "LFE", elem_id); 771 ac->warned_remapping_once++; 772 } 773 ac->tags_mapped++; 774 return ac->tag_che_map[type][elem_id] = ac->che[TYPE_SCE][1]; 775 } 776 if (ac->tags_mapped == 2 && 777 ac->oc[1].m4ac.chan_config == 4 && 778 type == TYPE_SCE) { 779 ac->tags_mapped++; 780 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1]; 781 } 782 case 3: 783 case 2: 784 if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && 785 type == TYPE_CPE) { 786 ac->tags_mapped++; 787 return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0]; 788 } else if (ac->tags_mapped == 1 && ac->oc[1].m4ac.chan_config == 2 && 789 type == TYPE_SCE) { 790 ac->tags_mapped++; 791 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1]; 792 } 793 case 1: 794 if (!ac->tags_mapped && type == TYPE_SCE) { 795 ac->tags_mapped++; 796 return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0]; 797 } 798 default: 799 return NULL; 800 } 801} 802 803/** 804 * Decode an array of 4 bit element IDs, optionally interleaved with a 805 * stereo/mono switching bit. 
806 * 807 * @param type speaker type/position for these channels 808 */ 809static void decode_channel_map(uint8_t layout_map[][3], 810 enum ChannelPosition type, 811 GetBitContext *gb, int n) 812{ 813 while (n--) { 814 enum RawDataBlockType syn_ele; 815 switch (type) { 816 case AAC_CHANNEL_FRONT: 817 case AAC_CHANNEL_BACK: 818 case AAC_CHANNEL_SIDE: 819 syn_ele = get_bits1(gb); 820 break; 821 case AAC_CHANNEL_CC: 822 skip_bits1(gb); 823 syn_ele = TYPE_CCE; 824 break; 825 case AAC_CHANNEL_LFE: 826 syn_ele = TYPE_LFE; 827 break; 828 default: 829 // AAC_CHANNEL_OFF has no channel map 830 av_assert0(0); 831 } 832 layout_map[0][0] = syn_ele; 833 layout_map[0][1] = get_bits(gb, 4); 834 layout_map[0][2] = type; 835 layout_map++; 836 } 837} 838 839static inline void relative_align_get_bits(GetBitContext *gb, 840 int reference_position) { 841 int n = (reference_position - get_bits_count(gb) & 7); 842 if (n) 843 skip_bits(gb, n); 844} 845 846/** 847 * Decode program configuration element; reference: table 4.2. 848 * 849 * @return Returns error status. 
0 - OK, !0 - error 850 */ 851static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac, 852 uint8_t (*layout_map)[3], 853 GetBitContext *gb, int byte_align_ref) 854{ 855 int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc; 856 int sampling_index; 857 int comment_len; 858 int tags; 859 860 skip_bits(gb, 2); // object_type 861 862 sampling_index = get_bits(gb, 4); 863 if (m4ac->sampling_index != sampling_index) 864 av_log(avctx, AV_LOG_WARNING, 865 "Sample rate index in program config element does not " 866 "match the sample rate index configured by the container.\n"); 867 868 num_front = get_bits(gb, 4); 869 num_side = get_bits(gb, 4); 870 num_back = get_bits(gb, 4); 871 num_lfe = get_bits(gb, 2); 872 num_assoc_data = get_bits(gb, 3); 873 num_cc = get_bits(gb, 4); 874 875 if (get_bits1(gb)) 876 skip_bits(gb, 4); // mono_mixdown_tag 877 if (get_bits1(gb)) 878 skip_bits(gb, 4); // stereo_mixdown_tag 879 880 if (get_bits1(gb)) 881 skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround 882 883 if (get_bits_left(gb) < 5 * (num_front + num_side + num_back + num_cc) + 4 *(num_lfe + num_assoc_data + num_cc)) { 884 av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err); 885 return -1; 886 } 887 decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front); 888 tags = num_front; 889 decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side); 890 tags += num_side; 891 decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back); 892 tags += num_back; 893 decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe); 894 tags += num_lfe; 895 896 skip_bits_long(gb, 4 * num_assoc_data); 897 898 decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc); 899 tags += num_cc; 900 901 relative_align_get_bits(gb, byte_align_ref); 902 903 /* comment field, first byte is length */ 904 comment_len = get_bits(gb, 8) * 8; 905 if (get_bits_left(gb) < comment_len) { 906 av_log(avctx, AV_LOG_ERROR, "decode_pce: " 
               overread_err);
        return AVERROR_INVALIDDATA;
    }
    skip_bits_long(gb, comment_len);
    return tags;
}

/**
 * Decode GA "General Audio" specific configuration; reference: table 4.1.
 *
 * Reads frameLengthFlag, dependsOnCoreCoder/coreCoderDelay and extensionFlag,
 * builds a channel layout map (from a PCE when channel_config is 0, otherwise
 * from the default configuration) and, when ac is non-null, applies it via
 * output_configure().
 *
 * @param   ac                 pointer to AACContext, may be null
 * @param   avctx              pointer to AVCodecContext, used for logging
 * @param   gb                 bit reader the configuration is parsed from
 * @param   get_bit_alignment  forwarded unchanged to decode_pce()
 * @param   m4ac               audio config; object_type/sbr/ps are read,
 *                             frame_length_short/sbr/ps may be written
 * @param   channel_config     0 selects an in-band PCE, any other value is
 *                             passed to set_default_channel_config()
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
                                     GetBitContext *gb,
                                     int get_bit_alignment,
                                     MPEG4AudioConfig *m4ac,
                                     int channel_config)
{
    int extension_flag, ret, ep_config, res_flags;
    uint8_t layout_map[MAX_ELEM_ID*4][3];
    int tags = 0;

#if USE_FIXED
    /* The fixed-point build rejects the short (960/120) frame length. */
    if (get_bits1(gb)) { // frameLengthFlag
        avpriv_report_missing_feature(avctx, "Fixed point 960/120 MDCT window");
        return AVERROR_PATCHWELCOME;
    }
    m4ac->frame_length_short = 0;
#else
    m4ac->frame_length_short = get_bits1(gb);
    /* SBR is switched off (together with PS) when the short frame length
     * is signalled; decoding continues without it. */
    if (m4ac->frame_length_short && m4ac->sbr == 1) {
      avpriv_report_missing_feature(avctx, "SBR with 960 frame length");
      if (ac) ac->warned_960_sbr = 1;
      m4ac->sbr = 0;
      m4ac->ps = 0;
    }
#endif

    if (get_bits1(gb))       // dependsOnCoreCoder
        skip_bits(gb, 14);   // coreCoderDelay
    extension_flag = get_bits1(gb);

    if (m4ac->object_type == AOT_AAC_SCALABLE ||
        m4ac->object_type == AOT_ER_AAC_SCALABLE)
        skip_bits(gb, 3);     // layerNr

    if (channel_config == 0) {
        skip_bits(gb, 4);  // element_instance_tag
        tags = decode_pce(avctx, m4ac, layout_map, gb, get_bit_alignment);
        if (tags < 0)
            return tags;
    } else {
        if ((ret = set_default_channel_config(ac, avctx, layout_map,
                                              &tags, channel_config)))
            return ret;
    }

    /* PS is forced off for more than one channel; for a single channel with
     * SBR explicitly on and PS still undecided (-1), PS is assumed on. */
    if (count_channels(layout_map, tags) > 1) {
        m4ac->ps = 0;
    } else if (m4ac->sbr == 1 && m4ac->ps == -1)
        m4ac->ps = 1;

    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
        return ret;

    if (extension_flag) {
        switch (m4ac->object_type) {
        case AOT_ER_BSAC:
            skip_bits(gb, 5);    // numOfSubFrame
            skip_bits(gb, 11);   // layer_length
            break;
        case AOT_ER_AAC_LC:
        case AOT_ER_AAC_LTP:
        case AOT_ER_AAC_SCALABLE:
        case AOT_ER_AAC_LD:
            /* Any non-zero resilience flag is reported as unsupported. */
            res_flags = get_bits(gb, 3);
            if (res_flags) {
                avpriv_report_missing_feature(avctx,
                                              "AAC data resilience (flags %x)",
                                              res_flags);
                return AVERROR_PATCHWELCOME;
            }
            break;
        }
        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
    }
    switch (m4ac->object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_SCALABLE:
    case AOT_ER_AAC_LD:
        /* Only epConfig 0 is accepted. */
        ep_config = get_bits(gb, 2);
        if (ep_config) {
            avpriv_report_missing_feature(avctx,
                                          "epConfig %d", ep_config);
            return AVERROR_PATCHWELCOME;
        }
    }
    return 0;
}

/**
 * Decode the ELD specific configuration.
 *
 * Clears m4ac->sbr and m4ac->ps, rejects data resilience flags and Low Delay
 * SBR, skips every ELD extension payload, then configures the default
 * channel layout for channel_config.
 *
 * @param   ac              pointer to AACContext; output_configure() is only
 *                          called when it is non-null
 * @param   avctx           pointer to AVCodecContext, used for logging
 * @param   gb              bit reader the configuration is parsed from
 * @param   m4ac            audio config that is updated while parsing
 * @param   channel_config  passed to set_default_channel_config()
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
                                     GetBitContext *gb,
                                     MPEG4AudioConfig *m4ac,
                                     int channel_config)
{
    int ret, ep_config, res_flags;
    uint8_t layout_map[MAX_ELEM_ID*4][3];
    int tags = 0;
    const int ELDEXT_TERM = 0;

    m4ac->ps  = 0;
    m4ac->sbr = 0;
#if USE_FIXED
    if (get_bits1(gb)) { // frameLengthFlag
        avpriv_request_sample(avctx, "960/120 MDCT window");
        return AVERROR_PATCHWELCOME;
    }
#else
    m4ac->frame_length_short = get_bits1(gb);
#endif
    res_flags = get_bits(gb, 3);
    if (res_flags) {
        avpriv_report_missing_feature(avctx,
                                      "AAC data resilience (flags %x)",
                                      res_flags);
        return AVERROR_PATCHWELCOME;
    }

    if (get_bits1(gb)) { // ldSbrPresentFlag
        avpriv_report_missing_feature(avctx,
                                      "Low Delay SBR");
        return AVERROR_PATCHWELCOME;
    }

    /* Skip extension payloads until the terminator type is read. The length
     * is escape coded: 4 bits, then 8 more if saturated, then 16 more if
     * still saturated. */
    while (get_bits(gb, 4) != ELDEXT_TERM) {
        int len = get_bits(gb, 4);
        if (len == 15)
            len += get_bits(gb, 8);
        if (len == 15 + 255)
            len += get_bits(gb, 16);
        /* The payload plus the next 4-bit extension type must be available. */
        if (get_bits_left(gb) < len * 8 + 4) {
            av_log(avctx, AV_LOG_ERROR, overread_err);
            return AVERROR_INVALIDDATA;
        }
        skip_bits_long(gb, 8 * len);
    }

    if ((ret = set_default_channel_config(ac, avctx, layout_map,
                                          &tags, channel_config)))
        return ret;

    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
        return ret;

    ep_config = get_bits(gb, 2);
    if (ep_config) {
        avpriv_report_missing_feature(avctx,
                                      "epConfig %d", ep_config);
        return AVERROR_PATCHWELCOME;
    }
    return 0;
}

/**
 * Decode audio specific configuration; reference: table 1.13.
 *
 * @param   ac                 pointer to AACContext, may be null
 * @param   avctx              pointer to AVCodecContext, used for logging
 * @param   m4ac               pointer to MPEG4AudioConfig, used for parsing
 * @param   gb                 buffer holding an audio specific config
 * @param   get_bit_alignment  relative alignment for byte align operations
 * @param   sync_extension     look for an appended sync extension
 *
 * @return  Returns error status or number of consumed bits.
<0 - error 1084 */ 1085static int decode_audio_specific_config_gb(AACContext *ac, 1086 AVCodecContext *avctx, 1087 MPEG4AudioConfig *m4ac, 1088 GetBitContext *gb, 1089 int get_bit_alignment, 1090 int sync_extension) 1091{ 1092 int i, ret; 1093 GetBitContext gbc = *gb; 1094 MPEG4AudioConfig m4ac_bak = *m4ac; 1095 1096 if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) { 1097 *m4ac = m4ac_bak; 1098 return AVERROR_INVALIDDATA; 1099 } 1100 1101 if (m4ac->sampling_index > 12) { 1102 av_log(avctx, AV_LOG_ERROR, 1103 "invalid sampling rate index %d\n", 1104 m4ac->sampling_index); 1105 *m4ac = m4ac_bak; 1106 return AVERROR_INVALIDDATA; 1107 } 1108 if (m4ac->object_type == AOT_ER_AAC_LD && 1109 (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) { 1110 av_log(avctx, AV_LOG_ERROR, 1111 "invalid low delay sampling rate index %d\n", 1112 m4ac->sampling_index); 1113 *m4ac = m4ac_bak; 1114 return AVERROR_INVALIDDATA; 1115 } 1116 1117 skip_bits_long(gb, i); 1118 1119 switch (m4ac->object_type) { 1120 case AOT_AAC_MAIN: 1121 case AOT_AAC_LC: 1122 case AOT_AAC_SSR: 1123 case AOT_AAC_LTP: 1124 case AOT_ER_AAC_LC: 1125 case AOT_ER_AAC_LD: 1126 if ((ret = decode_ga_specific_config(ac, avctx, gb, get_bit_alignment, 1127 m4ac, m4ac->chan_config)) < 0) 1128 return ret; 1129 break; 1130 case AOT_ER_AAC_ELD: 1131 if ((ret = decode_eld_specific_config(ac, avctx, gb, 1132 m4ac, m4ac->chan_config)) < 0) 1133 return ret; 1134 break; 1135 default: 1136 avpriv_report_missing_feature(avctx, 1137 "Audio object type %s%d", 1138 m4ac->sbr == 1 ? 
"SBR+" : "", 1139 m4ac->object_type); 1140 return AVERROR(ENOSYS); 1141 } 1142 1143 ff_dlog(avctx, 1144 "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n", 1145 m4ac->object_type, m4ac->chan_config, m4ac->sampling_index, 1146 m4ac->sample_rate, m4ac->sbr, 1147 m4ac->ps); 1148 1149 return get_bits_count(gb); 1150} 1151 1152static int decode_audio_specific_config(AACContext *ac, 1153 AVCodecContext *avctx, 1154 MPEG4AudioConfig *m4ac, 1155 const uint8_t *data, int64_t bit_size, 1156 int sync_extension) 1157{ 1158 int i, ret; 1159 GetBitContext gb; 1160 1161 if (bit_size < 0 || bit_size > INT_MAX) { 1162 av_log(avctx, AV_LOG_ERROR, "Audio specific config size is invalid\n"); 1163 return AVERROR_INVALIDDATA; 1164 } 1165 1166 ff_dlog(avctx, "audio specific config size %d\n", (int)bit_size >> 3); 1167 for (i = 0; i < bit_size >> 3; i++) 1168 ff_dlog(avctx, "%02x ", data[i]); 1169 ff_dlog(avctx, "\n"); 1170 1171 if ((ret = init_get_bits(&gb, data, bit_size)) < 0) 1172 return ret; 1173 1174 return decode_audio_specific_config_gb(ac, avctx, m4ac, &gb, 0, 1175 sync_extension); 1176} 1177 1178/** 1179 * linear congruential pseudorandom number generator 1180 * 1181 * @param previous_val pointer to the current state of the generator 1182 * 1183 * @return Returns a 32-bit pseudorandom integer 1184 */ 1185static av_always_inline int lcg_random(unsigned previous_val) 1186{ 1187 union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; 1188 return v.s; 1189} 1190 1191static void reset_all_predictors(PredictorState *ps) 1192{ 1193 int i; 1194 for (i = 0; i < MAX_PREDICTORS; i++) 1195 reset_predict_state(&ps[i]); 1196} 1197 1198static int sample_rate_idx (int rate) 1199{ 1200 if (92017 <= rate) return 0; 1201 else if (75132 <= rate) return 1; 1202 else if (55426 <= rate) return 2; 1203 else if (46009 <= rate) return 3; 1204 else if (37566 <= rate) return 4; 1205 else if (27713 <= rate) return 5; 1206 else if (23004 <= rate) return 6; 1207 else if (18783 <= 
rate) return 7; 1208 else if (13856 <= rate) return 8; 1209 else if (11502 <= rate) return 9; 1210 else if (9391 <= rate) return 10; 1211 else return 11; 1212} 1213 1214static void reset_predictor_group(PredictorState *ps, int group_num) 1215{ 1216 int i; 1217 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30) 1218 reset_predict_state(&ps[i]); 1219} 1220 1221static void aacdec_init(AACContext *ac); 1222 1223static av_cold void aac_static_table_init(void) 1224{ 1225 static VLCElem vlc_buf[304 + 270 + 550 + 300 + 328 + 1226 294 + 306 + 268 + 510 + 366 + 462]; 1227 for (unsigned i = 0, offset = 0; i < 11; i++) { 1228 vlc_spectral[i].table = &vlc_buf[offset]; 1229 vlc_spectral[i].table_allocated = FF_ARRAY_ELEMS(vlc_buf) - offset; 1230 ff_init_vlc_sparse(&vlc_spectral[i], 8, ff_aac_spectral_sizes[i], 1231 ff_aac_spectral_bits[i], sizeof(ff_aac_spectral_bits[i][0]), 1232 sizeof(ff_aac_spectral_bits[i][0]), 1233 ff_aac_spectral_codes[i], sizeof(ff_aac_spectral_codes[i][0]), 1234 sizeof(ff_aac_spectral_codes[i][0]), 1235 ff_aac_codebook_vector_idx[i], sizeof(ff_aac_codebook_vector_idx[i][0]), 1236 sizeof(ff_aac_codebook_vector_idx[i][0]), 1237 INIT_VLC_STATIC_OVERLONG); 1238 offset += vlc_spectral[i].table_size; 1239 } 1240 1241 AAC_RENAME(ff_aac_sbr_init)(); 1242 1243 ff_aac_tableinit(); 1244 1245 INIT_VLC_STATIC(&vlc_scalefactors, 7, 1246 FF_ARRAY_ELEMS(ff_aac_scalefactor_code), 1247 ff_aac_scalefactor_bits, 1248 sizeof(ff_aac_scalefactor_bits[0]), 1249 sizeof(ff_aac_scalefactor_bits[0]), 1250 ff_aac_scalefactor_code, 1251 sizeof(ff_aac_scalefactor_code[0]), 1252 sizeof(ff_aac_scalefactor_code[0]), 1253 352); 1254 1255 // window initialization 1256#if !USE_FIXED 1257 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960); 1258 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120); 1259 AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960); 1260 AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120); 1261 
AAC_RENAME(ff_init_ff_sine_windows)(9); 1262 ff_aac_float_common_init(); 1263#else 1264 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024); 1265 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128); 1266 init_sine_windows_fixed(); 1267#endif 1268 1269 AAC_RENAME(ff_cbrt_tableinit)(); 1270} 1271 1272static AVOnce aac_table_init = AV_ONCE_INIT; 1273 1274static av_cold int aac_decode_init(AVCodecContext *avctx) 1275{ 1276 AACContext *ac = avctx->priv_data; 1277 int ret; 1278 1279 if (avctx->sample_rate > 96000) 1280 return AVERROR_INVALIDDATA; 1281 1282 ret = ff_thread_once(&aac_table_init, &aac_static_table_init); 1283 if (ret != 0) 1284 return AVERROR_UNKNOWN; 1285 1286 ac->avctx = avctx; 1287 ac->oc[1].m4ac.sample_rate = avctx->sample_rate; 1288 1289 aacdec_init(ac); 1290#if USE_FIXED 1291 avctx->sample_fmt = AV_SAMPLE_FMT_S32P; 1292#else 1293 avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; 1294#endif /* USE_FIXED */ 1295 1296 if (avctx->extradata_size > 0) { 1297 if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac, 1298 avctx->extradata, 1299 avctx->extradata_size * 8LL, 1300 1)) < 0) 1301 return ret; 1302 } else { 1303 int sr, i; 1304 uint8_t layout_map[MAX_ELEM_ID*4][3]; 1305 int layout_map_tags; 1306 1307 sr = sample_rate_idx(avctx->sample_rate); 1308 ac->oc[1].m4ac.sampling_index = sr; 1309 ac->oc[1].m4ac.channels = avctx->ch_layout.nb_channels; 1310 ac->oc[1].m4ac.sbr = -1; 1311 ac->oc[1].m4ac.ps = -1; 1312 1313 for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++) 1314 if (ff_mpeg4audio_channels[i] == avctx->ch_layout.nb_channels) 1315 break; 1316 if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) { 1317 i = 0; 1318 } 1319 ac->oc[1].m4ac.chan_config = i; 1320 1321 if (ac->oc[1].m4ac.chan_config) { 1322 int ret = set_default_channel_config(ac, avctx, layout_map, 1323 &layout_map_tags, ac->oc[1].m4ac.chan_config); 1324 if (!ret) 1325 output_configure(ac, layout_map, layout_map_tags, 1326 OC_GLOBAL_HDR, 
0); 1327 else if (avctx->err_recognition & AV_EF_EXPLODE) 1328 return AVERROR_INVALIDDATA; 1329 } 1330 } 1331 1332 if (avctx->ch_layout.nb_channels > MAX_CHANNELS) { 1333 av_log(avctx, AV_LOG_ERROR, "Too many channels\n"); 1334 return AVERROR_INVALIDDATA; 1335 } 1336 1337#if USE_FIXED 1338 ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT); 1339#else 1340 ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); 1341#endif /* USE_FIXED */ 1342 if (!ac->fdsp) { 1343 return AVERROR(ENOMEM); 1344 } 1345 1346 ac->random_state = 0x1f2e3d4c; 1347 1348 AAC_RENAME_32(ff_mdct_init)(&ac->mdct, 11, 1, 1.0 / RANGE15(1024.0)); 1349 AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ld, 10, 1, 1.0 / RANGE15(512.0)); 1350 AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0)); 1351 AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); 1352#if !USE_FIXED 1353 ret = ff_mdct15_init(&ac->mdct120, 1, 3, 1.0f/(16*1024*120*2)); 1354 if (ret < 0) 1355 return ret; 1356 ret = ff_mdct15_init(&ac->mdct480, 1, 5, 1.0f/(16*1024*960)); 1357 if (ret < 0) 1358 return ret; 1359 ret = ff_mdct15_init(&ac->mdct960, 1, 6, 1.0f/(16*1024*960*2)); 1360 if (ret < 0) 1361 return ret; 1362#endif 1363 1364 return 0; 1365} 1366 1367/** 1368 * Skip data_stream_element; reference: table 4.10. 
1369 */ 1370static int skip_data_stream_element(AACContext *ac, GetBitContext *gb) 1371{ 1372 int byte_align = get_bits1(gb); 1373 int count = get_bits(gb, 8); 1374 if (count == 255) 1375 count += get_bits(gb, 8); 1376 if (byte_align) 1377 align_get_bits(gb); 1378 1379 if (get_bits_left(gb) < 8 * count) { 1380 av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err); 1381 return AVERROR_INVALIDDATA; 1382 } 1383 skip_bits_long(gb, 8 * count); 1384 return 0; 1385} 1386 1387static int decode_prediction(AACContext *ac, IndividualChannelStream *ics, 1388 GetBitContext *gb) 1389{ 1390 int sfb; 1391 if (get_bits1(gb)) { 1392 ics->predictor_reset_group = get_bits(gb, 5); 1393 if (ics->predictor_reset_group == 0 || 1394 ics->predictor_reset_group > 30) { 1395 av_log(ac->avctx, AV_LOG_ERROR, 1396 "Invalid Predictor Reset Group.\n"); 1397 return AVERROR_INVALIDDATA; 1398 } 1399 } 1400 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) { 1401 ics->prediction_used[sfb] = get_bits1(gb); 1402 } 1403 return 0; 1404} 1405 1406/** 1407 * Decode Long Term Prediction data; reference: table 4.xx. 1408 */ 1409static void decode_ltp(LongTermPrediction *ltp, 1410 GetBitContext *gb, uint8_t max_sfb) 1411{ 1412 int sfb; 1413 1414 ltp->lag = get_bits(gb, 11); 1415 ltp->coef = ltp_coef[get_bits(gb, 3)]; 1416 for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++) 1417 ltp->used[sfb] = get_bits1(gb); 1418} 1419 1420/** 1421 * Decode Individual Channel Stream info; reference: table 4.6. 
 *
 * Reads the window sequence and shape, max_sfb, the window grouping and the
 * prediction/LTP side info, and selects the scalefactor band tables that
 * match the object type, frame length and sampling index.
 *
 * @return  0 on success, a negative error code otherwise. On the paths that
 *          jump to the fail label, ics->max_sfb is reset to 0.
 */
static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                           GetBitContext *gb)
{
    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
    const int aot = m4ac->object_type;
    const int sampling_index = m4ac->sampling_index;
    int ret_fail = AVERROR_INVALIDDATA;

    /* For ELD none of the bits in this first section are read. */
    if (aot != AOT_ER_AAC_ELD) {
        if (get_bits1(gb)) {
            av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
            if (ac->avctx->err_recognition & AV_EF_BITSTREAM)
                return AVERROR_INVALIDDATA;
        }
        /* Index 1 keeps the previous value, index 0 receives the new one. */
        ics->window_sequence[1] = ics->window_sequence[0];
        ics->window_sequence[0] = get_bits(gb, 2);
        if (aot == AOT_ER_AAC_LD &&
            ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
            av_log(ac->avctx, AV_LOG_ERROR,
                   "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
                   "window sequence %d found.\n", ics->window_sequence[0]);
            ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
            return AVERROR_INVALIDDATA;
        }
        ics->use_kb_window[1]   = ics->use_kb_window[0];
        ics->use_kb_window[0]   = get_bits1(gb);
    }
    ics->num_window_groups  = 1;
    ics->group_len[0]       = 1;
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        int i;
        ics->max_sfb = get_bits(gb, 4);
        /* Seven grouping bits: a set bit extends the current group by one
         * window, a clear bit starts a new group. */
        for (i = 0; i < 7; i++) {
            if (get_bits1(gb)) {
                ics->group_len[ics->num_window_groups - 1]++;
            } else {
                ics->num_window_groups++;
                ics->group_len[ics->num_window_groups - 1] = 1;
            }
        }
        ics->num_windows       = 8;
        if (m4ac->frame_length_short) {
            ics->swb_offset    =  ff_swb_offset_120[sampling_index];
            ics->num_swb       = ff_aac_num_swb_120[sampling_index];
        } else {
            ics->swb_offset    =  ff_swb_offset_128[sampling_index];
            ics->num_swb       = ff_aac_num_swb_128[sampling_index];
        }
        ics->tns_max_bands     = ff_tns_max_bands_128[sampling_index];
        ics->predictor_present = 0;
    } else {
        ics->max_sfb           = get_bits(gb, 6);
        ics->num_windows       = 1;
        if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) {
            if (m4ac->frame_length_short) {
                ics->swb_offset    =     ff_swb_offset_480[sampling_index];
                ics->num_swb       =    ff_aac_num_swb_480[sampling_index];
                ics->tns_max_bands =  ff_tns_max_bands_480[sampling_index];
            } else {
                ics->swb_offset    =     ff_swb_offset_512[sampling_index];
                ics->num_swb       =    ff_aac_num_swb_512[sampling_index];
                ics->tns_max_bands =  ff_tns_max_bands_512[sampling_index];
            }
            /* A missing table entry for this sampling index is reported as
             * an internal error rather than as invalid data. */
            if (!ics->num_swb || !ics->swb_offset) {
                ret_fail = AVERROR_BUG;
                goto fail;
            }
        } else {
            if (m4ac->frame_length_short) {
                ics->num_swb    = ff_aac_num_swb_960[sampling_index];
                ics->swb_offset = ff_swb_offset_960[sampling_index];
            } else {
                ics->num_swb    = ff_aac_num_swb_1024[sampling_index];
                ics->swb_offset = ff_swb_offset_1024[sampling_index];
            }
            ics->tns_max_bands = ff_tns_max_bands_1024[sampling_index];
        }
        if (aot != AOT_ER_AAC_ELD) {
            ics->predictor_present     = get_bits1(gb);
            ics->predictor_reset_group = 0;
        }
        if (ics->predictor_present) {
            if (aot == AOT_AAC_MAIN) {
                if (decode_prediction(ac, ics, gb)) {
                    goto fail;
                }
            } else if (aot == AOT_AAC_LC ||
                       aot == AOT_ER_AAC_LC) {
                av_log(ac->avctx, AV_LOG_ERROR,
                       "Prediction is not allowed in AAC-LC.\n");
                goto fail;
            } else {
                if (aot == AOT_ER_AAC_LD) {
                    av_log(ac->avctx, AV_LOG_ERROR,
                           "LTP in ER AAC LD not yet implemented.\n");
                    ret_fail = AVERROR_PATCHWELCOME;
                    goto fail;
                }
                if ((ics->ltp.present = get_bits(gb, 1)))
                    decode_ltp(&ics->ltp, gb, ics->max_sfb);
            }
        }
    }

    /* max_sfb may not exceed the band count of the selected table. */
    if (ics->max_sfb > ics->num_swb) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Number of scalefactor bands in group (%d) "
               "exceeds limit (%d).\n",
               ics->max_sfb, ics->num_swb);
        goto fail;
    }

    return 0;
fail:
    ics->max_sfb = 0;
    return ret_fail;
}

/**
 * Decode band types (section_data payload);
reference: table 4.46. 1543 * 1544 * @param band_type array of the used band type 1545 * @param band_type_run_end array of the last scalefactor band of a band type run 1546 * 1547 * @return Returns error status. 0 - OK, !0 - error 1548 */ 1549static int decode_band_types(AACContext *ac, enum BandType band_type[120], 1550 int band_type_run_end[120], GetBitContext *gb, 1551 IndividualChannelStream *ics) 1552{ 1553 int g, idx = 0; 1554 const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5; 1555 for (g = 0; g < ics->num_window_groups; g++) { 1556 int k = 0; 1557 while (k < ics->max_sfb) { 1558 uint8_t sect_end = k; 1559 int sect_len_incr; 1560 int sect_band_type = get_bits(gb, 4); 1561 if (sect_band_type == 12) { 1562 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n"); 1563 return AVERROR_INVALIDDATA; 1564 } 1565 do { 1566 sect_len_incr = get_bits(gb, bits); 1567 sect_end += sect_len_incr; 1568 if (get_bits_left(gb) < 0) { 1569 av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err); 1570 return AVERROR_INVALIDDATA; 1571 } 1572 if (sect_end > ics->max_sfb) { 1573 av_log(ac->avctx, AV_LOG_ERROR, 1574 "Number of bands (%d) exceeds limit (%d).\n", 1575 sect_end, ics->max_sfb); 1576 return AVERROR_INVALIDDATA; 1577 } 1578 } while (sect_len_incr == (1 << bits) - 1); 1579 for (; k < sect_end; k++) { 1580 band_type [idx] = sect_band_type; 1581 band_type_run_end[idx++] = sect_end; 1582 } 1583 } 1584 } 1585 return 0; 1586} 1587 1588/** 1589 * Decode scalefactors; reference: table 4.47. 1590 * 1591 * @param global_gain first scalefactor value as scalefactors are differentially coded 1592 * @param band_type array of the used band type 1593 * @param band_type_run_end array of the last scalefactor band of a band type run 1594 * @param sf array of scalefactors or intensity stereo positions 1595 * 1596 * @return Returns error status. 
 *          0 - OK, !0 - error
 */
static int decode_scalefactors(AACContext *ac, INTFLOAT sf[120], GetBitContext *gb,
                               unsigned int global_gain,
                               IndividualChannelStream *ics,
                               enum BandType band_type[120],
                               int band_type_run_end[120])
{
    int g, i, idx = 0;
    /* Three independent running values, all differentially coded:
     * [0] regular scalefactor, [1] noise gain, [2] intensity position. */
    int offset[3] = { global_gain, global_gain - NOISE_OFFSET, 0 };
    int clipped_offset;
    int noise_flag = 1;
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb;) {
            int run_end = band_type_run_end[idx];
            if (band_type[idx] == ZERO_BT) {
                for (; i < run_end; i++, idx++)
                    sf[idx] = FIXR(0.);
            } else if ((band_type[idx] == INTENSITY_BT) ||
                       (band_type[idx] == INTENSITY_BT2)) {
                for (; i < run_end; i++, idx++) {
                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
                    clipped_offset = av_clip(offset[2], -155, 100);
                    if (offset[2] != clipped_offset) {
                        avpriv_request_sample(ac->avctx,
                                              "If you heard an audible artifact, there may be a bug in the decoder. "
                                              "Clipped intensity stereo position (%d -> %d)",
                                              offset[2], clipped_offset);
                    }
#if USE_FIXED
                    sf[idx] = 100 - clipped_offset;
#else
                    sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
#endif /* USE_FIXED */
                }
            } else if (band_type[idx] == NOISE_BT) {
                for (; i < run_end; i++, idx++) {
                    /* Only the first noise band is read as a fixed-width
                     * value; all later ones use the scalefactor VLC. */
                    if (noise_flag-- > 0)
                        offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
                    else
                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
                    clipped_offset = av_clip(offset[1], -100, 155);
                    if (offset[1] != clipped_offset) {
                        avpriv_request_sample(ac->avctx,
                                              "If you heard an audible artifact, there may be a bug in the decoder. "
                                              "Clipped noise gain (%d -> %d)",
                                              offset[1], clipped_offset);
                    }
#if USE_FIXED
                    sf[idx] = -(100 + clipped_offset);
#else
                    sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
#endif /* USE_FIXED */
                }
            } else {
                for (; i < run_end; i++, idx++) {
                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
                    /* The unsigned comparison rejects negative values as
                     * well as values above 255. */
                    if (offset[0] > 255U) {
                        av_log(ac->avctx, AV_LOG_ERROR,
                               "Scalefactor (%d) out of range.\n", offset[0]);
                        return AVERROR_INVALIDDATA;
                    }
#if USE_FIXED
                    sf[idx] = -offset[0];
#else
                    sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
#endif /* USE_FIXED */
                }
            }
        }
    }
    return 0;
}

/**
 * Decode pulse data; reference: table 4.7.
 *
 * @param   pulse       receives the pulse count, positions and amplitudes
 * @param   swb_offset  scalefactor band offsets, indexed up to num_swb
 * @param   num_swb     number of scalefactor bands
 *
 * @return  0 on success, -1 when the start band or any pulse position lies
 *          at or beyond swb_offset[num_swb]
 */
static int decode_pulses(Pulse *pulse, GetBitContext *gb,
                         const uint16_t *swb_offset, int num_swb)
{
    int i, pulse_swb;
    pulse->num_pulse = get_bits(gb, 2) + 1;
    pulse_swb        = get_bits(gb, 6);
    if (pulse_swb >= num_swb)
        return -1;
    /* The first position is relative to the start of band pulse_swb; every
     * later position is relative to the previous pulse. */
    pulse->pos[0]    = swb_offset[pulse_swb];
    pulse->pos[0]   += get_bits(gb, 5);
    if (pulse->pos[0] >= swb_offset[num_swb])
        return -1;
    pulse->amp[0]    = get_bits(gb, 4);
    for (i = 1; i < pulse->num_pulse; i++) {
        pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
        if (pulse->pos[i] >= swb_offset[num_swb])
            return -1;
        pulse->amp[i] = get_bits(gb, 4);
    }
    return 0;
}

/**
 * Decode Temporal Noise Shaping data; reference: table 4.48.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
                      GetBitContext *gb, const IndividualChannelStream *ics)
{
    int w, filt, i, coef_len, coef_res, coef_compress;
    /* Field widths shrink for eight-short windows, hence the "- is8" and
     * "- 2 * is8" terms below. */
    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
    const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
    for (w = 0; w < ics->num_windows; w++) {
        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
            coef_res = get_bits1(gb);

            for (filt = 0; filt < tns->n_filt[w]; filt++) {
                int tmp2_idx;
                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);

                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
                    av_log(ac->avctx, AV_LOG_ERROR,
                           "TNS filter order %d is greater than maximum %d.\n",
                           tns->order[w][filt], tns_max_order);
                    tns->order[w][filt] = 0;
                    return AVERROR_INVALIDDATA;
                }
                if (tns->order[w][filt]) {
                    tns->direction[w][filt] = get_bits1(gb);
                    coef_compress = get_bits1(gb);
                    /* Coefficient width and lookup table both depend on the
                     * resolution and compression flags. */
                    coef_len = coef_res + 3 - coef_compress;
                    tmp2_idx = 2 * coef_compress + coef_res;

                    for (i = 0; i < tns->order[w][filt]; i++)
                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
                }
            }
        }
    }
    return 0;
}

/**
 * Decode Mid/Side data; reference: table 4.54.
 *
 * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
 *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
 *                      [3] reserved for scalable AAC
 */
static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
                                   int ms_present)
{
    int idx;
    /* One mask entry per (window group, scalefactor band) of channel 0.
     * For any other ms_present value the mask is left untouched here. */
    int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
    if (ms_present == 1) {
        for (idx = 0; idx < max_idx; idx++)
            cpe->ms_mask[idx] = get_bits1(gb);
    } else if (ms_present == 2) {
        memset(cpe->ms_mask, 1, max_idx * sizeof(cpe->ms_mask[0]));
    }
}

/**
 * Decode spectral data; reference: table 4.50.
 * Dequantize and scale spectral data; reference: 4.6.3.3.
 *
 * @param   coef            array of dequantized, scaled spectral data
 * @param   sf              array of scalefactors or intensity stereo positions
 * @param   pulse_present   set if pulses are present
 * @param   pulse           pointer to pulse data struct
 * @param   band_type       array of the used band type
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
                                       GetBitContext *gb, const INTFLOAT sf[120],
                                       int pulse_present, const Pulse *pulse,
                                       const IndividualChannelStream *ics,
                                       enum BandType band_type[120])
{
    int i, k, g, idx = 0;
    const int c = 1024 / ics->num_windows;
    const uint16_t *offsets = ics->swb_offset;
    INTFLOAT *coef_base = coef;

    /* Clear everything above the last coded band in every window. */
    for (g = 0; g < ics->num_windows; g++)
        memset(coef + g * 128 + offsets[ics->max_sfb], 0,
               sizeof(INTFLOAT) * (c - offsets[ics->max_sfb]));

    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            /* Band type minus one, as unsigned: it doubles as the index of
             * the spectral codebook. NOTE(review): presumably ZERO_BT is 0
             * and wraps into the first branch - confirm against the enum. */
            const unsigned cbt_m1 = band_type[idx] - 1;
            INTFLOAT *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 >= INTENSITY_BT2 - 1) {
                /* No spectral data is coded for these bands: zero them. */
                for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                    memset(cfo, 0, off_len * sizeof(*cfo));
                }
            } else if (cbt_m1 == NOISE_BT - 1) {
                /* Noise band: fill with pseudorandom values, then rescale
                 * using the band energy and sf[idx]. */
                for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                    INTFLOAT band_energy;
#if USE_FIXED
                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state >> 3;
                    }

                    band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len);
                    band_energy = fixed_sqrt(band_energy, 31);
                    noise_scale(cfo, sf[idx], band_energy, off_len);
#else
                    float scale;

                    for (k = 0; k < off_len; k++) {
                        ac->random_state  = lcg_random(ac->random_state);
                        cfo[k] = ac->random_state;
                    }

                    band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
                    scale = sf[idx] / sqrtf(band_energy);
                    ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
#endif /* USE_FIXED */
                }
            } else {
#if !USE_FIXED
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
#endif /* !USE_FIXED */
                const VLCElem *vlc_tab = vlc_spectral[cbt_m1].table;
                OPEN_READER(re, gb);

                /* Codebooks are handled in pairs; each case reads one VLC
                 * code per 4 (cases 0, 1) or 2 (all others) coefficients. */
                switch (cbt_m1 >> 1) {
                case 0:
                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                        INTFLOAT *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = code;
#if USE_FIXED
                            cf = DEC_SQUAD(cf, cb_idx);
#else
                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
#endif /* USE_FIXED */
                        } while (len -= 4);
                    }
                    break;

                case 1:
                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                        INTFLOAT *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            uint32_t bits;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = code;
                            /* nnz extra bits follow the code (taken from
                             * bits 8..11 of the table entry). */
                            nnz = cb_idx >> 8 & 15;
                            bits = nnz ? GET_CACHE(re, gb) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
#if USE_FIXED
                            cf = DEC_UQUAD(cf, cb_idx, bits);
#else
                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
#endif /* USE_FIXED */
                        } while (len -= 4);
                    }
                    break;

                case 2:
                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                        INTFLOAT *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned cb_idx;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = code;
#if USE_FIXED
                            cf = DEC_SPAIR(cf, cb_idx);
#else
                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
#endif /* USE_FIXED */
                        } while (len -= 2);
                    }
                    break;

                case 3:
                case 4:
                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
                        INTFLOAT *cf = cfo;
                        int len = off_len;

                        do {
                            int code;
                            unsigned nnz;
                            unsigned cb_idx;
                            unsigned sign;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = code;
                            nnz = cb_idx >> 8 & 15;
                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
                            LAST_SKIP_BITS(re, gb, nnz);
#if USE_FIXED
                            cf = DEC_UPAIR(cf, cb_idx, sign);
#else
                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
#endif /* USE_FIXED */
                        } while (len -= 2);
                    }
                    break;

                default:
                    /* Remaining codebook: pairs whose entries may be escape
                     * coded (see the escape_sequence note below). */
                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
#if USE_FIXED
                        int *icf = cfo;
                        int v;
#else
                        float *cf = cfo;
                        uint32_t *icf = (uint32_t *) cf;
#endif /* USE_FIXED */
                        int len = off_len;

                        do {
                            int code;
                            unsigned nzt, nnz;
                            unsigned cb_idx;
                            uint32_t bits;
                            int j;

                            UPDATE_CACHE(re, gb);
                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
                            cb_idx = code;

                            /* An all-zero pair; "continue" still evaluates
                             * the do-while condition, so len is decremented. */
                            if (cb_idx == 0x0000) {
                                *icf++ = 0;
                                *icf++ = 0;
                                continue;
                            }

                            nnz = cb_idx >> 12;
                            nzt = cb_idx >> 8;
                            /* Sign bits are left-aligned so that bit 31 is
                             * always the sign of the next value. */
                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
                            LAST_SKIP_BITS(re, gb, nnz);

                            for (j = 0; j < 2; j++) {
                                if (nzt & 1<<j) {
                                    uint32_t b;
                                    int n;
                                    /* The total length of escape_sequence must be < 22 bits according
                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
                                    UPDATE_CACHE(re, gb);
                                    b = GET_CACHE(re, gb);
                                    /* Number of leading one bits in the cache. */
                                    b = 31 - av_log2(~b);

                                    if (b > 8) {
                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                        return AVERROR_INVALIDDATA;
                                    }

                                    SKIP_BITS(re, gb, b + 1);
                                    b += 4;
                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
                                    LAST_SKIP_BITS(re, gb, b);
#if USE_FIXED
                                    v = n;
                                    if (bits & 1U<<31)
                                        v = -v;
                                    *icf++ = v;
#else
                                    *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31);
#endif /* USE_FIXED */
                                    bits <<= 1;
                                } else {
#if USE_FIXED
                                    v = cb_idx & 15;
                                    if (bits & 1U<<31)
                                        v = -v;
                                    *icf++ = v;
#else
                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
                                    *icf++ = (bits & 1U<<31) | v;
#endif /* USE_FIXED */
                                    /* A sign bit is consumed only for a
                                     * non-zero value. */
                                    bits <<= !!v;
                                }
                                cb_idx >>= 4;
                            }
                        } while (len -= 2);
#if !USE_FIXED
                        ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
#endif /* !USE_FIXED */
                    }
                }

                CLOSE_READER(re, gb);
            }
        }
        /* Advance to the next window group: 128 coefficients per window. */
        coef += g_len << 7;
    }

    if (pulse_present) {
        idx = 0;
        for (i = 0; i < pulse->num_pulse; i++) {
            INTFLOAT co = coef_base[ pulse->pos[i] ];
            /* Find the scalefactor band that contains this pulse. */
            while (offsets[idx + 1] <= pulse->pos[i])
                idx++;
            if (band_type[idx] != NOISE_BT && sf[idx]) {
                INTFLOAT ico = -pulse->amp[i];
#if USE_FIXED
                if (co) {
                    ico = co + (co > 0 ? -ico : ico);
                }
                coef_base[ pulse->pos[i] ] = ico;
#else
                if (co) {
                    co /= sf[idx];
                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
                }
                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
#endif /* USE_FIXED */
            }
        }
    }
#if USE_FIXED
    /* Fixed point only: a second pass over the bands below NOISE_BT applies
     * vector_pow43() and subband_scale() with sf[idx]. */
    coef = coef_base;
    idx = 0;
    for (g = 0; g < ics->num_window_groups; g++) {
        unsigned g_len = ics->group_len[g];

        for (i = 0; i < ics->max_sfb; i++, idx++) {
            const unsigned cbt_m1 = band_type[idx] - 1;
            int *cfo = coef + offsets[i];
            int off_len = offsets[i + 1] - offsets[i];
            int group;

            if (cbt_m1 < NOISE_BT - 1) {
                for (group = 0; group < (int)g_len; group++, cfo+=128) {
                    ac->vector_pow43(cfo, off_len);
                    ac->subband_scale(cfo, cfo, sf[idx], 34, off_len, ac->avctx);
                }
            }
        }
        coef += g_len << 7;
    }
#endif /* USE_FIXED */
    return 0;
}

/**
 * Apply AAC-Main style frequency domain prediction.
 *
 * Predictor states are reset on first use and whenever an eight-short
 * sequence is seen; otherwise predict() runs on every coefficient of the
 * predicted bands and the signalled reset group, if any, is reset.
 */
static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
{
    int sfb, k;

    if (!sce->ics.predictor_initialized) {
        reset_all_predictors(sce->predictor_state);
        sce->ics.predictor_initialized = 1;
    }

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        for (sfb = 0;
             sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
             sfb++) {
            for (k = sce->ics.swb_offset[sfb];
                 k < sce->ics.swb_offset[sfb + 1];
                 k++) {
                /* predict() is called for every coefficient; the last
                 * argument tells it whether prediction is used here. */
                predict(&sce->predictor_state[k], &sce->coeffs[k],
                        sce->ics.predictor_present &&
                        sce->ics.prediction_used[sfb]);
            }
        }
        if (sce->ics.predictor_reset_group)
            reset_predictor_group(sce->predictor_state,
                                  sce->ics.predictor_reset_group);
    } else
        reset_all_predictors(sce->predictor_state);
}

/**
 * Parse and discard gain control data so that the bit reader ends up at the
 * correct position; nothing is stored (see the FIXME below).
 */
static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb)
{
    // wd_num, wd_test, aloc_size
    static const uint8_t gain_mode[4][3] = {
        {1, 0, 5},  // ONLY_LONG_SEQUENCE = 0,
        {2, 1, 2},  // LONG_START_SEQUENCE,
        {8, 0, 2},  // EIGHT_SHORT_SEQUENCE,
        {2, 1, 5},  // LONG_STOP_SEQUENCE
    };

    const int mode = sce->ics.window_sequence[0];
    uint8_t bd, wd, ad;

    // FIXME: Store the gain control data on |sce| and do something with it.
    uint8_t max_band = get_bits(gb, 2);
    for (bd = 0; bd < max_band; bd++) {
        for (wd = 0; wd < gain_mode[mode][0]; wd++) {
            uint8_t adjust_num = get_bits(gb, 3);
            for (ad = 0; ad < adjust_num; ad++) {
                /* 4 bits plus a field whose width depends on the window
                 * sequence and on whether this is the first window. */
                skip_bits(gb, 4 + ((wd == 0 && gain_mode[mode][1])
                                     ? 4
                                     : gain_mode[mode][2]));
            }
        }
    }
}

/**
 * Decode an individual_channel_stream payload; reference: table 4.44.
 *
 * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
 * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_ics(AACContext *ac, SingleChannelElement *sce,
                      GetBitContext *gb, int common_window, int scale_flag)
{
    Pulse pulse;
    TemporalNoiseShaping    *tns = &sce->tns;
    IndividualChannelStream *ics = &sce->ics;
    INTFLOAT *out = sce->coeffs;
    int global_gain, eld_syntax, er_syntax, pulse_present = 0;
    int ret;

    eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
    er_syntax  = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD ||
                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;

    /* This assignment is to silence a GCC warning about the variable being used
     * uninitialized when in fact it always is.
     */
    pulse.num_pulse = 0;

    global_gain = get_bits(gb, 8);

    /* ICS info is only parsed here when it is not shared between channels. */
    if (!common_window && !scale_flag) {
        ret = decode_ics_info(ac, ics, gb);
        if (ret < 0)
            goto fail;
    }

    if ((ret = decode_band_types(ac, sce->band_type,
                                 sce->band_type_run_end, gb, ics)) < 0)
        goto fail;
    if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
                                  sce->band_type, sce->band_type_run_end)) < 0)
        goto fail;

    pulse_present = 0;
    if (!scale_flag) {
        if (!eld_syntax && (pulse_present = get_bits1(gb))) {
            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
                av_log(ac->avctx, AV_LOG_ERROR,
                       "Pulse tool not allowed in eight short sequence.\n");
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
                av_log(ac->avctx, AV_LOG_ERROR,
                       "Pulse data corrupt or invalid.\n");
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
        }
        tns->present = get_bits1(gb);
        /* Without ER syntax the TNS data comes right after its flag; with
         * ER syntax it is read after the gain control data below. */
        if (tns->present && !er_syntax) {
            ret = decode_tns(ac, tns, gb, ics);
            if (ret < 0)
                goto fail;
        }
        if (!eld_syntax && get_bits1(gb)) {
            decode_gain_control(sce, gb);
            if (!ac->warned_gain_control) {
                avpriv_report_missing_feature(ac->avctx, "Gain control");
                ac->warned_gain_control = 1;
            }
        }
        // I see no textual basis in the spec for this occurring after SSR gain
        // control, but this is what both reference and real implementations do
        if (tns->present && er_syntax) {
            ret = decode_tns(ac, tns, gb, ics);
            if (ret < 0)
                goto fail;
        }
    }

    ret = decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
                                    &pulse, ics, sce->band_type);
    if (ret < 0)
        goto fail;

    if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
        apply_prediction(ac, sce);

    return 0;
fail:
    /* Every failure leaves TNS marked as absent for this element. */
    tns->present = 0;
    return ret;
}
2214/** 2215 * Mid/Side stereo decoding; reference: 4.6.8.1.3. 2216 */ 2217static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) 2218{ 2219 const IndividualChannelStream *ics = &cpe->ch[0].ics; 2220 INTFLOAT *ch0 = cpe->ch[0].coeffs; 2221 INTFLOAT *ch1 = cpe->ch[1].coeffs; 2222 int g, i, group, idx = 0; 2223 const uint16_t *offsets = ics->swb_offset; 2224 for (g = 0; g < ics->num_window_groups; g++) { 2225 for (i = 0; i < ics->max_sfb; i++, idx++) { 2226 if (cpe->ms_mask[idx] && 2227 cpe->ch[0].band_type[idx] < NOISE_BT && 2228 cpe->ch[1].band_type[idx] < NOISE_BT) { 2229#if USE_FIXED 2230 for (group = 0; group < ics->group_len[g]; group++) { 2231 ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i], 2232 ch1 + group * 128 + offsets[i], 2233 offsets[i+1] - offsets[i]); 2234#else 2235 for (group = 0; group < ics->group_len[g]; group++) { 2236 ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i], 2237 ch1 + group * 128 + offsets[i], 2238 offsets[i+1] - offsets[i]); 2239#endif /* USE_FIXED */ 2240 } 2241 } 2242 } 2243 ch0 += ics->group_len[g] * 128; 2244 ch1 += ics->group_len[g] * 128; 2245 } 2246} 2247 2248/** 2249 * intensity stereo decoding; reference: 4.6.8.2.3 2250 * 2251 * @param ms_present Indicates mid/side stereo presence. 
[0] mask is all 0s; 2252 * [1] mask is decoded from bitstream; [2] mask is all 1s; 2253 * [3] reserved for scalable AAC 2254 */ 2255static void apply_intensity_stereo(AACContext *ac, 2256 ChannelElement *cpe, int ms_present) 2257{ 2258 const IndividualChannelStream *ics = &cpe->ch[1].ics; 2259 SingleChannelElement *sce1 = &cpe->ch[1]; 2260 INTFLOAT *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs; 2261 const uint16_t *offsets = ics->swb_offset; 2262 int g, group, i, idx = 0; 2263 int c; 2264 INTFLOAT scale; 2265 for (g = 0; g < ics->num_window_groups; g++) { 2266 for (i = 0; i < ics->max_sfb;) { 2267 if (sce1->band_type[idx] == INTENSITY_BT || 2268 sce1->band_type[idx] == INTENSITY_BT2) { 2269 const int bt_run_end = sce1->band_type_run_end[idx]; 2270 for (; i < bt_run_end; i++, idx++) { 2271 c = -1 + 2 * (sce1->band_type[idx] - 14); 2272 if (ms_present) 2273 c *= 1 - 2 * cpe->ms_mask[idx]; 2274 scale = c * sce1->sf[idx]; 2275 for (group = 0; group < ics->group_len[g]; group++) 2276#if USE_FIXED 2277 ac->subband_scale(coef1 + group * 128 + offsets[i], 2278 coef0 + group * 128 + offsets[i], 2279 scale, 2280 23, 2281 offsets[i + 1] - offsets[i] ,ac->avctx); 2282#else 2283 ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i], 2284 coef0 + group * 128 + offsets[i], 2285 scale, 2286 offsets[i + 1] - offsets[i]); 2287#endif /* USE_FIXED */ 2288 } 2289 } else { 2290 int bt_run_end = sce1->band_type_run_end[idx]; 2291 idx += bt_run_end - i; 2292 i = bt_run_end; 2293 } 2294 } 2295 coef0 += ics->group_len[g] * 128; 2296 coef1 += ics->group_len[g] * 128; 2297 } 2298} 2299 2300/** 2301 * Decode a channel_pair_element; reference: table 4.4. 2302 * 2303 * @return Returns error status. 
0 - OK, !0 - error 2304 */ 2305static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) 2306{ 2307 int i, ret, common_window, ms_present = 0; 2308 int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; 2309 2310 common_window = eld_syntax || get_bits1(gb); 2311 if (common_window) { 2312 if (decode_ics_info(ac, &cpe->ch[0].ics, gb)) 2313 return AVERROR_INVALIDDATA; 2314 i = cpe->ch[1].ics.use_kb_window[0]; 2315 cpe->ch[1].ics = cpe->ch[0].ics; 2316 cpe->ch[1].ics.use_kb_window[1] = i; 2317 if (cpe->ch[1].ics.predictor_present && 2318 (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN)) 2319 if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1))) 2320 decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb); 2321 ms_present = get_bits(gb, 2); 2322 if (ms_present == 3) { 2323 av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n"); 2324 return AVERROR_INVALIDDATA; 2325 } else if (ms_present) 2326 decode_mid_side_stereo(cpe, gb, ms_present); 2327 } 2328 if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0))) 2329 return ret; 2330 if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0))) 2331 return ret; 2332 2333 if (common_window) { 2334 if (ms_present) 2335 apply_mid_side_stereo(ac, cpe); 2336 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) { 2337 apply_prediction(ac, &cpe->ch[0]); 2338 apply_prediction(ac, &cpe->ch[1]); 2339 } 2340 } 2341 2342 apply_intensity_stereo(ac, cpe, ms_present); 2343 return 0; 2344} 2345 2346static const float cce_scale[] = { 2347 1.09050773266525765921, //2^(1/8) 2348 1.18920711500272106672, //2^(1/4) 2349 M_SQRT2, 2350 2, 2351}; 2352 2353/** 2354 * Decode coupling_channel_element; reference: table 4.8. 2355 * 2356 * @return Returns error status. 
0 - OK, !0 - error 2357 */ 2358static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) 2359{ 2360 int num_gain = 0; 2361 int c, g, sfb, ret; 2362 int sign; 2363 INTFLOAT scale; 2364 SingleChannelElement *sce = &che->ch[0]; 2365 ChannelCoupling *coup = &che->coup; 2366 2367 coup->coupling_point = 2 * get_bits1(gb); 2368 coup->num_coupled = get_bits(gb, 3); 2369 for (c = 0; c <= coup->num_coupled; c++) { 2370 num_gain++; 2371 coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE; 2372 coup->id_select[c] = get_bits(gb, 4); 2373 if (coup->type[c] == TYPE_CPE) { 2374 coup->ch_select[c] = get_bits(gb, 2); 2375 if (coup->ch_select[c] == 3) 2376 num_gain++; 2377 } else 2378 coup->ch_select[c] = 2; 2379 } 2380 coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1); 2381 2382 sign = get_bits(gb, 1); 2383#if USE_FIXED 2384 scale = get_bits(gb, 2); 2385#else 2386 scale = cce_scale[get_bits(gb, 2)]; 2387#endif 2388 2389 if ((ret = decode_ics(ac, sce, gb, 0, 0))) 2390 return ret; 2391 2392 for (c = 0; c < num_gain; c++) { 2393 int idx = 0; 2394 int cge = 1; 2395 int gain = 0; 2396 INTFLOAT gain_cache = FIXR10(1.); 2397 if (c) { 2398 cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb); 2399 gain = cge ? 
get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0; 2400 gain_cache = GET_GAIN(scale, gain); 2401#if USE_FIXED 2402 if ((abs(gain_cache)-1024) >> 3 > 30) 2403 return AVERROR(ERANGE); 2404#endif 2405 } 2406 if (coup->coupling_point == AFTER_IMDCT) { 2407 coup->gain[c][0] = gain_cache; 2408 } else { 2409 for (g = 0; g < sce->ics.num_window_groups; g++) { 2410 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) { 2411 if (sce->band_type[idx] != ZERO_BT) { 2412 if (!cge) { 2413 int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; 2414 if (t) { 2415 int s = 1; 2416 t = gain += t; 2417 if (sign) { 2418 s -= 2 * (t & 0x1); 2419 t >>= 1; 2420 } 2421 gain_cache = GET_GAIN(scale, t) * s; 2422#if USE_FIXED 2423 if ((abs(gain_cache)-1024) >> 3 > 30) 2424 return AVERROR(ERANGE); 2425#endif 2426 } 2427 } 2428 coup->gain[c][idx] = gain_cache; 2429 } 2430 } 2431 } 2432 } 2433 } 2434 return 0; 2435} 2436 2437/** 2438 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53. 2439 * 2440 * @return Returns number of bytes consumed. 2441 */ 2442static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, 2443 GetBitContext *gb) 2444{ 2445 int i; 2446 int num_excl_chan = 0; 2447 2448 do { 2449 for (i = 0; i < 7; i++) 2450 che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb); 2451 } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)); 2452 2453 return num_excl_chan / 7; 2454} 2455 2456/** 2457 * Decode dynamic range information; reference: table 4.52. 2458 * 2459 * @return Returns number of bytes consumed. 2460 */ 2461static int decode_dynamic_range(DynamicRangeControl *che_drc, 2462 GetBitContext *gb) 2463{ 2464 int n = 1; 2465 int drc_num_bands = 1; 2466 int i; 2467 2468 /* pce_tag_present? */ 2469 if (get_bits1(gb)) { 2470 che_drc->pce_instance_tag = get_bits(gb, 4); 2471 skip_bits(gb, 4); // tag_reserved_bits 2472 n++; 2473 } 2474 2475 /* excluded_chns_present? 
*/ 2476 if (get_bits1(gb)) { 2477 n += decode_drc_channel_exclusions(che_drc, gb); 2478 } 2479 2480 /* drc_bands_present? */ 2481 if (get_bits1(gb)) { 2482 che_drc->band_incr = get_bits(gb, 4); 2483 che_drc->interpolation_scheme = get_bits(gb, 4); 2484 n++; 2485 drc_num_bands += che_drc->band_incr; 2486 for (i = 0; i < drc_num_bands; i++) { 2487 che_drc->band_top[i] = get_bits(gb, 8); 2488 n++; 2489 } 2490 } 2491 2492 /* prog_ref_level_present? */ 2493 if (get_bits1(gb)) { 2494 che_drc->prog_ref_level = get_bits(gb, 7); 2495 skip_bits1(gb); // prog_ref_level_reserved_bits 2496 n++; 2497 } 2498 2499 for (i = 0; i < drc_num_bands; i++) { 2500 che_drc->dyn_rng_sgn[i] = get_bits1(gb); 2501 che_drc->dyn_rng_ctl[i] = get_bits(gb, 7); 2502 n++; 2503 } 2504 2505 return n; 2506} 2507 2508static int decode_fill(AACContext *ac, GetBitContext *gb, int len) { 2509 uint8_t buf[256]; 2510 int i, major, minor; 2511 2512 if (len < 13+7*8) 2513 goto unknown; 2514 2515 get_bits(gb, 13); len -= 13; 2516 2517 for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8) 2518 buf[i] = get_bits(gb, 8); 2519 2520 buf[i] = 0; 2521 if (ac->avctx->debug & FF_DEBUG_PICT_INFO) 2522 av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf); 2523 2524 if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){ 2525 ac->avctx->internal->skip_samples = 1024; 2526 } 2527 2528unknown: 2529 skip_bits_long(gb, len); 2530 2531 return 0; 2532} 2533 2534/** 2535 * Decode extension data (incomplete); reference: table 4.51. 
2536 * 2537 * @param cnt length of TYPE_FIL syntactic element in bytes 2538 * 2539 * @return Returns number of bytes consumed 2540 */ 2541static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt, 2542 ChannelElement *che, enum RawDataBlockType elem_type) 2543{ 2544 int crc_flag = 0; 2545 int res = cnt; 2546 int type = get_bits(gb, 4); 2547 2548 if (ac->avctx->debug & FF_DEBUG_STARTCODE) 2549 av_log(ac->avctx, AV_LOG_DEBUG, "extension type: %d len:%d\n", type, cnt); 2550 2551 switch (type) { // extension type 2552 case EXT_SBR_DATA_CRC: 2553 crc_flag++; 2554 case EXT_SBR_DATA: 2555 if (!che) { 2556 av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n"); 2557 return res; 2558 } else if (ac->oc[1].m4ac.frame_length_short) { 2559 if (!ac->warned_960_sbr) 2560 avpriv_report_missing_feature(ac->avctx, 2561 "SBR with 960 frame length"); 2562 ac->warned_960_sbr = 1; 2563 skip_bits_long(gb, 8 * cnt - 4); 2564 return res; 2565 } else if (!ac->oc[1].m4ac.sbr) { 2566 av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n"); 2567 skip_bits_long(gb, 8 * cnt - 4); 2568 return res; 2569 } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) { 2570 av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n"); 2571 skip_bits_long(gb, 8 * cnt - 4); 2572 return res; 2573 } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && 2574 ac->avctx->ch_layout.nb_channels == 1) { 2575 ac->oc[1].m4ac.sbr = 1; 2576 ac->oc[1].m4ac.ps = 1; 2577 ac->avctx->profile = FF_PROFILE_AAC_HE_V2; 2578 output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, 2579 ac->oc[1].status, 1); 2580 } else { 2581 ac->oc[1].m4ac.sbr = 1; 2582 ac->avctx->profile = FF_PROFILE_AAC_HE; 2583 } 2584 res = AAC_RENAME(ff_decode_sbr_extension)(ac, &che->sbr, gb, crc_flag, cnt, elem_type); 2585 break; 2586 case EXT_DYNAMIC_RANGE: 2587 res = 
decode_dynamic_range(&ac->che_drc, gb); 2588 break; 2589 case EXT_FILL: 2590 decode_fill(ac, gb, 8 * cnt - 4); 2591 break; 2592 case EXT_FILL_DATA: 2593 case EXT_DATA_ELEMENT: 2594 default: 2595 skip_bits_long(gb, 8 * cnt - 4); 2596 break; 2597 }; 2598 return res; 2599} 2600 2601/** 2602 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3. 2603 * 2604 * @param decode 1 if tool is used normally, 0 if tool is used in LTP. 2605 * @param coef spectral coefficients 2606 */ 2607static void apply_tns(INTFLOAT coef_param[1024], TemporalNoiseShaping *tns, 2608 IndividualChannelStream *ics, int decode) 2609{ 2610 const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb); 2611 int w, filt, m, i; 2612 int bottom, top, order, start, end, size, inc; 2613 INTFLOAT lpc[TNS_MAX_ORDER]; 2614 INTFLOAT tmp[TNS_MAX_ORDER+1]; 2615 UINTFLOAT *coef = coef_param; 2616 2617 if(!mmm) 2618 return; 2619 2620 for (w = 0; w < ics->num_windows; w++) { 2621 bottom = ics->num_swb; 2622 for (filt = 0; filt < tns->n_filt[w]; filt++) { 2623 top = bottom; 2624 bottom = FFMAX(0, top - tns->length[w][filt]); 2625 order = tns->order[w][filt]; 2626 if (order == 0) 2627 continue; 2628 2629 // tns_decode_coef 2630 AAC_RENAME(compute_lpc_coefs)(tns->coef[w][filt], order, lpc, 0, 0, 0); 2631 2632 start = ics->swb_offset[FFMIN(bottom, mmm)]; 2633 end = ics->swb_offset[FFMIN( top, mmm)]; 2634 if ((size = end - start) <= 0) 2635 continue; 2636 if (tns->direction[w][filt]) { 2637 inc = -1; 2638 start = end - 1; 2639 } else { 2640 inc = 1; 2641 } 2642 start += w * 128; 2643 2644 if (decode) { 2645 // ar filter 2646 for (m = 0; m < size; m++, start += inc) 2647 for (i = 1; i <= FFMIN(m, order); i++) 2648 coef[start] -= AAC_MUL26((INTFLOAT)coef[start - i * inc], lpc[i - 1]); 2649 } else { 2650 // ma filter 2651 for (m = 0; m < size; m++, start += inc) { 2652 tmp[0] = coef[start]; 2653 for (i = 1; i <= FFMIN(m, order); i++) 2654 coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]); 
2655 for (i = order; i > 0; i--) 2656 tmp[i] = tmp[i - 1]; 2657 } 2658 } 2659 } 2660 } 2661} 2662 2663/** 2664 * Apply windowing and MDCT to obtain the spectral 2665 * coefficient from the predicted sample by LTP. 2666 */ 2667static void windowing_and_mdct_ltp(AACContext *ac, INTFLOAT *out, 2668 INTFLOAT *in, IndividualChannelStream *ics) 2669{ 2670 const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024); 2671 const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128); 2672 const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024); 2673 const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128); 2674 2675 if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { 2676 ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024); 2677 } else { 2678 memset(in, 0, 448 * sizeof(*in)); 2679 ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128); 2680 } 2681 if (ics->window_sequence[0] != LONG_START_SEQUENCE) { 2682 ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); 2683 } else { 2684 ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); 2685 memset(in + 1024 + 576, 0, 448 * sizeof(*in)); 2686 } 2687 ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in); 2688} 2689 2690/** 2691 * Apply the long term prediction 2692 */ 2693static void apply_ltp(AACContext *ac, SingleChannelElement *sce) 2694{ 2695 const LongTermPrediction *ltp = &sce->ics.ltp; 2696 const uint16_t *offsets = sce->ics.swb_offset; 2697 int i, sfb; 2698 2699 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 2700 INTFLOAT *predTime = sce->ret; 2701 INTFLOAT *predFreq = ac->buf_mdct; 2702 int16_t num_samples = 2048; 2703 2704 if (ltp->lag < 1024) 2705 num_samples = ltp->lag + 1024; 2706 for (i = 0; i < num_samples; i++) 2707 predTime[i] = AAC_MUL30(sce->ltp_state[i + 2048 - 
ltp->lag], ltp->coef); 2708 memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime)); 2709 2710 ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics); 2711 2712 if (sce->tns.present) 2713 ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0); 2714 2715 for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++) 2716 if (ltp->used[sfb]) 2717 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++) 2718 sce->coeffs[i] += (UINTFLOAT)predFreq[i]; 2719 } 2720} 2721 2722/** 2723 * Update the LTP buffer for next frame 2724 */ 2725static void update_ltp(AACContext *ac, SingleChannelElement *sce) 2726{ 2727 IndividualChannelStream *ics = &sce->ics; 2728 INTFLOAT *saved = sce->saved; 2729 INTFLOAT *saved_ltp = sce->coeffs; 2730 const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024); 2731 const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128); 2732 int i; 2733 2734 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2735 memcpy(saved_ltp, saved, 512 * sizeof(*saved_ltp)); 2736 memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp)); 2737 ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 2738 2739 for (i = 0; i < 64; i++) 2740 saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); 2741 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 2742 memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(*saved_ltp)); 2743 memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp)); 2744 ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 2745 2746 for (i = 0; i < 64; i++) 2747 saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); 2748 } else { // LONG_STOP or ONLY_LONG 2749 ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); 2750 2751 for (i = 0; i < 512; i++) 2752 saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], lwindow[511 - i]); 2753 } 
2754 2755 memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state)); 2756 memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state)); 2757 memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state)); 2758} 2759 2760/** 2761 * Conduct IMDCT and windowing. 2762 */ 2763static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) 2764{ 2765 IndividualChannelStream *ics = &sce->ics; 2766 INTFLOAT *in = sce->coeffs; 2767 INTFLOAT *out = sce->ret; 2768 INTFLOAT *saved = sce->saved; 2769 const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128); 2770 const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024); 2771 const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128); 2772 INTFLOAT *buf = ac->buf_mdct; 2773 INTFLOAT *temp = ac->temp; 2774 int i; 2775 2776 // imdct 2777 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2778 for (i = 0; i < 1024; i += 128) 2779 ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i); 2780 } else { 2781 ac->mdct.imdct_half(&ac->mdct, buf, in); 2782#if USE_FIXED 2783 for (i=0; i<1024; i++) 2784 buf[i] = (buf[i] + 4LL) >> 3; 2785#endif /* USE_FIXED */ 2786 } 2787 2788 /* window overlapping 2789 * NOTE: To simplify the overlapping code, all 'meaningless' short to long 2790 * and long to short transitions are considered to be short to short 2791 * transitions. This leaves just two cases (long to long and short to short) 2792 * with a little special sauce for EIGHT_SHORT_SEQUENCE. 
2793 */ 2794 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 2795 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 2796 ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512); 2797 } else { 2798 memcpy( out, saved, 448 * sizeof(*out)); 2799 2800 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2801 ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); 2802 ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); 2803 ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); 2804 ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); 2805 ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); 2806 memcpy( out + 448 + 4*128, temp, 64 * sizeof(*out)); 2807 } else { 2808 ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); 2809 memcpy( out + 576, buf + 64, 448 * sizeof(*out)); 2810 } 2811 } 2812 2813 // buffer update 2814 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2815 memcpy( saved, temp + 64, 64 * sizeof(*saved)); 2816 ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); 2817 ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); 2818 ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); 2819 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved)); 2820 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 2821 memcpy( saved, buf + 512, 448 * sizeof(*saved)); 2822 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved)); 2823 } else { // LONG_STOP or ONLY_LONG 2824 memcpy( saved, buf + 512, 512 * sizeof(*saved)); 2825 } 2826} 2827 2828/** 2829 * Conduct IMDCT and windowing. 
2830 */ 2831static void imdct_and_windowing_960(AACContext *ac, SingleChannelElement *sce) 2832{ 2833#if !USE_FIXED 2834 IndividualChannelStream *ics = &sce->ics; 2835 INTFLOAT *in = sce->coeffs; 2836 INTFLOAT *out = sce->ret; 2837 INTFLOAT *saved = sce->saved; 2838 const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120); 2839 const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_long_960) : AAC_RENAME(sine_960); 2840 const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120); 2841 INTFLOAT *buf = ac->buf_mdct; 2842 INTFLOAT *temp = ac->temp; 2843 int i; 2844 2845 // imdct 2846 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2847 for (i = 0; i < 8; i++) 2848 ac->mdct120->imdct_half(ac->mdct120, buf + i * 120, in + i * 128, 1); 2849 } else { 2850 ac->mdct960->imdct_half(ac->mdct960, buf, in, 1); 2851 } 2852 2853 /* window overlapping 2854 * NOTE: To simplify the overlapping code, all 'meaningless' short to long 2855 * and long to short transitions are considered to be short to short 2856 * transitions. This leaves just two cases (long to long and short to short) 2857 * with a little special sauce for EIGHT_SHORT_SEQUENCE. 
2858 */ 2859 2860 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 2861 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 2862 ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 480); 2863 } else { 2864 memcpy( out, saved, 420 * sizeof(*out)); 2865 2866 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2867 ac->fdsp->vector_fmul_window(out + 420 + 0*120, saved + 420, buf + 0*120, swindow_prev, 60); 2868 ac->fdsp->vector_fmul_window(out + 420 + 1*120, buf + 0*120 + 60, buf + 1*120, swindow, 60); 2869 ac->fdsp->vector_fmul_window(out + 420 + 2*120, buf + 1*120 + 60, buf + 2*120, swindow, 60); 2870 ac->fdsp->vector_fmul_window(out + 420 + 3*120, buf + 2*120 + 60, buf + 3*120, swindow, 60); 2871 ac->fdsp->vector_fmul_window(temp, buf + 3*120 + 60, buf + 4*120, swindow, 60); 2872 memcpy( out + 420 + 4*120, temp, 60 * sizeof(*out)); 2873 } else { 2874 ac->fdsp->vector_fmul_window(out + 420, saved + 420, buf, swindow_prev, 60); 2875 memcpy( out + 540, buf + 60, 420 * sizeof(*out)); 2876 } 2877 } 2878 2879 // buffer update 2880 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 2881 memcpy( saved, temp + 60, 60 * sizeof(*saved)); 2882 ac->fdsp->vector_fmul_window(saved + 60, buf + 4*120 + 60, buf + 5*120, swindow, 60); 2883 ac->fdsp->vector_fmul_window(saved + 180, buf + 5*120 + 60, buf + 6*120, swindow, 60); 2884 ac->fdsp->vector_fmul_window(saved + 300, buf + 6*120 + 60, buf + 7*120, swindow, 60); 2885 memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved)); 2886 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 2887 memcpy( saved, buf + 480, 420 * sizeof(*saved)); 2888 memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved)); 2889 } else { // LONG_STOP or ONLY_LONG 2890 memcpy( saved, buf + 480, 480 * sizeof(*saved)); 2891 } 2892#endif 2893} 2894static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce) 2895{ 
2896 IndividualChannelStream *ics = &sce->ics; 2897 INTFLOAT *in = sce->coeffs; 2898 INTFLOAT *out = sce->ret; 2899 INTFLOAT *saved = sce->saved; 2900 INTFLOAT *buf = ac->buf_mdct; 2901#if USE_FIXED 2902 int i; 2903#endif /* USE_FIXED */ 2904 2905 // imdct 2906 ac->mdct.imdct_half(&ac->mdct_ld, buf, in); 2907 2908#if USE_FIXED 2909 for (i = 0; i < 1024; i++) 2910 buf[i] = (buf[i] + 2) >> 2; 2911#endif /* USE_FIXED */ 2912 2913 // window overlapping 2914 if (ics->use_kb_window[1]) { 2915 // AAC LD uses a low overlap sine window instead of a KBD window 2916 memcpy(out, saved, 192 * sizeof(*out)); 2917 ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, AAC_RENAME2(sine_128), 64); 2918 memcpy( out + 320, buf + 64, 192 * sizeof(*out)); 2919 } else { 2920 ac->fdsp->vector_fmul_window(out, saved, buf, AAC_RENAME2(sine_512), 256); 2921 } 2922 2923 // buffer update 2924 memcpy(saved, buf + 256, 256 * sizeof(*saved)); 2925} 2926 2927static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce) 2928{ 2929 UINTFLOAT *in = sce->coeffs; 2930 INTFLOAT *out = sce->ret; 2931 INTFLOAT *saved = sce->saved; 2932 INTFLOAT *buf = ac->buf_mdct; 2933 int i; 2934 const int n = ac->oc[1].m4ac.frame_length_short ? 480 : 512; 2935 const int n2 = n >> 1; 2936 const int n4 = n >> 2; 2937 const INTFLOAT *const window = n == 480 ? AAC_RENAME(ff_aac_eld_window_480) : 2938 AAC_RENAME(ff_aac_eld_window_512); 2939 2940 // Inverse transform, mapped to the conventional IMDCT by 2941 // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V., 2942 // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks," 2943 // International Conference on Audio, Language and Image Processing, ICALIP 2008. 
2944 // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950 2945 for (i = 0; i < n2; i+=2) { 2946 INTFLOAT temp; 2947 temp = in[i ]; in[i ] = -in[n - 1 - i]; in[n - 1 - i] = temp; 2948 temp = -in[i + 1]; in[i + 1] = in[n - 2 - i]; in[n - 2 - i] = temp; 2949 } 2950#if !USE_FIXED 2951 if (n == 480) 2952 ac->mdct480->imdct_half(ac->mdct480, buf, in, 1); 2953 else 2954#endif 2955 ac->mdct.imdct_half(&ac->mdct_ld, buf, in); 2956 2957#if USE_FIXED 2958 for (i = 0; i < 1024; i++) 2959 buf[i] = (buf[i] + 1) >> 1; 2960#endif /* USE_FIXED */ 2961 2962 for (i = 0; i < n; i+=2) { 2963 buf[i] = -buf[i]; 2964 } 2965 // Like with the regular IMDCT at this point we still have the middle half 2966 // of a transform but with even symmetry on the left and odd symmetry on 2967 // the right 2968 2969 // window overlapping 2970 // The spec says to use samples [0..511] but the reference decoder uses 2971 // samples [128..639]. 2972 for (i = n4; i < n2; i ++) { 2973 out[i - n4] = AAC_MUL31( buf[ n2 - 1 - i] , window[i - n4]) + 2974 AAC_MUL31( saved[ i + n2] , window[i + n - n4]) + 2975 AAC_MUL31(-saved[n + n2 - 1 - i] , window[i + 2*n - n4]) + 2976 AAC_MUL31(-saved[ 2*n + n2 + i] , window[i + 3*n - n4]); 2977 } 2978 for (i = 0; i < n2; i ++) { 2979 out[n4 + i] = AAC_MUL31( buf[ i] , window[i + n2 - n4]) + 2980 AAC_MUL31(-saved[ n - 1 - i] , window[i + n2 + n - n4]) + 2981 AAC_MUL31(-saved[ n + i] , window[i + n2 + 2*n - n4]) + 2982 AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]); 2983 } 2984 for (i = 0; i < n4; i ++) { 2985 out[n2 + n4 + i] = AAC_MUL31( buf[ i + n2] , window[i + n - n4]) + 2986 AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) + 2987 AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]); 2988 } 2989 2990 // buffer update 2991 memmove(saved + n, saved, 2 * n * sizeof(*saved)); 2992 memcpy( saved, buf, n * sizeof(*saved)); 2993} 2994 2995/** 2996 * channel coupling transformation interface 2997 * 2998 * @param 
apply_coupling_method pointer to (in)dependent coupling function 2999 */ 3000static void apply_channel_coupling(AACContext *ac, ChannelElement *cc, 3001 enum RawDataBlockType type, int elem_id, 3002 enum CouplingPoint coupling_point, 3003 void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)) 3004{ 3005 int i, c; 3006 3007 for (i = 0; i < MAX_ELEM_ID; i++) { 3008 ChannelElement *cce = ac->che[TYPE_CCE][i]; 3009 int index = 0; 3010 3011 if (cce && cce->coup.coupling_point == coupling_point) { 3012 ChannelCoupling *coup = &cce->coup; 3013 3014 for (c = 0; c <= coup->num_coupled; c++) { 3015 if (coup->type[c] == type && coup->id_select[c] == elem_id) { 3016 if (coup->ch_select[c] != 1) { 3017 apply_coupling_method(ac, &cc->ch[0], cce, index); 3018 if (coup->ch_select[c] != 0) 3019 index++; 3020 } 3021 if (coup->ch_select[c] != 2) 3022 apply_coupling_method(ac, &cc->ch[1], cce, index++); 3023 } else 3024 index += 1 + (coup->ch_select[c] == 3); 3025 } 3026 } 3027 } 3028} 3029 3030/** 3031 * Convert spectral data to samples, applying all supported tools as appropriate. 
3032 */ 3033static void spectral_to_sample(AACContext *ac, int samples) 3034{ 3035 int i, type; 3036 void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce); 3037 switch (ac->oc[1].m4ac.object_type) { 3038 case AOT_ER_AAC_LD: 3039 imdct_and_window = imdct_and_windowing_ld; 3040 break; 3041 case AOT_ER_AAC_ELD: 3042 imdct_and_window = imdct_and_windowing_eld; 3043 break; 3044 default: 3045 if (ac->oc[1].m4ac.frame_length_short) 3046 imdct_and_window = imdct_and_windowing_960; 3047 else 3048 imdct_and_window = ac->imdct_and_windowing; 3049 } 3050 for (type = 3; type >= 0; type--) { 3051 for (i = 0; i < MAX_ELEM_ID; i++) { 3052 ChannelElement *che = ac->che[type][i]; 3053 if (che && che->present) { 3054 if (type <= TYPE_CPE) 3055 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, AAC_RENAME(apply_dependent_coupling)); 3056 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { 3057 if (che->ch[0].ics.predictor_present) { 3058 if (che->ch[0].ics.ltp.present) 3059 ac->apply_ltp(ac, &che->ch[0]); 3060 if (che->ch[1].ics.ltp.present && type == TYPE_CPE) 3061 ac->apply_ltp(ac, &che->ch[1]); 3062 } 3063 } 3064 if (che->ch[0].tns.present) 3065 ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1); 3066 if (che->ch[1].tns.present) 3067 ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1); 3068 if (type <= TYPE_CPE) 3069 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, AAC_RENAME(apply_dependent_coupling)); 3070 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { 3071 imdct_and_window(ac, &che->ch[0]); 3072 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) 3073 ac->update_ltp(ac, &che->ch[0]); 3074 if (type == TYPE_CPE) { 3075 imdct_and_window(ac, &che->ch[1]); 3076 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) 3077 ac->update_ltp(ac, &che->ch[1]); 3078 } 3079 if (ac->oc[1].m4ac.sbr > 0) { 3080 AAC_RENAME(ff_sbr_apply)(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret); 3081 } 3082 } 3083 if (type <= 
TYPE_CCE) 3084 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, AAC_RENAME(apply_independent_coupling)); 3085 3086#if USE_FIXED 3087 { 3088 int j; 3089 /* preparation for resampler */ 3090 for(j = 0; j<samples; j++){ 3091 che->ch[0].ret[j] = (int32_t)av_clip64((int64_t)che->ch[0].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000; 3092 if(type == TYPE_CPE) 3093 che->ch[1].ret[j] = (int32_t)av_clip64((int64_t)che->ch[1].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000; 3094 } 3095 } 3096#endif /* USE_FIXED */ 3097 che->present = 0; 3098 } else if (che) { 3099 av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i); 3100 } 3101 } 3102 } 3103} 3104 3105static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb) 3106{ 3107 int size; 3108 AACADTSHeaderInfo hdr_info; 3109 uint8_t layout_map[MAX_ELEM_ID*4][3]; 3110 int layout_map_tags, ret; 3111 3112 size = ff_adts_header_parse(gb, &hdr_info); 3113 if (size > 0) { 3114 if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) { 3115 // This is 2 for "VLB " audio in NSV files. 3116 // See samples/nsv/vlb_audio. 3117 avpriv_report_missing_feature(ac->avctx, 3118 "More than one AAC RDB per ADTS frame"); 3119 ac->warned_num_aac_frames = 1; 3120 } 3121 push_output_configuration(ac); 3122 if (hdr_info.chan_config) { 3123 ac->oc[1].m4ac.chan_config = hdr_info.chan_config; 3124 if ((ret = set_default_channel_config(ac, ac->avctx, 3125 layout_map, 3126 &layout_map_tags, 3127 hdr_info.chan_config)) < 0) 3128 return ret; 3129 if ((ret = output_configure(ac, layout_map, layout_map_tags, 3130 FFMAX(ac->oc[1].status, 3131 OC_TRIAL_FRAME), 0)) < 0) 3132 return ret; 3133 } else { 3134 ac->oc[1].m4ac.chan_config = 0; 3135 /** 3136 * dual mono frames in Japanese DTV can have chan_config 0 3137 * WITHOUT specifying PCE. 3138 * thus, set dual mono as default. 
3139 */ 3140 if (ac->dmono_mode && ac->oc[0].status == OC_NONE) { 3141 layout_map_tags = 2; 3142 layout_map[0][0] = layout_map[1][0] = TYPE_SCE; 3143 layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT; 3144 layout_map[0][1] = 0; 3145 layout_map[1][1] = 1; 3146 if (output_configure(ac, layout_map, layout_map_tags, 3147 OC_TRIAL_FRAME, 0)) 3148 return -7; 3149 } 3150 } 3151 ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate; 3152 ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index; 3153 ac->oc[1].m4ac.object_type = hdr_info.object_type; 3154 ac->oc[1].m4ac.frame_length_short = 0; 3155 if (ac->oc[0].status != OC_LOCKED || 3156 ac->oc[0].m4ac.chan_config != hdr_info.chan_config || 3157 ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) { 3158 ac->oc[1].m4ac.sbr = -1; 3159 ac->oc[1].m4ac.ps = -1; 3160 } 3161 if (!hdr_info.crc_absent) 3162 skip_bits(gb, 16); 3163 } 3164 return size; 3165} 3166 3167static int aac_decode_er_frame(AVCodecContext *avctx, void *data, 3168 int *got_frame_ptr, GetBitContext *gb) 3169{ 3170 AACContext *ac = avctx->priv_data; 3171 const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac; 3172 ChannelElement *che; 3173 int err, i; 3174 int samples = m4ac->frame_length_short ? 
960 : 1024; 3175 int chan_config = m4ac->chan_config; 3176 int aot = m4ac->object_type; 3177 3178 if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) 3179 samples >>= 1; 3180 3181 ac->frame = data; 3182 3183 if ((err = frame_configure_elements(avctx)) < 0) 3184 return err; 3185 3186 // The FF_PROFILE_AAC_* defines are all object_type - 1 3187 // This may lead to an undefined profile being signaled 3188 ac->avctx->profile = aot - 1; 3189 3190 ac->tags_mapped = 0; 3191 3192 if (chan_config < 0 || (chan_config >= 8 && chan_config < 11) || chan_config >= 13) { 3193 avpriv_request_sample(avctx, "Unknown ER channel configuration %d", 3194 chan_config); 3195 return AVERROR_INVALIDDATA; 3196 } 3197 for (i = 0; i < tags_per_config[chan_config]; i++) { 3198 const int elem_type = aac_channel_layout_map[chan_config-1][i][0]; 3199 const int elem_id = aac_channel_layout_map[chan_config-1][i][1]; 3200 if (!(che=get_che(ac, elem_type, elem_id))) { 3201 av_log(ac->avctx, AV_LOG_ERROR, 3202 "channel element %d.%d is not allocated\n", 3203 elem_type, elem_id); 3204 return AVERROR_INVALIDDATA; 3205 } 3206 che->present = 1; 3207 if (aot != AOT_ER_AAC_ELD) 3208 skip_bits(gb, 4); 3209 switch (elem_type) { 3210 case TYPE_SCE: 3211 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 3212 break; 3213 case TYPE_CPE: 3214 err = decode_cpe(ac, gb, che); 3215 break; 3216 case TYPE_LFE: 3217 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 3218 break; 3219 } 3220 if (err < 0) 3221 return err; 3222 } 3223 3224 spectral_to_sample(ac, samples); 3225 3226 if (!ac->frame->data[0] && samples) { 3227 av_log(avctx, AV_LOG_ERROR, "no frame data found\n"); 3228 return AVERROR_INVALIDDATA; 3229 } 3230 3231 ac->frame->nb_samples = samples; 3232 ac->frame->sample_rate = avctx->sample_rate; 3233 *got_frame_ptr = 1; 3234 3235 skip_bits_long(gb, get_bits_left(gb)); 3236 return 0; 3237} 3238 3239static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame, 3240 int *got_frame_ptr, GetBitContext *gb, 3241 const 
AVPacket *avpkt) 3242{ 3243 AACContext *ac = avctx->priv_data; 3244 ChannelElement *che = NULL, *che_prev = NULL; 3245 enum RawDataBlockType elem_type, che_prev_type = TYPE_END; 3246 int err, elem_id; 3247 int samples = 0, multiplier, audio_found = 0, pce_found = 0; 3248 int is_dmono, sce_count = 0; 3249 int payload_alignment; 3250 uint8_t che_presence[4][MAX_ELEM_ID] = {{0}}; 3251 3252 ac->frame = frame; 3253 3254 if (show_bits(gb, 12) == 0xfff) { 3255 if ((err = parse_adts_frame_header(ac, gb)) < 0) { 3256 av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n"); 3257 goto fail; 3258 } 3259 if (ac->oc[1].m4ac.sampling_index > 12) { 3260 av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index); 3261 err = AVERROR_INVALIDDATA; 3262 goto fail; 3263 } 3264 } 3265 3266 if ((err = frame_configure_elements(avctx)) < 0) 3267 goto fail; 3268 3269 // The FF_PROFILE_AAC_* defines are all object_type - 1 3270 // This may lead to an undefined profile being signaled 3271 ac->avctx->profile = ac->oc[1].m4ac.object_type - 1; 3272 3273 payload_alignment = get_bits_count(gb); 3274 ac->tags_mapped = 0; 3275 // parse 3276 while ((elem_type = get_bits(gb, 3)) != TYPE_END) { 3277 elem_id = get_bits(gb, 4); 3278 3279 if (avctx->debug & FF_DEBUG_STARTCODE) 3280 av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id); 3281 3282 if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE) { 3283 err = AVERROR_INVALIDDATA; 3284 goto fail; 3285 } 3286 3287 if (elem_type < TYPE_DSE) { 3288 if (che_presence[elem_type][elem_id]) { 3289 int error = che_presence[elem_type][elem_id] > 1; 3290 av_log(ac->avctx, error ? 
AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n", 3291 elem_type, elem_id); 3292 if (error) { 3293 err = AVERROR_INVALIDDATA; 3294 goto fail; 3295 } 3296 } 3297 che_presence[elem_type][elem_id]++; 3298 3299 if (!(che=get_che(ac, elem_type, elem_id))) { 3300 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", 3301 elem_type, elem_id); 3302 err = AVERROR_INVALIDDATA; 3303 goto fail; 3304 } 3305 samples = ac->oc[1].m4ac.frame_length_short ? 960 : 1024; 3306 che->present = 1; 3307 } 3308 3309 switch (elem_type) { 3310 3311 case TYPE_SCE: 3312 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 3313 audio_found = 1; 3314 sce_count++; 3315 break; 3316 3317 case TYPE_CPE: 3318 err = decode_cpe(ac, gb, che); 3319 audio_found = 1; 3320 break; 3321 3322 case TYPE_CCE: 3323 err = decode_cce(ac, gb, che); 3324 break; 3325 3326 case TYPE_LFE: 3327 err = decode_ics(ac, &che->ch[0], gb, 0, 0); 3328 audio_found = 1; 3329 break; 3330 3331 case TYPE_DSE: 3332 err = skip_data_stream_element(ac, gb); 3333 break; 3334 3335 case TYPE_PCE: { 3336 uint8_t layout_map[MAX_ELEM_ID*4][3] = {{0}}; 3337 int tags; 3338 3339 int pushed = push_output_configuration(ac); 3340 if (pce_found && !pushed) { 3341 err = AVERROR_INVALIDDATA; 3342 goto fail; 3343 } 3344 3345 tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb, 3346 payload_alignment); 3347 if (tags < 0) { 3348 err = tags; 3349 break; 3350 } 3351 if (pce_found) { 3352 av_log(avctx, AV_LOG_ERROR, 3353 "Not evaluating a further program_config_element as this construct is dubious at best.\n"); 3354 pop_output_configuration(ac); 3355 } else { 3356 err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1); 3357 if (!err) 3358 ac->oc[1].m4ac.chan_config = 0; 3359 pce_found = 1; 3360 } 3361 break; 3362 } 3363 3364 case TYPE_FIL: 3365 if (elem_id == 15) 3366 elem_id += get_bits(gb, 8) - 1; 3367 if (get_bits_left(gb) < 8 * elem_id) { 3368 av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err); 3369 err = 
AVERROR_INVALIDDATA; 3370 goto fail; 3371 } 3372 err = 0; 3373 while (elem_id > 0) { 3374 int ret = decode_extension_payload(ac, gb, elem_id, che_prev, che_prev_type); 3375 if (ret < 0) { 3376 err = ret; 3377 break; 3378 } 3379 elem_id -= ret; 3380 } 3381 break; 3382 3383 default: 3384 err = AVERROR_BUG; /* should not happen, but keeps compiler happy */ 3385 break; 3386 } 3387 3388 if (elem_type < TYPE_DSE) { 3389 che_prev = che; 3390 che_prev_type = elem_type; 3391 } 3392 3393 if (err) 3394 goto fail; 3395 3396 if (get_bits_left(gb) < 3) { 3397 av_log(avctx, AV_LOG_ERROR, overread_err); 3398 err = AVERROR_INVALIDDATA; 3399 goto fail; 3400 } 3401 } 3402 3403 if (!avctx->ch_layout.nb_channels) { 3404 *got_frame_ptr = 0; 3405 return 0; 3406 } 3407 3408 multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0; 3409 samples <<= multiplier; 3410 3411 spectral_to_sample(ac, samples); 3412 3413 if (ac->oc[1].status && audio_found) { 3414 avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier; 3415 avctx->frame_size = samples; 3416 ac->oc[1].status = OC_LOCKED; 3417 } 3418 3419 if (multiplier) 3420 avctx->internal->skip_samples_multiplier = 2; 3421 3422 if (!ac->frame->data[0] && samples) { 3423 av_log(avctx, AV_LOG_ERROR, "no frame data found\n"); 3424 err = AVERROR_INVALIDDATA; 3425 goto fail; 3426 } 3427 3428 if (samples) { 3429 ac->frame->nb_samples = samples; 3430 ac->frame->sample_rate = avctx->sample_rate; 3431 } else 3432 av_frame_unref(ac->frame); 3433 *got_frame_ptr = !!samples; 3434 3435 /* for dual-mono audio (SCE + SCE) */ 3436 is_dmono = ac->dmono_mode && sce_count == 2 && 3437 !av_channel_layout_compare(&ac->oc[1].ch_layout, 3438 &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO); 3439 if (is_dmono) { 3440 if (ac->dmono_mode == 1) 3441 frame->data[1] = frame->data[0]; 3442 else if (ac->dmono_mode == 2) 3443 frame->data[0] = frame->data[1]; 3444 } 3445 3446 return 0; 3447fail: 3448 pop_output_configuration(ac); 
3449 return err; 3450} 3451 3452static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame, 3453 int *got_frame_ptr, AVPacket *avpkt) 3454{ 3455 AACContext *ac = avctx->priv_data; 3456 const uint8_t *buf = avpkt->data; 3457 int buf_size = avpkt->size; 3458 GetBitContext gb; 3459 int buf_consumed; 3460 int buf_offset; 3461 int err; 3462 size_t new_extradata_size; 3463 const uint8_t *new_extradata = av_packet_get_side_data(avpkt, 3464 AV_PKT_DATA_NEW_EXTRADATA, 3465 &new_extradata_size); 3466 size_t jp_dualmono_size; 3467 const uint8_t *jp_dualmono = av_packet_get_side_data(avpkt, 3468 AV_PKT_DATA_JP_DUALMONO, 3469 &jp_dualmono_size); 3470 3471 if (new_extradata) { 3472 /* discard previous configuration */ 3473 ac->oc[1].status = OC_NONE; 3474 err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac, 3475 new_extradata, 3476 new_extradata_size * 8LL, 1); 3477 if (err < 0) { 3478 return err; 3479 } 3480 } 3481 3482 ac->dmono_mode = 0; 3483 if (jp_dualmono && jp_dualmono_size > 0) 3484 ac->dmono_mode = 1 + *jp_dualmono; 3485 if (ac->force_dmono_mode >= 0) 3486 ac->dmono_mode = ac->force_dmono_mode; 3487 3488 if (INT_MAX / 8 <= buf_size) 3489 return AVERROR_INVALIDDATA; 3490 3491 if ((err = init_get_bits8(&gb, buf, buf_size)) < 0) 3492 return err; 3493 3494 switch (ac->oc[1].m4ac.object_type) { 3495 case AOT_ER_AAC_LC: 3496 case AOT_ER_AAC_LTP: 3497 case AOT_ER_AAC_LD: 3498 case AOT_ER_AAC_ELD: 3499 err = aac_decode_er_frame(avctx, frame, got_frame_ptr, &gb); 3500 break; 3501 default: 3502 err = aac_decode_frame_int(avctx, frame, got_frame_ptr, &gb, avpkt); 3503 } 3504 if (err < 0) 3505 return err; 3506 3507 buf_consumed = (get_bits_count(&gb) + 7) >> 3; 3508 for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++) 3509 if (buf[buf_offset]) 3510 break; 3511 3512 return buf_size > buf_offset ? 
buf_consumed : buf_size; 3513} 3514 3515static av_cold int aac_decode_close(AVCodecContext *avctx) 3516{ 3517 AACContext *ac = avctx->priv_data; 3518 int i, type; 3519 3520 for (i = 0; i < MAX_ELEM_ID; i++) { 3521 for (type = 0; type < 4; type++) { 3522 if (ac->che[type][i]) 3523 AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][i]->sbr); 3524 av_freep(&ac->che[type][i]); 3525 } 3526 } 3527 3528 ff_mdct_end(&ac->mdct); 3529 ff_mdct_end(&ac->mdct_small); 3530 ff_mdct_end(&ac->mdct_ld); 3531 ff_mdct_end(&ac->mdct_ltp); 3532#if !USE_FIXED 3533 ff_mdct15_uninit(&ac->mdct120); 3534 ff_mdct15_uninit(&ac->mdct480); 3535 ff_mdct15_uninit(&ac->mdct960); 3536#endif 3537 av_freep(&ac->fdsp); 3538 return 0; 3539} 3540 3541static void aacdec_init(AACContext *c) 3542{ 3543 c->imdct_and_windowing = imdct_and_windowing; 3544 c->apply_ltp = apply_ltp; 3545 c->apply_tns = apply_tns; 3546 c->windowing_and_mdct_ltp = windowing_and_mdct_ltp; 3547 c->update_ltp = update_ltp; 3548#if USE_FIXED 3549 c->vector_pow43 = vector_pow43; 3550 c->subband_scale = subband_scale; 3551#endif 3552 3553#if !USE_FIXED 3554#if ARCH_MIPS 3555 ff_aacdec_init_mips(c); 3556#endif 3557#endif /* !USE_FIXED */ 3558} 3559/** 3560 * AVOptions for Japanese DTV specific extensions (ADTS only) 3561 */ 3562#define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM 3563static const AVOption options[] = { 3564 {"dual_mono_mode", "Select the channel to decode for dual mono", 3565 offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2, 3566 AACDEC_FLAGS, "dual_mono_mode"}, 3567 3568 {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3569 {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3570 {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3571 {"both", "Select both channels", 0, 
AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"}, 3572 3573 { "channel_order", "Order in which the channels are to be exported", 3574 offsetof(AACContext, output_channel_order), AV_OPT_TYPE_INT, 3575 { .i64 = CHANNEL_ORDER_DEFAULT }, 0, 1, AACDEC_FLAGS, "channel_order" }, 3576 { "default", "normal libavcodec channel order", 0, AV_OPT_TYPE_CONST, 3577 { .i64 = CHANNEL_ORDER_DEFAULT }, .flags = AACDEC_FLAGS, "channel_order" }, 3578 { "coded", "order in which the channels are coded in the bitstream", 3579 0, AV_OPT_TYPE_CONST, { .i64 = CHANNEL_ORDER_CODED }, .flags = AACDEC_FLAGS, "channel_order" }, 3580 3581 {NULL}, 3582}; 3583 3584static const AVClass aac_decoder_class = { 3585 .class_name = "AAC decoder", 3586 .item_name = av_default_item_name, 3587 .option = options, 3588 .version = LIBAVUTIL_VERSION_INT, 3589}; 3590