1 /*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6 *
7 * AAC LATM decoder
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28 /**
29 * @file
30 * AAC decoder
31 * @author Oded Shimon ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 */
34
35 #define FFT_FLOAT 1
36 #define USE_FIXED 0
37
38 #include "libavutil/float_dsp.h"
39 #include "libavutil/opt.h"
40 #include "avcodec.h"
41 #include "codec_internal.h"
42 #include "get_bits.h"
43 #include "fft.h"
44 #include "mdct15.h"
45 #include "lpc.h"
46 #include "kbdwin.h"
47 #include "sinewin.h"
48
49 #include "aac.h"
50 #include "aactab.h"
51 #include "aacdectab.h"
52 #include "adts_header.h"
53 #include "cbrt_data.h"
54 #include "sbr.h"
55 #include "aacsbr.h"
56 #include "mpeg4audio.h"
57 #include "profiles.h"
58 #include "libavutil/intfloat.h"
59
60 #include <errno.h>
61 #include <math.h>
62 #include <stdint.h>
63 #include <string.h>
64
65 #if ARCH_ARM
66 # include "arm/aac.h"
67 #elif ARCH_MIPS
68 # include "mips/aacdec_mips.h"
69 #endif
70
/*
 * File-local, 32-byte aligned INTFLOAT tables with 120 and 960 entries.
 * NOTE(review): judging by the names these are sine and Kaiser-Bessel-derived
 * window tables for the 960-sample frame length; they are filled in outside
 * the part of the file visible here -- confirm against aacdec_template.c.
 */
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
75
reset_predict_state(PredictorState *ps)76 static av_always_inline void reset_predict_state(PredictorState *ps)
77 {
78 ps->r0 = 0.0f;
79 ps->r1 = 0.0f;
80 ps->cor0 = 0.0f;
81 ps->cor1 = 0.0f;
82 ps->var0 = 1.0f;
83 ps->var1 = 1.0f;
84 }
85
86 #ifndef VMUL2
/**
 * Look up two table entries and write them, scaled, to the output.
 *
 * The two table indices are the low and the high nibble of the low byte of
 * idx, in that order.
 *
 * @param dst   output position; two floats are written
 * @param v     lookup table (indices 0..15 may be accessed)
 * @param idx   packed indices, 4 bits each
 * @param scale pointer to the scale factor applied to both values
 * @return dst advanced by two elements
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = scale[0];

    dst[0] = v[idx & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;
    return dst + 2;
}
95 #endif
96
97 #ifndef VMUL4
/**
 * Look up four table entries and write them, scaled, to the output.
 *
 * The table indices are four consecutive 2-bit fields of idx, taken from the
 * least significant field upwards.
 *
 * @param dst   output position; four floats are written
 * @param v     lookup table (indices 0..3 may be accessed)
 * @param idx   packed indices, 2 bits each
 * @param scale pointer to the scale factor applied to all values
 * @return dst advanced by four elements
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = scale[0];
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = v[(idx >> (2 * n)) & 3] * gain;

    return dst + 4;
}
108 #endif
109
110 #ifndef VMUL2S
VMUL2S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale)111 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
112 unsigned sign, const float *scale)
113 {
114 union av_intfloat32 s0, s1;
115
116 s0.f = s1.f = *scale;
117 s0.i ^= sign >> 1 << 31;
118 s1.i ^= sign << 31;
119
120 *dst++ = v[idx & 15] * s0.f;
121 *dst++ = v[idx>>4 & 15] * s1.f;
122
123 return dst;
124 }
125 #endif
126
127 #ifndef VMUL4S
VMUL4S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale)128 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
129 unsigned sign, const float *scale)
130 {
131 unsigned nz = idx >> 12;
132 union av_intfloat32 s = { .f = *scale };
133 union av_intfloat32 t;
134
135 t.i = s.i ^ (sign & 1U<<31);
136 *dst++ = v[idx & 3] * t.f;
137
138 sign <<= nz & 1; nz >>= 1;
139 t.i = s.i ^ (sign & 1U<<31);
140 *dst++ = v[idx>>2 & 3] * t.f;
141
142 sign <<= nz & 1; nz >>= 1;
143 t.i = s.i ^ (sign & 1U<<31);
144 *dst++ = v[idx>>4 & 3] * t.f;
145
146 sign <<= nz & 1;
147 t.i = s.i ^ (sign & 1U<<31);
148 *dst++ = v[idx>>6 & 3] * t.f;
149
150 return dst;
151 }
152 #endif
153
flt16_round(float pf)154 static av_always_inline float flt16_round(float pf)
155 {
156 union av_intfloat32 tmp;
157 tmp.f = pf;
158 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
159 return tmp.f;
160 }
161
flt16_even(float pf)162 static av_always_inline float flt16_even(float pf)
163 {
164 union av_intfloat32 tmp;
165 tmp.f = pf;
166 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
167 return tmp.f;
168 }
169
flt16_trunc(float pf)170 static av_always_inline float flt16_trunc(float pf)
171 {
172 union av_intfloat32 pun;
173 pun.f = pf;
174 pun.i &= 0xFFFF0000U;
175 return pun.f;
176 }
177
predict(PredictorState *ps, float *coef, int output_enable)178 static av_always_inline void predict(PredictorState *ps, float *coef,
179 int output_enable)
180 {
181 const float a = 0.953125; // 61.0 / 64
182 const float alpha = 0.90625; // 29.0 / 32
183 float e0, e1;
184 float pv;
185 float k1, k2;
186 float r0 = ps->r0, r1 = ps->r1;
187 float cor0 = ps->cor0, cor1 = ps->cor1;
188 float var0 = ps->var0, var1 = ps->var1;
189
190 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
191 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
192
193 pv = flt16_round(k1 * r0 + k2 * r1);
194 if (output_enable)
195 *coef += pv;
196
197 e0 = *coef;
198 e1 = e0 - k1 * r0;
199
200 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
201 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
202 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
203 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
204
205 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
206 ps->r0 = flt16_trunc(a * e0);
207 }
208
209 /**
210 * Apply dependent channel coupling (applied before IMDCT).
211 *
212 * @param index index into coupling gain array
213 */
apply_dependent_coupling(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)214 static void apply_dependent_coupling(AACContext *ac,
215 SingleChannelElement *target,
216 ChannelElement *cce, int index)
217 {
218 IndividualChannelStream *ics = &cce->ch[0].ics;
219 const uint16_t *offsets = ics->swb_offset;
220 float *dest = target->coeffs;
221 const float *src = cce->ch[0].coeffs;
222 int g, i, group, k, idx = 0;
223 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
224 av_log(ac->avctx, AV_LOG_ERROR,
225 "Dependent coupling is not supported together with LTP\n");
226 return;
227 }
228 for (g = 0; g < ics->num_window_groups; g++) {
229 for (i = 0; i < ics->max_sfb; i++, idx++) {
230 if (cce->ch[0].band_type[idx] != ZERO_BT) {
231 const float gain = cce->coup.gain[index][idx];
232 for (group = 0; group < ics->group_len[g]; group++) {
233 for (k = offsets[i]; k < offsets[i + 1]; k++) {
234 // FIXME: SIMDify
235 dest[group * 128 + k] += gain * src[group * 128 + k];
236 }
237 }
238 }
239 }
240 dest += ics->group_len[g] * 128;
241 src += ics->group_len[g] * 128;
242 }
243 }
244
245 /**
246 * Apply independent channel coupling (applied after IMDCT).
247 *
248 * @param index index into coupling gain array
249 */
apply_independent_coupling(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)250 static void apply_independent_coupling(AACContext *ac,
251 SingleChannelElement *target,
252 ChannelElement *cce, int index)
253 {
254 const float gain = cce->coup.gain[index][0];
255 const float *src = cce->ch[0].ret;
256 float *dest = target->ret;
257 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
258
259 ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
260 }
261
262 #include "aacdec_template.c"
263
264 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
265
/**
 * Private context of the LATM decoder.
 *
 * The AACContext is the first member; the remaining fields hold what the
 * LATM parsing functions in this file remember between packets.
 */
struct LATMContext {
    AACContext aac_ctx;     ///< containing AACContext
    int initialized;        ///< initialized after a valid extradata was seen

    // parser data
    int audio_mux_version_A; ///< LATM syntax version
    int frame_length_type;   ///< 0/1 variable/fixed frame length
    int frame_length;        ///< frame length for fixed frame length
};
275
/**
 * Read a LATM variable-length value: a 2-bit field gives the number of
 * value bytes minus one, followed by that many bytes (8 to 32 bits).
 *
 * @param b bit reader, advanced past the value
 * @return the value read
 */
static inline uint32_t latm_get_value(GetBitContext *b)
{
    const int nb_bytes = get_bits(b, 2) + 1;

    return get_bits_long(b, 8 * nb_bytes);
}
282
/**
 * Parse the AudioSpecificConfig found in a LATM stream mux config and, if
 * the decoder is not initialized yet or the sample rate / channel
 * configuration changed, copy the raw config bytes into avctx->extradata
 * and mark the context as uninitialized so the caller re-initializes it.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned at the start of the config; advanced
 *                past the config on success
 * @param asclen  length of the config in bits; 0 means the length is not
 *                signalled and the number of bits consumed by the parser is
 *                used instead; negative values are rejected
 * @return 0 on success, a negative AVERROR code on failure
 */
static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
                                             GetBitContext *gb, int asclen)
{
    AACContext *ac        = &latmctx->aac_ctx;
    AVCodecContext *avctx = ac->avctx;
    MPEG4AudioConfig m4ac = { 0 };
    GetBitContext gbc;
    int config_start_bit  = get_bits_count(gb);
    int sync_extension    = 0;
    int bits_consumed, esize, i;

    if (asclen > 0) {
        /* explicit length: parse from a reader limited to the config */
        sync_extension = 1;
        asclen         = FFMIN(asclen, get_bits_left(gb));
        init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
        skip_bits_long(&gbc, config_start_bit);
    } else if (asclen == 0) {
        gbc = *gb;
    } else {
        return AVERROR_INVALIDDATA;
    }

    if (get_bits_left(gb) <= 0)
        return AVERROR_INVALIDDATA;

    /* parse into a scratch config only; the decoder itself is untouched */
    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
                                                    &gbc, config_start_bit,
                                                    sync_extension);

    if (bits_consumed < config_start_bit)
        return AVERROR_INVALIDDATA;
    bits_consumed -= config_start_bit;

    if (asclen == 0)
        asclen = bits_consumed;

    if (!latmctx->initialized ||
        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {

        if (latmctx->initialized) {
            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
        } else {
            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
        }
        latmctx->initialized = 0;

        esize = (asclen + 7) / 8;

        if (avctx->extradata_size < esize) {
            av_free(avctx->extradata);
            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!avctx->extradata) {
                /*
                 * The old buffer is gone; keep the size consistent with the
                 * NULL pointer. Otherwise a later, smaller config would skip
                 * the reallocation above and write through NULL.
                 */
                avctx->extradata_size = 0;
                return AVERROR(ENOMEM);
            }
        }

        avctx->extradata_size = esize;
        /* copy the raw config bytes from the current position of gb */
        gbc                   = *gb;
        for (i = 0; i < esize; i++) {
            avctx->extradata[i] = get_bits(&gbc, 8);
        }
        memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
    }
    skip_bits_long(gb, asclen);

    return 0;
}
350
/**
 * Parse a LATM StreamMuxConfig.
 *
 * Only a single program with a single layer is supported; anything else is
 * reported via avpriv_request_sample(). When audio_mux_version_A is set,
 * nothing beyond the version bits is parsed.
 *
 * @param latmctx LATM decoder context; audio_mux_version_A, frame_length_type
 *                and frame_length are updated
 * @param gb      bit reader positioned at the start of the config
 * @return 0 on success, a negative AVERROR code on failure
 */
static int read_stream_mux_config(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    AVCodecContext *logctx = latmctx->aac_ctx.avctx;
    int version = get_bits(gb, 1);          // audioMuxVersion
    int asc_len = 0;
    int err;

    latmctx->audio_mux_version_A = version ? get_bits(gb, 1) : 0;
    if (latmctx->audio_mux_version_A)
        return 0;

    if (version)
        latm_get_value(gb);                 // taraFullness

    skip_bits(gb, 1);                       // allStreamSameTimeFraming
    skip_bits(gb, 6);                       // numSubFrames

    if (get_bits(gb, 4)) {                  // numPrograms
        avpriv_request_sample(logctx, "Multiple programs");
        return AVERROR_PATCHWELCOME;
    }

    // for each program (which there is only one in DVB)

    // for each layer (which there is only one in DVB)
    if (get_bits(gb, 3)) {                  // numLayer
        avpriv_request_sample(logctx, "Multiple layers");
        return AVERROR_PATCHWELCOME;
    }

    // for all but first stream: use_same_config = get_bits(gb, 1);
    /* the config length is only signalled when audioMuxVersion is set */
    if (version)
        asc_len = latm_get_value(gb);
    err = latm_decode_audio_specific_config(latmctx, gb, asc_len);
    if (err < 0)
        return err;

    latmctx->frame_length_type = get_bits(gb, 3);
    switch (latmctx->frame_length_type) {
    case 0:
        skip_bits(gb, 8);                   // latmBufferFullness
        break;
    case 1:
        latmctx->frame_length = get_bits(gb, 9);
        break;
    case 3:
    case 4:
    case 5:
        skip_bits(gb, 6);                   // CELP frame length table index
        break;
    case 6:
    case 7:
        skip_bits(gb, 1);                   // HVXC frame length table index
        break;
    default:
        break;
    }

    if (get_bits(gb, 1)) {                  // other data
        if (version) {
            latm_get_value(gb);             // other_data_bits
        } else {
            /* escape-coded length: one escape bit plus 8 bits per step */
            for (;;) {
                int more;

                if (get_bits_left(gb) < 9)
                    return AVERROR_INVALIDDATA;
                more = get_bits(gb, 1);
                skip_bits(gb, 8);
                if (!more)
                    break;
            }
        }
    }

    if (get_bits(gb, 1))                    // crc present
        skip_bits(gb, 8);                   // config_crc

    return 0;
}
430
/**
 * Read the payload length information that precedes the payload.
 *
 * For frame length type 0 the length is coded as a run of bytes that are
 * summed up; the run ends with the first byte that is not 255. For type 1
 * the stored fixed frame length is returned. For types 3, 5 and 7 a 2-bit
 * field is skipped and 0 is returned; all other types return 0 directly.
 *
 * @param ctx LATM decoder context
 * @param gb  bit reader
 * @return payload length, 0 if none is available, or AVERROR_INVALIDDATA if
 *         the bitstream ends inside the length field
 */
static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
{
    int total = 0;
    int byte;

    switch (ctx->frame_length_type) {
    case 0:
        do {
            if (get_bits_left(gb) < 8)
                return AVERROR_INVALIDDATA;
            byte   = get_bits(gb, 8);
            total += byte;
        } while (byte == 255);
        return total;
    case 1:
        return ctx->frame_length;
    case 3:
    case 5:
    case 7:
        skip_bits(gb, 2);          // mux_slot_length_coded
        break;
    default:
        break;
    }

    return 0;
}
453
/**
 * Parse the start of a LATM AudioMuxElement up to the payload.
 *
 * Reads the stream mux config unless the element says to reuse the previous
 * one, then (for audio_mux_version_A == 0) reads the payload length and
 * checks it against the bits left in the reader.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned at the start of the element
 * @return 0 on success, 1 if the previous config should be reused but no
 *         extradata exists yet, a negative AVERROR code on failure
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    AVCodecContext *avctx = latmctx->aac_ctx.avctx;
    int slot_bytes, bits_left, ret;

    if (get_bits(gb, 1)) {                  // useSameStreamMux
        if (!avctx->extradata) {
            av_log(avctx, AV_LOG_DEBUG,
                   "no decoder config found\n");
            return 1;
        }
    } else {
        ret = read_stream_mux_config(latmctx, gb);
        if (ret < 0)
            return ret;
    }

    if (latmctx->audio_mux_version_A != 0)
        return 0;

    slot_bytes = read_payload_length_info(latmctx, gb);
    bits_left  = get_bits_left(gb);

    if (slot_bytes < 0 || slot_bytes * 8LL > bits_left) {
        av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
        return AVERROR_INVALIDDATA;
    }
    /* more than 256 bits beyond the announced payload is treated as an error */
    if (slot_bytes * 8 + 256 < bits_left) {
        av_log(avctx, AV_LOG_ERROR,
               "frame length mismatch %d << %d\n",
               slot_bytes * 8, bits_left);
        return AVERROR_INVALIDDATA;
    }

    return 0;
}
481
482
/**
 * Decode one LOAS/LATM packet.
 *
 * Checks the LOAS sync word and length, parses the audio mux element,
 * (re)initializes the AAC decoder from the extradata if needed and then
 * hands the payload to the matching AAC frame decoder.
 *
 * @param avctx         codec context
 * @param out           frame receiving the decoded audio
 * @param got_frame_ptr set by the frame decoder; set to 0 here when no
 *                      extradata is available yet
 * @param avpkt         input packet
 * @return number of bytes consumed (the LOAS frame length, or the whole
 *         packet when it is skipped), or a negative AVERROR code
 */
static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    AACContext *ac              = &latmctx->aac_ctx;
    GetBitContext gb;
    int frame_bytes, ret;

    ret = init_get_bits8(&gb, avpkt->data, avpkt->size);
    if (ret < 0)
        return ret;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    /* 13-bit length field plus the 3 header bytes just read */
    frame_bytes = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (frame_bytes > avpkt->size)
        return AVERROR_INVALIDDATA;

    ret = read_audio_mux_element(latmctx, &gb);
    if (ret < 0)
        return ret;
    if (ret > 0)
        return avpkt->size;     /* no config yet: drop the whole packet */

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            *got_frame_ptr = 0;
            return avpkt->size;
        }

        /* keep the old output configuration so it can be restored on failure */
        push_output_configuration(ac);
        ret = decode_audio_specific_config(ac, avctx, &ac->oc[1].m4ac,
                                           avctx->extradata,
                                           avctx->extradata_size*8LL, 1);
        if (ret < 0) {
            pop_output_configuration(ac);
            return ret;
        }
        latmctx->initialized = 1;
    }

    if (show_bits(&gb, 12) == 0xfff) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (ac->oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        ret = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        ret = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
        break;
    }
    if (ret < 0)
        return ret;

    return frame_bytes;
}
543
latm_decode_init(AVCodecContext *avctx)544 static av_cold int latm_decode_init(AVCodecContext *avctx)
545 {
546 struct LATMContext *latmctx = avctx->priv_data;
547 int ret = aac_decode_init(avctx);
548
549 if (avctx->extradata_size > 0)
550 latmctx->initialized = !ret;
551
552 return ret;
553 }
554
/**
 * Codec table entry for the "aac" decoder (AV_CODEC_ID_AAC).
 *
 * Uses a plain AACContext as private data and outputs planar float samples.
 */
const FFCodec ff_aac_decoder = {
    .p.name          = "aac",
    .p.long_name     = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .p.type          = AVMEDIA_TYPE_AUDIO,
    .p.id            = AV_CODEC_ID_AAC,
    .priv_data_size  = sizeof(AACContext),
    .init            = aac_decode_init,
    .close           = aac_decode_close,
    FF_CODEC_DECODE_CB(aac_decode_frame),
    /* sample format list, terminated by AV_SAMPLE_FMT_NONE */
    .p.sample_fmts   = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .p.capabilities  = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
#if FF_API_OLD_CHANNEL_LAYOUT
    .p.channel_layouts = aac_channel_layout,
#endif
    .p.ch_layouts    = aac_ch_layout,
    .flush = flush,
    .p.priv_class    = &aac_decoder_class,
    .p.profiles      = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};
577
/*
    Note: This decoder filter is intended to decode LATM streams transferred
    in MPEG transport streams which only contain one program.
    To do more complex LATM demuxing, a separate LATM demuxer should be used.

    Codec table entry for the "aac_latm" decoder (AV_CODEC_ID_AAC_LATM).
    It uses struct LATMContext as private data and its own init and decode
    callbacks; close and flush are shared with the "aac" decoder.
*/
const FFCodec ff_aac_latm_decoder = {
    .p.name          = "aac_latm",
    .p.long_name     = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
    .p.type          = AVMEDIA_TYPE_AUDIO,
    .p.id            = AV_CODEC_ID_AAC_LATM,
    .priv_data_size  = sizeof(struct LATMContext),
    .init            = latm_decode_init,
    .close           = aac_decode_close,
    FF_CODEC_DECODE_CB(latm_decode_frame),
    /* sample format list, terminated by AV_SAMPLE_FMT_NONE */
    .p.sample_fmts   = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .p.capabilities  = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
#if FF_API_OLD_CHANNEL_LAYOUT
    .p.channel_layouts = aac_channel_layout,
#endif
    .p.ch_layouts    = aac_ch_layout,
    .flush = flush,
    .p.profiles      = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};
604