xref: /third_party/ffmpeg/libavcodec/aacdec.c (revision cabdff1a)
1/*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6 *
7 * AAC LATM decoder
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28/**
29 * @file
30 * AAC decoder
31 * @author Oded Shimon  ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 */
34
35#define FFT_FLOAT 1
36#define USE_FIXED 0
37
38#include "libavutil/float_dsp.h"
39#include "libavutil/opt.h"
40#include "avcodec.h"
41#include "codec_internal.h"
42#include "get_bits.h"
43#include "fft.h"
44#include "mdct15.h"
45#include "lpc.h"
46#include "kbdwin.h"
47#include "sinewin.h"
48
49#include "aac.h"
50#include "aactab.h"
51#include "aacdectab.h"
52#include "adts_header.h"
53#include "cbrt_data.h"
54#include "sbr.h"
55#include "aacsbr.h"
56#include "mpeg4audio.h"
57#include "profiles.h"
58#include "libavutil/intfloat.h"
59
60#include <errno.h>
61#include <math.h>
62#include <stdint.h>
63#include <string.h>
64
65#if ARCH_ARM
66#   include "arm/aac.h"
67#elif ARCH_MIPS
68#   include "mips/aacdec_mips.h"
69#endif
70
/*
 * Four file-local INTFLOAT tables, each aligned to 32 bytes, holding 120
 * or 960 entries. The names are passed through AAC_RENAME().
 * NOTE(review): presumably the sine and KBD window shapes used for
 * 960-sample frames (long = 960, short = 120) and filled in by the
 * template code included further down - confirm against
 * aacdec_template.c.
 */
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
75
76static av_always_inline void reset_predict_state(PredictorState *ps)
77{
78    ps->r0   = 0.0f;
79    ps->r1   = 0.0f;
80    ps->cor0 = 0.0f;
81    ps->cor1 = 0.0f;
82    ps->var0 = 1.0f;
83    ps->var1 = 1.0f;
84}
85
#ifndef VMUL2
/**
 * Write two scaled table entries to dst.
 *
 * idx packs two 4-bit table indices: bits 0-3 select the first entry and
 * bits 4-7 select the second one.
 *
 * @param dst   output position; two floats are written
 * @param v     lookup table (indices 0..15 are addressed)
 * @param idx   packed pair of 4-bit indices
 * @param scale pointer to the scalar applied to both entries
 * @return pointer just past the two written floats
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;
    const unsigned first  =  idx       & 15;
    const unsigned second = (idx >> 4) & 15;

    dst[0] = v[first]  * gain;
    dst[1] = v[second] * gain;

    return dst + 2;
}
#endif
96
#ifndef VMUL4
/**
 * Write four scaled table entries to dst.
 *
 * idx packs four 2-bit table indices, least significant pair first
 * (bits 0-1, 2-3, 4-5, 6-7).
 *
 * @param dst   output position; four floats are written
 * @param v     lookup table (indices 0..3 are addressed)
 * @param idx   packed group of 2-bit indices
 * @param scale pointer to the scalar applied to every entry
 * @return pointer just past the four written floats
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;
    int n;

    /* consume one 2-bit index per output, lowest bits first */
    for (n = 0; n < 4; n++) {
        *dst++ = v[idx & 3] * gain;
        idx >>= 2;
    }

    return dst;
}
#endif
109
110#ifndef VMUL2S
111static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
112                            unsigned sign, const float *scale)
113{
114    union av_intfloat32 s0, s1;
115
116    s0.f = s1.f = *scale;
117    s0.i ^= sign >> 1 << 31;
118    s1.i ^= sign      << 31;
119
120    *dst++ = v[idx    & 15] * s0.f;
121    *dst++ = v[idx>>4 & 15] * s1.f;
122
123    return dst;
124}
125#endif
126
127#ifndef VMUL4S
128static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
129                            unsigned sign, const float *scale)
130{
131    unsigned nz = idx >> 12;
132    union av_intfloat32 s = { .f = *scale };
133    union av_intfloat32 t;
134
135    t.i = s.i ^ (sign & 1U<<31);
136    *dst++ = v[idx    & 3] * t.f;
137
138    sign <<= nz & 1; nz >>= 1;
139    t.i = s.i ^ (sign & 1U<<31);
140    *dst++ = v[idx>>2 & 3] * t.f;
141
142    sign <<= nz & 1; nz >>= 1;
143    t.i = s.i ^ (sign & 1U<<31);
144    *dst++ = v[idx>>4 & 3] * t.f;
145
146    sign <<= nz & 1;
147    t.i = s.i ^ (sign & 1U<<31);
148    *dst++ = v[idx>>6 & 3] * t.f;
149
150    return dst;
151}
152#endif
153
154static av_always_inline float flt16_round(float pf)
155{
156    union av_intfloat32 tmp;
157    tmp.f = pf;
158    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
159    return tmp.f;
160}
161
162static av_always_inline float flt16_even(float pf)
163{
164    union av_intfloat32 tmp;
165    tmp.f = pf;
166    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
167    return tmp.f;
168}
169
170static av_always_inline float flt16_trunc(float pf)
171{
172    union av_intfloat32 pun;
173    pun.f = pf;
174    pun.i &= 0xFFFF0000U;
175    return pun.f;
176}
177
178static av_always_inline void predict(PredictorState *ps, float *coef,
179                                     int output_enable)
180{
181    const float a     = 0.953125; // 61.0 / 64
182    const float alpha = 0.90625;  // 29.0 / 32
183    float e0, e1;
184    float pv;
185    float k1, k2;
186    float   r0 = ps->r0,     r1 = ps->r1;
187    float cor0 = ps->cor0, cor1 = ps->cor1;
188    float var0 = ps->var0, var1 = ps->var1;
189
190    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
191    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
192
193    pv = flt16_round(k1 * r0 + k2 * r1);
194    if (output_enable)
195        *coef += pv;
196
197    e0 = *coef;
198    e1 = e0 - k1 * r0;
199
200    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
201    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
202    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
203    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
204
205    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
206    ps->r0 = flt16_trunc(a * e0);
207}
208
209/**
210 * Apply dependent channel coupling (applied before IMDCT).
211 *
212 * @param   index   index into coupling gain array
213 */
214static void apply_dependent_coupling(AACContext *ac,
215                                     SingleChannelElement *target,
216                                     ChannelElement *cce, int index)
217{
218    IndividualChannelStream *ics = &cce->ch[0].ics;
219    const uint16_t *offsets = ics->swb_offset;
220    float *dest = target->coeffs;
221    const float *src = cce->ch[0].coeffs;
222    int g, i, group, k, idx = 0;
223    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
224        av_log(ac->avctx, AV_LOG_ERROR,
225               "Dependent coupling is not supported together with LTP\n");
226        return;
227    }
228    for (g = 0; g < ics->num_window_groups; g++) {
229        for (i = 0; i < ics->max_sfb; i++, idx++) {
230            if (cce->ch[0].band_type[idx] != ZERO_BT) {
231                const float gain = cce->coup.gain[index][idx];
232                for (group = 0; group < ics->group_len[g]; group++) {
233                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
234                        // FIXME: SIMDify
235                        dest[group * 128 + k] += gain * src[group * 128 + k];
236                    }
237                }
238            }
239        }
240        dest += ics->group_len[g] * 128;
241        src  += ics->group_len[g] * 128;
242    }
243}
244
245/**
246 * Apply independent channel coupling (applied after IMDCT).
247 *
248 * @param   index   index into coupling gain array
249 */
250static void apply_independent_coupling(AACContext *ac,
251                                       SingleChannelElement *target,
252                                       ChannelElement *cce, int index)
253{
254    const float gain = cce->coup.gain[index][0];
255    const float *src = cce->ch[0].ret;
256    float *dest = target->ret;
257    const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
258
259    ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
260}
261
262#include "aacdec_template.c"
263
264#define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
265
266struct LATMContext {
267    AACContext aac_ctx;     ///< containing AACContext
268    int initialized;        ///< initialized after a valid extradata was seen
269
270    // parser data
271    int audio_mux_version_A; ///< LATM syntax version
272    int frame_length_type;   ///< 0/1 variable/fixed frame length
273    int frame_length;        ///< frame length for fixed frame length
274};
275
276static inline uint32_t latm_get_value(GetBitContext *b)
277{
278    int length = get_bits(b, 2);
279
280    return get_bits_long(b, (length+1)*8);
281}
282
283static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
284                                             GetBitContext *gb, int asclen)
285{
286    AACContext *ac        = &latmctx->aac_ctx;
287    AVCodecContext *avctx = ac->avctx;
288    MPEG4AudioConfig m4ac = { 0 };
289    GetBitContext gbc;
290    int config_start_bit  = get_bits_count(gb);
291    int sync_extension    = 0;
292    int bits_consumed, esize, i;
293
294    if (asclen > 0) {
295        sync_extension = 1;
296        asclen         = FFMIN(asclen, get_bits_left(gb));
297        init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
298        skip_bits_long(&gbc, config_start_bit);
299    } else if (asclen == 0) {
300        gbc = *gb;
301    } else {
302        return AVERROR_INVALIDDATA;
303    }
304
305    if (get_bits_left(gb) <= 0)
306        return AVERROR_INVALIDDATA;
307
308    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
309                                                    &gbc, config_start_bit,
310                                                    sync_extension);
311
312    if (bits_consumed < config_start_bit)
313        return AVERROR_INVALIDDATA;
314    bits_consumed -= config_start_bit;
315
316    if (asclen == 0)
317      asclen = bits_consumed;
318
319    if (!latmctx->initialized ||
320        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
321        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
322
323        if (latmctx->initialized) {
324            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
325        } else {
326            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
327        }
328        latmctx->initialized = 0;
329
330        esize = (asclen + 7) / 8;
331
332        if (avctx->extradata_size < esize) {
333            av_free(avctx->extradata);
334            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
335            if (!avctx->extradata)
336                return AVERROR(ENOMEM);
337        }
338
339        avctx->extradata_size = esize;
340        gbc = *gb;
341        for (i = 0; i < esize; i++) {
342          avctx->extradata[i] = get_bits(&gbc, 8);
343        }
344        memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
345    }
346    skip_bits_long(gb, asclen);
347
348    return 0;
349}
350
351static int read_stream_mux_config(struct LATMContext *latmctx,
352                                  GetBitContext *gb)
353{
354    int ret, audio_mux_version = get_bits(gb, 1);
355
356    latmctx->audio_mux_version_A = 0;
357    if (audio_mux_version)
358        latmctx->audio_mux_version_A = get_bits(gb, 1);
359
360    if (!latmctx->audio_mux_version_A) {
361
362        if (audio_mux_version)
363            latm_get_value(gb);                 // taraFullness
364
365        skip_bits(gb, 1);                       // allStreamSameTimeFraming
366        skip_bits(gb, 6);                       // numSubFrames
367        // numPrograms
368        if (get_bits(gb, 4)) {                  // numPrograms
369            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
370            return AVERROR_PATCHWELCOME;
371        }
372
373        // for each program (which there is only one in DVB)
374
375        // for each layer (which there is only one in DVB)
376        if (get_bits(gb, 3)) {                   // numLayer
377            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
378            return AVERROR_PATCHWELCOME;
379        }
380
381        // for all but first stream: use_same_config = get_bits(gb, 1);
382        if (!audio_mux_version) {
383            if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
384                return ret;
385        } else {
386            int ascLen = latm_get_value(gb);
387            if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
388                return ret;
389        }
390
391        latmctx->frame_length_type = get_bits(gb, 3);
392        switch (latmctx->frame_length_type) {
393        case 0:
394            skip_bits(gb, 8);       // latmBufferFullness
395            break;
396        case 1:
397            latmctx->frame_length = get_bits(gb, 9);
398            break;
399        case 3:
400        case 4:
401        case 5:
402            skip_bits(gb, 6);       // CELP frame length table index
403            break;
404        case 6:
405        case 7:
406            skip_bits(gb, 1);       // HVXC frame length table index
407            break;
408        }
409
410        if (get_bits(gb, 1)) {                  // other data
411            if (audio_mux_version) {
412                latm_get_value(gb);             // other_data_bits
413            } else {
414                int esc;
415                do {
416                    if (get_bits_left(gb) < 9)
417                        return AVERROR_INVALIDDATA;
418                    esc = get_bits(gb, 1);
419                    skip_bits(gb, 8);
420                } while (esc);
421            }
422        }
423
424        if (get_bits(gb, 1))                     // crc present
425            skip_bits(gb, 8);                    // config_crc
426    }
427
428    return 0;
429}
430
431static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
432{
433    uint8_t tmp;
434
435    if (ctx->frame_length_type == 0) {
436        int mux_slot_length = 0;
437        do {
438            if (get_bits_left(gb) < 8)
439                return AVERROR_INVALIDDATA;
440            tmp = get_bits(gb, 8);
441            mux_slot_length += tmp;
442        } while (tmp == 255);
443        return mux_slot_length;
444    } else if (ctx->frame_length_type == 1) {
445        return ctx->frame_length;
446    } else if (ctx->frame_length_type == 3 ||
447               ctx->frame_length_type == 5 ||
448               ctx->frame_length_type == 7) {
449        skip_bits(gb, 2);          // mux_slot_length_coded
450    }
451    return 0;
452}
453
454static int read_audio_mux_element(struct LATMContext *latmctx,
455                                  GetBitContext *gb)
456{
457    int err;
458    uint8_t use_same_mux = get_bits(gb, 1);
459    if (!use_same_mux) {
460        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
461            return err;
462    } else if (!latmctx->aac_ctx.avctx->extradata) {
463        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
464               "no decoder config found\n");
465        return 1;
466    }
467    if (latmctx->audio_mux_version_A == 0) {
468        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
469        if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) {
470            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
471            return AVERROR_INVALIDDATA;
472        } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
473            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
474                   "frame length mismatch %d << %d\n",
475                   mux_slot_length_bytes * 8, get_bits_left(gb));
476            return AVERROR_INVALIDDATA;
477        }
478    }
479    return 0;
480}
481
482
483static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
484                             int *got_frame_ptr, AVPacket *avpkt)
485{
486    struct LATMContext *latmctx = avctx->priv_data;
487    int                 muxlength, err;
488    GetBitContext       gb;
489
490    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
491        return err;
492
493    // check for LOAS sync word
494    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
495        return AVERROR_INVALIDDATA;
496
497    muxlength = get_bits(&gb, 13) + 3;
498    // not enough data, the parser should have sorted this out
499    if (muxlength > avpkt->size)
500        return AVERROR_INVALIDDATA;
501
502    if ((err = read_audio_mux_element(latmctx, &gb)))
503        return (err < 0) ? err : avpkt->size;
504
505    if (!latmctx->initialized) {
506        if (!avctx->extradata) {
507            *got_frame_ptr = 0;
508            return avpkt->size;
509        } else {
510            push_output_configuration(&latmctx->aac_ctx);
511            if ((err = decode_audio_specific_config(
512                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
513                    avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
514                pop_output_configuration(&latmctx->aac_ctx);
515                return err;
516            }
517            latmctx->initialized = 1;
518        }
519    }
520
521    if (show_bits(&gb, 12) == 0xfff) {
522        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
523               "ADTS header detected, probably as result of configuration "
524               "misparsing\n");
525        return AVERROR_INVALIDDATA;
526    }
527
528    switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
529    case AOT_ER_AAC_LC:
530    case AOT_ER_AAC_LTP:
531    case AOT_ER_AAC_LD:
532    case AOT_ER_AAC_ELD:
533        err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
534        break;
535    default:
536        err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
537    }
538    if (err < 0)
539        return err;
540
541    return muxlength;
542}
543
544static av_cold int latm_decode_init(AVCodecContext *avctx)
545{
546    struct LATMContext *latmctx = avctx->priv_data;
547    int ret = aac_decode_init(avctx);
548
549    if (avctx->extradata_size > 0)
550        latmctx->initialized = !ret;
551
552    return ret;
553}
554
555const FFCodec ff_aac_decoder = {
556    .p.name          = "aac",
557    .p.long_name     = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
558    .p.type          = AVMEDIA_TYPE_AUDIO,
559    .p.id            = AV_CODEC_ID_AAC,
560    .priv_data_size  = sizeof(AACContext),
561    .init            = aac_decode_init,
562    .close           = aac_decode_close,
563    FF_CODEC_DECODE_CB(aac_decode_frame),
564    .p.sample_fmts   = (const enum AVSampleFormat[]) {
565        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
566    },
567    .p.capabilities  = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
568    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
569#if FF_API_OLD_CHANNEL_LAYOUT
570    .p.channel_layouts = aac_channel_layout,
571#endif
572    .p.ch_layouts    = aac_ch_layout,
573    .flush = flush,
574    .p.priv_class    = &aac_decoder_class,
575    .p.profiles      = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
576};
577
578/*
579    Note: This decoder filter is intended to decode LATM streams transferred
580    in MPEG transport streams which only contain one program.
581    To do a more complex LATM demuxing a separate LATM demuxer should be used.
582*/
583const FFCodec ff_aac_latm_decoder = {
584    .p.name          = "aac_latm",
585    .p.long_name     = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
586    .p.type          = AVMEDIA_TYPE_AUDIO,
587    .p.id            = AV_CODEC_ID_AAC_LATM,
588    .priv_data_size  = sizeof(struct LATMContext),
589    .init            = latm_decode_init,
590    .close           = aac_decode_close,
591    FF_CODEC_DECODE_CB(latm_decode_frame),
592    .p.sample_fmts   = (const enum AVSampleFormat[]) {
593        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
594    },
595    .p.capabilities  = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
596    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
597#if FF_API_OLD_CHANNEL_LAYOUT
598    .p.channel_layouts = aac_channel_layout,
599#endif
600    .p.ch_layouts    = aac_ch_layout,
601    .flush = flush,
602    .p.profiles      = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
603};
604