1/*
2 * AAC decoder wrapper
3 * Copyright (c) 2012 Martin Storsjo
4 *
5 * This file is part of FFmpeg.
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include <fdk-aac/aacdecoder_lib.h>
21
22#include "libavutil/channel_layout.h"
23#include "libavutil/common.h"
24#include "libavutil/opt.h"
25#include "avcodec.h"
26#include "codec_internal.h"
27#include "internal.h"
28
/*
 * Compile-time version gate for the fdk-aac decoder library.
 * FDKDEC_VER_AT_LEAST(vl0, vl1) is true when the library's
 * (AACDECODER_LIB_VL0, AACDECODER_LIB_VL1) pair is at least (vl0, vl1).
 * If the library headers do not define AACDECODER_LIB_VL0, every check
 * evaluates to 0.
 */
#ifndef AACDECODER_LIB_VL0
#define FDKDEC_VER_AT_LEAST(vl0, vl1) 0
#else
#define FDKDEC_VER_AT_LEAST(vl0, vl1) \
    ((AACDECODER_LIB_VL0 > vl0) || \
     (AACDECODER_LIB_VL0 == vl0 && AACDECODER_LIB_VL1 >= vl1))
#endif

/*
 * Below version 2.5 the code falls back to AAC_PCM_OUTPUT_CHANNELS under
 * the AAC_PCM_MAX_OUTPUT_CHANNELS name, so the rest of the file can use a
 * single identifier.
 */
#if !FDKDEC_VER_AT_LEAST(2, 5) // < 2.5.10
#define AAC_PCM_MAX_OUTPUT_CHANNELS AAC_PCM_OUTPUT_CHANNELS
#endif
40
/*
 * Error concealment methods selectable through the "conceal" option.
 * The numeric values are handed unchanged to
 * aacDecoder_SetParam(AAC_CONCEAL_METHOD), so they are spelled out
 * explicitly.
 */
enum ConcealMethod {
    CONCEAL_METHOD_SPECTRAL_MUTING = 0,
    CONCEAL_METHOD_NOISE_SUBSTITUTION = 1,
    CONCEAL_METHOD_ENERGY_INTERPOLATION = 2,
    /* Number of methods above; used as the exclusive upper option bound. */
    CONCEAL_METHOD_NB,
};
47
48typedef struct FDKAACDecContext {
49    const AVClass *class;
50    HANDLE_AACDECODER handle;
51    uint8_t *decoder_buffer;
52    int decoder_buffer_size;
53    uint8_t *anc_buffer;
54    int conceal_method;
55    int drc_level;
56    int drc_boost;
57    int drc_heavy;
58    int drc_effect;
59    int drc_cut;
60    int album_mode;
61    int level_limit;
62#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
63    int output_delay_set;
64    int flush_samples;
65    int delay_samples;
66#endif
67    AVChannelLayout downmix_layout;
68} FDKAACDecContext;
69
70
/* Size in bytes of the ancillary data buffer registered for downmixing. */
#define DMX_ANC_BUFFSIZE       128
/* Number of channels the PCM output buffer is dimensioned for. */
#define DECODER_MAX_CHANNELS     8
/*
 * Bytes needed for 2048 PCM samples; the init function multiplies this by
 * DECODER_MAX_CHANNELS. Parenthesized so the macro stays one operand in
 * any surrounding expression (e.g. division).
 */
#define DECODER_BUFFSIZE      (2048 * sizeof(INT_PCM))
74
75#define OFFSET(x) offsetof(FDKAACDecContext, x)
76#define AD AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
77static const AVOption fdk_aac_dec_options[] = {
78    { "conceal", "Error concealment method", OFFSET(conceal_method), AV_OPT_TYPE_INT, { .i64 = CONCEAL_METHOD_NOISE_SUBSTITUTION }, CONCEAL_METHOD_SPECTRAL_MUTING, CONCEAL_METHOD_NB - 1, AD, "conceal" },
79    { "spectral", "Spectral muting",      0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_SPECTRAL_MUTING },      INT_MIN, INT_MAX, AD, "conceal" },
80    { "noise",    "Noise Substitution",   0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_NOISE_SUBSTITUTION },   INT_MIN, INT_MAX, AD, "conceal" },
81    { "energy",   "Energy Interpolation", 0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_ENERGY_INTERPOLATION }, INT_MIN, INT_MAX, AD, "conceal" },
82    { "drc_boost", "Dynamic Range Control: boost, where [0] is none and [127] is max boost",
83                     OFFSET(drc_boost),      AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, 127, AD, NULL    },
84    { "drc_cut",   "Dynamic Range Control: attenuation factor, where [0] is none and [127] is max compression",
85                     OFFSET(drc_cut),        AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, 127, AD, NULL    },
86    { "drc_level", "Dynamic Range Control: reference level, quantized to 0.25dB steps where [0] is 0dB and [127] is -31.75dB, -1 for auto, and -2 for disabled",
87                     OFFSET(drc_level),      AV_OPT_TYPE_INT,   { .i64 = -1},  -2, 127, AD, NULL    },
88    { "drc_heavy", "Dynamic Range Control: heavy compression, where [1] is on (RF mode) and [0] is off",
89                     OFFSET(drc_heavy),      AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 1,   AD, NULL    },
90#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
91    { "level_limit", "Signal level limiting",
92                     OFFSET(level_limit),    AV_OPT_TYPE_BOOL,  { .i64 = -1 }, -1, 1, AD },
93#endif
94#if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
95    { "drc_effect","Dynamic Range Control: effect type, where e.g. [0] is none and [6] is general",
96                     OFFSET(drc_effect),     AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 8,   AD, NULL    },
97#endif
98#if FDKDEC_VER_AT_LEAST(3, 1) // 3.1.0
99    { "album_mode","Dynamic Range Control: album mode, where [0] is off and [1] is on",
100                     OFFSET(album_mode),     AV_OPT_TYPE_INT,   { .i64 = -1},  -1, 1,   AD, NULL    },
101#endif
102    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = AD },
103    { NULL }
104};
105
106static const AVClass fdk_aac_dec_class = {
107    .class_name = "libfdk-aac decoder",
108    .item_name  = av_default_item_name,
109    .option     = fdk_aac_dec_options,
110    .version    = LIBAVUTIL_VERSION_INT,
111};
112
113static int get_stream_info(AVCodecContext *avctx)
114{
115    FDKAACDecContext *s   = avctx->priv_data;
116    CStreamInfo *info     = aacDecoder_GetStreamInfo(s->handle);
117    int channel_counts[0x24] = { 0 };
118    int i, ch_error       = 0;
119    uint64_t ch_layout    = 0;
120
121    if (!info) {
122        av_log(avctx, AV_LOG_ERROR, "Unable to get stream info\n");
123        return AVERROR_UNKNOWN;
124    }
125
126    if (info->sampleRate <= 0) {
127        av_log(avctx, AV_LOG_ERROR, "Stream info not initialized\n");
128        return AVERROR_UNKNOWN;
129    }
130    avctx->sample_rate = info->sampleRate;
131    avctx->frame_size  = info->frameSize;
132#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
133    if (!s->output_delay_set && info->outputDelay) {
134        // Set this only once.
135        s->flush_samples    = info->outputDelay;
136        s->delay_samples    = info->outputDelay;
137        s->output_delay_set = 1;
138    }
139#endif
140
141    for (i = 0; i < info->numChannels; i++) {
142        AUDIO_CHANNEL_TYPE ctype = info->pChannelType[i];
143        if (ctype <= ACT_NONE || ctype >= FF_ARRAY_ELEMS(channel_counts)) {
144            av_log(avctx, AV_LOG_WARNING, "unknown channel type\n");
145            break;
146        }
147        channel_counts[ctype]++;
148    }
149    av_log(avctx, AV_LOG_DEBUG,
150           "%d channels - front:%d side:%d back:%d lfe:%d top:%d\n",
151           info->numChannels,
152           channel_counts[ACT_FRONT], channel_counts[ACT_SIDE],
153           channel_counts[ACT_BACK],  channel_counts[ACT_LFE],
154           channel_counts[ACT_FRONT_TOP] + channel_counts[ACT_SIDE_TOP] +
155           channel_counts[ACT_BACK_TOP]  + channel_counts[ACT_TOP]);
156
157    switch (channel_counts[ACT_FRONT]) {
158    case 4:
159        ch_layout |= AV_CH_LAYOUT_STEREO | AV_CH_FRONT_LEFT_OF_CENTER |
160                     AV_CH_FRONT_RIGHT_OF_CENTER;
161        break;
162    case 3:
163        ch_layout |= AV_CH_LAYOUT_STEREO | AV_CH_FRONT_CENTER;
164        break;
165    case 2:
166        ch_layout |= AV_CH_LAYOUT_STEREO;
167        break;
168    case 1:
169        ch_layout |= AV_CH_FRONT_CENTER;
170        break;
171    default:
172        av_log(avctx, AV_LOG_WARNING,
173               "unsupported number of front channels: %d\n",
174               channel_counts[ACT_FRONT]);
175        ch_error = 1;
176        break;
177    }
178    if (channel_counts[ACT_SIDE] > 0) {
179        if (channel_counts[ACT_SIDE] == 2) {
180            ch_layout |= AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT;
181        } else {
182            av_log(avctx, AV_LOG_WARNING,
183                   "unsupported number of side channels: %d\n",
184                   channel_counts[ACT_SIDE]);
185            ch_error = 1;
186        }
187    }
188    if (channel_counts[ACT_BACK] > 0) {
189        switch (channel_counts[ACT_BACK]) {
190        case 3:
191            ch_layout |= AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_BACK_CENTER;
192            break;
193        case 2:
194            ch_layout |= AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT;
195            break;
196        case 1:
197            ch_layout |= AV_CH_BACK_CENTER;
198            break;
199        default:
200            av_log(avctx, AV_LOG_WARNING,
201                   "unsupported number of back channels: %d\n",
202                   channel_counts[ACT_BACK]);
203            ch_error = 1;
204            break;
205        }
206    }
207    if (channel_counts[ACT_LFE] > 0) {
208        if (channel_counts[ACT_LFE] == 1) {
209            ch_layout |= AV_CH_LOW_FREQUENCY;
210        } else {
211            av_log(avctx, AV_LOG_WARNING,
212                   "unsupported number of LFE channels: %d\n",
213                   channel_counts[ACT_LFE]);
214            ch_error = 1;
215        }
216    }
217
218    av_channel_layout_uninit(&avctx->ch_layout);
219    av_channel_layout_from_mask(&avctx->ch_layout, ch_layout);
220    if (!ch_error && avctx->ch_layout.nb_channels != info->numChannels) {
221        av_log(avctx, AV_LOG_WARNING, "unsupported channel configuration\n");
222        ch_error = 1;
223    }
224    if (ch_error)
225        avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
226
227    return 0;
228}
229
230static av_cold int fdk_aac_decode_close(AVCodecContext *avctx)
231{
232    FDKAACDecContext *s = avctx->priv_data;
233
234    if (s->handle)
235        aacDecoder_Close(s->handle);
236    av_freep(&s->decoder_buffer);
237    av_freep(&s->anc_buffer);
238
239    return 0;
240}
241
242static av_cold int fdk_aac_decode_init(AVCodecContext *avctx)
243{
244    FDKAACDecContext *s = avctx->priv_data;
245    AAC_DECODER_ERROR err;
246
247    s->handle = aacDecoder_Open(avctx->extradata_size ? TT_MP4_RAW : TT_MP4_ADTS, 1);
248    if (!s->handle) {
249        av_log(avctx, AV_LOG_ERROR, "Error opening decoder\n");
250        return AVERROR_UNKNOWN;
251    }
252
253    if (avctx->extradata_size) {
254        if ((err = aacDecoder_ConfigRaw(s->handle, &avctx->extradata,
255                                        &avctx->extradata_size)) != AAC_DEC_OK) {
256            av_log(avctx, AV_LOG_ERROR, "Unable to set extradata\n");
257            return AVERROR_INVALIDDATA;
258        }
259    }
260
261    if ((err = aacDecoder_SetParam(s->handle, AAC_CONCEAL_METHOD,
262                                   s->conceal_method)) != AAC_DEC_OK) {
263        av_log(avctx, AV_LOG_ERROR, "Unable to set error concealment method\n");
264        return AVERROR_UNKNOWN;
265    }
266
267#if FF_API_OLD_CHANNEL_LAYOUT
268FF_DISABLE_DEPRECATION_WARNINGS
269    if (avctx->request_channel_layout) {
270        av_channel_layout_uninit(&s->downmix_layout);
271        av_channel_layout_from_mask(&s->downmix_layout, avctx->request_channel_layout);
272    }
273FF_ENABLE_DEPRECATION_WARNINGS
274#endif
275    if (s->downmix_layout.nb_channels > 0 &&
276        s->downmix_layout.order != AV_CHANNEL_ORDER_NATIVE) {
277        int downmix_channels = -1;
278
279        switch (s->downmix_layout.u.mask) {
280        case AV_CH_LAYOUT_STEREO:
281        case AV_CH_LAYOUT_STEREO_DOWNMIX:
282            downmix_channels = 2;
283            break;
284        case AV_CH_LAYOUT_MONO:
285            downmix_channels = 1;
286            break;
287        default:
288            av_log(avctx, AV_LOG_WARNING, "Invalid downmix option\n");
289            break;
290        }
291
292        if (downmix_channels != -1) {
293            if (aacDecoder_SetParam(s->handle, AAC_PCM_MAX_OUTPUT_CHANNELS,
294                                    downmix_channels) != AAC_DEC_OK) {
295               av_log(avctx, AV_LOG_WARNING, "Unable to set output channels in the decoder\n");
296            } else {
297               s->anc_buffer = av_malloc(DMX_ANC_BUFFSIZE);
298               if (!s->anc_buffer) {
299                   av_log(avctx, AV_LOG_ERROR, "Unable to allocate ancillary buffer for the decoder\n");
300                   return AVERROR(ENOMEM);
301               }
302               if (aacDecoder_AncDataInit(s->handle, s->anc_buffer, DMX_ANC_BUFFSIZE)) {
303                   av_log(avctx, AV_LOG_ERROR, "Unable to register downmix ancillary buffer in the decoder\n");
304                   return AVERROR_UNKNOWN;
305               }
306            }
307        }
308    }
309
310    if (s->drc_boost != -1) {
311        if (aacDecoder_SetParam(s->handle, AAC_DRC_BOOST_FACTOR, s->drc_boost) != AAC_DEC_OK) {
312            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC boost factor in the decoder\n");
313            return AVERROR_UNKNOWN;
314        }
315    }
316
317    if (s->drc_cut != -1) {
318        if (aacDecoder_SetParam(s->handle, AAC_DRC_ATTENUATION_FACTOR, s->drc_cut) != AAC_DEC_OK) {
319            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC attenuation factor in the decoder\n");
320            return AVERROR_UNKNOWN;
321        }
322    }
323
324    if (s->drc_level != -1) {
325        // This option defaults to -1, i.e. not calling
326        // aacDecoder_SetParam(AAC_DRC_REFERENCE_LEVEL) at all, which defaults
327        // to the level from DRC metadata, if available. The user can set
328        // -drc_level -2, which calls aacDecoder_SetParam(
329        // AAC_DRC_REFERENCE_LEVEL) with a negative value, which then
330        // explicitly disables the feature.
331        if (aacDecoder_SetParam(s->handle, AAC_DRC_REFERENCE_LEVEL, s->drc_level) != AAC_DEC_OK) {
332            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC reference level in the decoder\n");
333            return AVERROR_UNKNOWN;
334        }
335    }
336
337    if (s->drc_heavy != -1) {
338        if (aacDecoder_SetParam(s->handle, AAC_DRC_HEAVY_COMPRESSION, s->drc_heavy) != AAC_DEC_OK) {
339            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC heavy compression in the decoder\n");
340            return AVERROR_UNKNOWN;
341        }
342    }
343
344#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
345    // Setting this parameter to -1 enables the auto behaviour in the library.
346    if (aacDecoder_SetParam(s->handle, AAC_PCM_LIMITER_ENABLE, s->level_limit) != AAC_DEC_OK) {
347        av_log(avctx, AV_LOG_ERROR, "Unable to set in signal level limiting in the decoder\n");
348        return AVERROR_UNKNOWN;
349    }
350#endif
351
352#if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
353    if (s->drc_effect != -1) {
354        if (aacDecoder_SetParam(s->handle, AAC_UNIDRC_SET_EFFECT, s->drc_effect) != AAC_DEC_OK) {
355            av_log(avctx, AV_LOG_ERROR, "Unable to set DRC effect type in the decoder\n");
356            return AVERROR_UNKNOWN;
357        }
358    }
359#endif
360
361#if FDKDEC_VER_AT_LEAST(3, 1) // 3.1.0
362    if (s->album_mode != -1) {
363        if (aacDecoder_SetParam(s->handle, AAC_UNIDRC_ALBUM_MODE, s->album_mode) != AAC_DEC_OK) {
364            av_log(avctx, AV_LOG_ERROR, "Unable to set album mode in the decoder\n");
365            return AVERROR_UNKNOWN;
366        }
367    }
368#endif
369
370    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
371
372    s->decoder_buffer_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
373    s->decoder_buffer = av_malloc(s->decoder_buffer_size);
374    if (!s->decoder_buffer)
375        return AVERROR(ENOMEM);
376
377    return 0;
378}
379
380static int fdk_aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
381                                int *got_frame_ptr, AVPacket *avpkt)
382{
383    FDKAACDecContext *s = avctx->priv_data;
384    int ret;
385    AAC_DECODER_ERROR err;
386    UINT valid = avpkt->size;
387    UINT flags = 0;
388    int input_offset = 0;
389
390    if (avpkt->size) {
391        err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size, &valid);
392        if (err != AAC_DEC_OK) {
393            av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed: %x\n", err);
394            return AVERROR_INVALIDDATA;
395        }
396    } else {
397#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
398        /* Handle decoder draining */
399        if (s->flush_samples > 0) {
400            flags |= AACDEC_FLUSH;
401        } else {
402            return AVERROR_EOF;
403        }
404#else
405        return AVERROR_EOF;
406#endif
407    }
408
409    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) s->decoder_buffer,
410                                 s->decoder_buffer_size / sizeof(INT_PCM),
411                                 flags);
412    if (err == AAC_DEC_NOT_ENOUGH_BITS) {
413        ret = avpkt->size - valid;
414        goto end;
415    }
416    if (err != AAC_DEC_OK) {
417        av_log(avctx, AV_LOG_ERROR,
418               "aacDecoder_DecodeFrame() failed: %x\n", err);
419        ret = AVERROR_UNKNOWN;
420        goto end;
421    }
422
423    if ((ret = get_stream_info(avctx)) < 0)
424        goto end;
425    frame->nb_samples = avctx->frame_size;
426
427#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
428    if (flags & AACDEC_FLUSH) {
429        // Only return the right amount of samples at the end; if calling the
430        // decoder with AACDEC_FLUSH, it will keep returning frames indefinitely.
431        frame->nb_samples = FFMIN(s->flush_samples, frame->nb_samples);
432        av_log(s, AV_LOG_DEBUG, "Returning %d/%d delayed samples.\n",
433                                frame->nb_samples, s->flush_samples);
434        s->flush_samples -= frame->nb_samples;
435    } else {
436        // Trim off samples from the start to compensate for extra decoder
437        // delay. We could also just adjust the pts, but this avoids
438        // including the extra samples in the output altogether.
439        if (s->delay_samples) {
440            int drop_samples = FFMIN(s->delay_samples, frame->nb_samples);
441            av_log(s, AV_LOG_DEBUG, "Dropping %d/%d delayed samples.\n",
442                                    drop_samples, s->delay_samples);
443            s->delay_samples  -= drop_samples;
444            frame->nb_samples -= drop_samples;
445            input_offset = drop_samples * avctx->ch_layout.nb_channels;
446            if (frame->nb_samples <= 0)
447                return 0;
448        }
449    }
450#endif
451
452    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
453        goto end;
454
455    memcpy(frame->extended_data[0], s->decoder_buffer + input_offset,
456           avctx->ch_layout.nb_channels * frame->nb_samples *
457           av_get_bytes_per_sample(avctx->sample_fmt));
458
459    *got_frame_ptr = 1;
460    ret = avpkt->size - valid;
461
462end:
463    return ret;
464}
465
466static av_cold void fdk_aac_decode_flush(AVCodecContext *avctx)
467{
468    FDKAACDecContext *s = avctx->priv_data;
469    AAC_DECODER_ERROR err;
470
471    if (!s->handle)
472        return;
473
474    if ((err = aacDecoder_SetParam(s->handle,
475                                   AAC_TPDEC_CLEAR_BUFFER, 1)) != AAC_DEC_OK)
476        av_log(avctx, AV_LOG_WARNING, "failed to clear buffer when flushing\n");
477}
478
479const FFCodec ff_libfdk_aac_decoder = {
480    .p.name         = "libfdk_aac",
481    .p.long_name    = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
482    .p.type         = AVMEDIA_TYPE_AUDIO,
483    .p.id           = AV_CODEC_ID_AAC,
484    .priv_data_size = sizeof(FDKAACDecContext),
485    .init           = fdk_aac_decode_init,
486    FF_CODEC_DECODE_CB(fdk_aac_decode_frame),
487    .close          = fdk_aac_decode_close,
488    .flush          = fdk_aac_decode_flush,
489    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF
490#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
491                      | AV_CODEC_CAP_DELAY
492#endif
493    ,
494    .p.priv_class   = &fdk_aac_dec_class,
495    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
496                      FF_CODEC_CAP_INIT_CLEANUP,
497    .p.wrapper_name = "libfdk",
498};
499