1 /*
2 * AAC decoder wrapper
3 * Copyright (c) 2012 Martin Storsjo
4 *
5 * This file is part of FFmpeg.
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <fdk-aac/aacdecoder_lib.h>
21
22 #include "libavutil/channel_layout.h"
23 #include "libavutil/common.h"
24 #include "libavutil/opt.h"
25 #include "avcodec.h"
26 #include "codec_internal.h"
27 #include "internal.h"
28
/*
 * Compile-time version check for the libfdk-aac decoder library.
 *
 * Evaluates to non-zero when the library headers report a version of at
 * least vl0.vl1, and to 0 when the headers do not define
 * AACDECODER_LIB_VL0 at all. The parameters are parenthesized so that any
 * constant expression can be passed safely.
 */
#ifdef AACDECODER_LIB_VL0
#define FDKDEC_VER_AT_LEAST(vl0, vl1) \
    ((AACDECODER_LIB_VL0 > (vl0)) || \
     (AACDECODER_LIB_VL0 == (vl0) && AACDECODER_LIB_VL1 >= (vl1)))
#else
#define FDKDEC_VER_AT_LEAST(vl0, vl1) 0
#endif

/* For libraries older than 2.5, map the parameter name used in this file
 * onto AAC_PCM_OUTPUT_CHANNELS. */
#if !FDKDEC_VER_AT_LEAST(2, 5) // < 2.5.10
#define AAC_PCM_MAX_OUTPUT_CHANNELS AAC_PCM_OUTPUT_CHANNELS
#endif
40
/* Error concealment methods selectable through the "conceal" option.
 * The numeric values are handed to the decoder as AAC_CONCEAL_METHOD. */
enum ConcealMethod {
    CONCEAL_METHOD_SPECTRAL_MUTING      = 0,
    CONCEAL_METHOD_NOISE_SUBSTITUTION   = 1,
    CONCEAL_METHOD_ENERGY_INTERPOLATION = 2,
    /* Number of methods; used as the upper bound of the option range. */
    CONCEAL_METHOD_NB,
};
47
48 typedef struct FDKAACDecContext {
49 const AVClass *class;
50 HANDLE_AACDECODER handle;
51 uint8_t *decoder_buffer;
52 int decoder_buffer_size;
53 uint8_t *anc_buffer;
54 int conceal_method;
55 int drc_level;
56 int drc_boost;
57 int drc_heavy;
58 int drc_effect;
59 int drc_cut;
60 int album_mode;
61 int level_limit;
62 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
63 int output_delay_set;
64 int flush_samples;
65 int delay_samples;
66 #endif
67 AVChannelLayout downmix_layout;
68 } FDKAACDecContext;
69
70
/* Size in bytes of the ancillary data buffer registered with the decoder
 * when a downmix is requested. */
#define DMX_ANC_BUFFSIZE        128
/* Number of channels the PCM output buffer is dimensioned for. */
#define DECODER_MAX_CHANNELS    8
/* Size in bytes of 2048 PCM samples; multiplied by DECODER_MAX_CHANNELS to
 * size the output buffer. Parenthesized so it is safe in any expression. */
#define DECODER_BUFFSIZE        (2048 * sizeof(INT_PCM))

/* Helpers for the option table: field offset inside the private context
 * and the flag set shared by all options. */
#define OFFSET(x) offsetof(FDKAACDecContext, x)
#define AD (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
77 static const AVOption fdk_aac_dec_options[] = {
78 { "conceal", "Error concealment method", OFFSET(conceal_method), AV_OPT_TYPE_INT, { .i64 = CONCEAL_METHOD_NOISE_SUBSTITUTION }, CONCEAL_METHOD_SPECTRAL_MUTING, CONCEAL_METHOD_NB - 1, AD, "conceal" },
79 { "spectral", "Spectral muting", 0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_SPECTRAL_MUTING }, INT_MIN, INT_MAX, AD, "conceal" },
80 { "noise", "Noise Substitution", 0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_NOISE_SUBSTITUTION }, INT_MIN, INT_MAX, AD, "conceal" },
81 { "energy", "Energy Interpolation", 0, AV_OPT_TYPE_CONST, { .i64 = CONCEAL_METHOD_ENERGY_INTERPOLATION }, INT_MIN, INT_MAX, AD, "conceal" },
82 { "drc_boost", "Dynamic Range Control: boost, where [0] is none and [127] is max boost",
83 OFFSET(drc_boost), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 127, AD, NULL },
84 { "drc_cut", "Dynamic Range Control: attenuation factor, where [0] is none and [127] is max compression",
85 OFFSET(drc_cut), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 127, AD, NULL },
86 { "drc_level", "Dynamic Range Control: reference level, quantized to 0.25dB steps where [0] is 0dB and [127] is -31.75dB, -1 for auto, and -2 for disabled",
87 OFFSET(drc_level), AV_OPT_TYPE_INT, { .i64 = -1}, -2, 127, AD, NULL },
88 { "drc_heavy", "Dynamic Range Control: heavy compression, where [1] is on (RF mode) and [0] is off",
89 OFFSET(drc_heavy), AV_OPT_TYPE_INT, { .i64 = -1}, -1, 1, AD, NULL },
90 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
91 { "level_limit", "Signal level limiting",
92 OFFSET(level_limit), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, AD },
93 #endif
94 #if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
95 { "drc_effect","Dynamic Range Control: effect type, where e.g. [0] is none and [6] is general",
96 OFFSET(drc_effect), AV_OPT_TYPE_INT, { .i64 = -1}, -1, 8, AD, NULL },
97 #endif
98 #if FDKDEC_VER_AT_LEAST(3, 1) // 3.1.0
99 { "album_mode","Dynamic Range Control: album mode, where [0] is off and [1] is on",
100 OFFSET(album_mode), AV_OPT_TYPE_INT, { .i64 = -1}, -1, 1, AD, NULL },
101 #endif
102 { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = AD },
103 { NULL }
104 };
105
106 static const AVClass fdk_aac_dec_class = {
107 .class_name = "libfdk-aac decoder",
108 .item_name = av_default_item_name,
109 .option = fdk_aac_dec_options,
110 .version = LIBAVUTIL_VERSION_INT,
111 };
112
get_stream_info(AVCodecContext *avctx)113 static int get_stream_info(AVCodecContext *avctx)
114 {
115 FDKAACDecContext *s = avctx->priv_data;
116 CStreamInfo *info = aacDecoder_GetStreamInfo(s->handle);
117 int channel_counts[0x24] = { 0 };
118 int i, ch_error = 0;
119 uint64_t ch_layout = 0;
120
121 if (!info) {
122 av_log(avctx, AV_LOG_ERROR, "Unable to get stream info\n");
123 return AVERROR_UNKNOWN;
124 }
125
126 if (info->sampleRate <= 0) {
127 av_log(avctx, AV_LOG_ERROR, "Stream info not initialized\n");
128 return AVERROR_UNKNOWN;
129 }
130 avctx->sample_rate = info->sampleRate;
131 avctx->frame_size = info->frameSize;
132 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
133 if (!s->output_delay_set && info->outputDelay) {
134 // Set this only once.
135 s->flush_samples = info->outputDelay;
136 s->delay_samples = info->outputDelay;
137 s->output_delay_set = 1;
138 }
139 #endif
140
141 for (i = 0; i < info->numChannels; i++) {
142 AUDIO_CHANNEL_TYPE ctype = info->pChannelType[i];
143 if (ctype <= ACT_NONE || ctype >= FF_ARRAY_ELEMS(channel_counts)) {
144 av_log(avctx, AV_LOG_WARNING, "unknown channel type\n");
145 break;
146 }
147 channel_counts[ctype]++;
148 }
149 av_log(avctx, AV_LOG_DEBUG,
150 "%d channels - front:%d side:%d back:%d lfe:%d top:%d\n",
151 info->numChannels,
152 channel_counts[ACT_FRONT], channel_counts[ACT_SIDE],
153 channel_counts[ACT_BACK], channel_counts[ACT_LFE],
154 channel_counts[ACT_FRONT_TOP] + channel_counts[ACT_SIDE_TOP] +
155 channel_counts[ACT_BACK_TOP] + channel_counts[ACT_TOP]);
156
157 switch (channel_counts[ACT_FRONT]) {
158 case 4:
159 ch_layout |= AV_CH_LAYOUT_STEREO | AV_CH_FRONT_LEFT_OF_CENTER |
160 AV_CH_FRONT_RIGHT_OF_CENTER;
161 break;
162 case 3:
163 ch_layout |= AV_CH_LAYOUT_STEREO | AV_CH_FRONT_CENTER;
164 break;
165 case 2:
166 ch_layout |= AV_CH_LAYOUT_STEREO;
167 break;
168 case 1:
169 ch_layout |= AV_CH_FRONT_CENTER;
170 break;
171 default:
172 av_log(avctx, AV_LOG_WARNING,
173 "unsupported number of front channels: %d\n",
174 channel_counts[ACT_FRONT]);
175 ch_error = 1;
176 break;
177 }
178 if (channel_counts[ACT_SIDE] > 0) {
179 if (channel_counts[ACT_SIDE] == 2) {
180 ch_layout |= AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT;
181 } else {
182 av_log(avctx, AV_LOG_WARNING,
183 "unsupported number of side channels: %d\n",
184 channel_counts[ACT_SIDE]);
185 ch_error = 1;
186 }
187 }
188 if (channel_counts[ACT_BACK] > 0) {
189 switch (channel_counts[ACT_BACK]) {
190 case 3:
191 ch_layout |= AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_BACK_CENTER;
192 break;
193 case 2:
194 ch_layout |= AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT;
195 break;
196 case 1:
197 ch_layout |= AV_CH_BACK_CENTER;
198 break;
199 default:
200 av_log(avctx, AV_LOG_WARNING,
201 "unsupported number of back channels: %d\n",
202 channel_counts[ACT_BACK]);
203 ch_error = 1;
204 break;
205 }
206 }
207 if (channel_counts[ACT_LFE] > 0) {
208 if (channel_counts[ACT_LFE] == 1) {
209 ch_layout |= AV_CH_LOW_FREQUENCY;
210 } else {
211 av_log(avctx, AV_LOG_WARNING,
212 "unsupported number of LFE channels: %d\n",
213 channel_counts[ACT_LFE]);
214 ch_error = 1;
215 }
216 }
217
218 av_channel_layout_uninit(&avctx->ch_layout);
219 av_channel_layout_from_mask(&avctx->ch_layout, ch_layout);
220 if (!ch_error && avctx->ch_layout.nb_channels != info->numChannels) {
221 av_log(avctx, AV_LOG_WARNING, "unsupported channel configuration\n");
222 ch_error = 1;
223 }
224 if (ch_error)
225 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
226
227 return 0;
228 }
229
fdk_aac_decode_close(AVCodecContext *avctx)230 static av_cold int fdk_aac_decode_close(AVCodecContext *avctx)
231 {
232 FDKAACDecContext *s = avctx->priv_data;
233
234 if (s->handle)
235 aacDecoder_Close(s->handle);
236 av_freep(&s->decoder_buffer);
237 av_freep(&s->anc_buffer);
238
239 return 0;
240 }
241
fdk_aac_decode_init(AVCodecContext *avctx)242 static av_cold int fdk_aac_decode_init(AVCodecContext *avctx)
243 {
244 FDKAACDecContext *s = avctx->priv_data;
245 AAC_DECODER_ERROR err;
246
247 s->handle = aacDecoder_Open(avctx->extradata_size ? TT_MP4_RAW : TT_MP4_ADTS, 1);
248 if (!s->handle) {
249 av_log(avctx, AV_LOG_ERROR, "Error opening decoder\n");
250 return AVERROR_UNKNOWN;
251 }
252
253 if (avctx->extradata_size) {
254 if ((err = aacDecoder_ConfigRaw(s->handle, &avctx->extradata,
255 &avctx->extradata_size)) != AAC_DEC_OK) {
256 av_log(avctx, AV_LOG_ERROR, "Unable to set extradata\n");
257 return AVERROR_INVALIDDATA;
258 }
259 }
260
261 if ((err = aacDecoder_SetParam(s->handle, AAC_CONCEAL_METHOD,
262 s->conceal_method)) != AAC_DEC_OK) {
263 av_log(avctx, AV_LOG_ERROR, "Unable to set error concealment method\n");
264 return AVERROR_UNKNOWN;
265 }
266
267 #if FF_API_OLD_CHANNEL_LAYOUT
268 FF_DISABLE_DEPRECATION_WARNINGS
269 if (avctx->request_channel_layout) {
270 av_channel_layout_uninit(&s->downmix_layout);
271 av_channel_layout_from_mask(&s->downmix_layout, avctx->request_channel_layout);
272 }
273 FF_ENABLE_DEPRECATION_WARNINGS
274 #endif
275 if (s->downmix_layout.nb_channels > 0 &&
276 s->downmix_layout.order != AV_CHANNEL_ORDER_NATIVE) {
277 int downmix_channels = -1;
278
279 switch (s->downmix_layout.u.mask) {
280 case AV_CH_LAYOUT_STEREO:
281 case AV_CH_LAYOUT_STEREO_DOWNMIX:
282 downmix_channels = 2;
283 break;
284 case AV_CH_LAYOUT_MONO:
285 downmix_channels = 1;
286 break;
287 default:
288 av_log(avctx, AV_LOG_WARNING, "Invalid downmix option\n");
289 break;
290 }
291
292 if (downmix_channels != -1) {
293 if (aacDecoder_SetParam(s->handle, AAC_PCM_MAX_OUTPUT_CHANNELS,
294 downmix_channels) != AAC_DEC_OK) {
295 av_log(avctx, AV_LOG_WARNING, "Unable to set output channels in the decoder\n");
296 } else {
297 s->anc_buffer = av_malloc(DMX_ANC_BUFFSIZE);
298 if (!s->anc_buffer) {
299 av_log(avctx, AV_LOG_ERROR, "Unable to allocate ancillary buffer for the decoder\n");
300 return AVERROR(ENOMEM);
301 }
302 if (aacDecoder_AncDataInit(s->handle, s->anc_buffer, DMX_ANC_BUFFSIZE)) {
303 av_log(avctx, AV_LOG_ERROR, "Unable to register downmix ancillary buffer in the decoder\n");
304 return AVERROR_UNKNOWN;
305 }
306 }
307 }
308 }
309
310 if (s->drc_boost != -1) {
311 if (aacDecoder_SetParam(s->handle, AAC_DRC_BOOST_FACTOR, s->drc_boost) != AAC_DEC_OK) {
312 av_log(avctx, AV_LOG_ERROR, "Unable to set DRC boost factor in the decoder\n");
313 return AVERROR_UNKNOWN;
314 }
315 }
316
317 if (s->drc_cut != -1) {
318 if (aacDecoder_SetParam(s->handle, AAC_DRC_ATTENUATION_FACTOR, s->drc_cut) != AAC_DEC_OK) {
319 av_log(avctx, AV_LOG_ERROR, "Unable to set DRC attenuation factor in the decoder\n");
320 return AVERROR_UNKNOWN;
321 }
322 }
323
324 if (s->drc_level != -1) {
325 // This option defaults to -1, i.e. not calling
326 // aacDecoder_SetParam(AAC_DRC_REFERENCE_LEVEL) at all, which defaults
327 // to the level from DRC metadata, if available. The user can set
328 // -drc_level -2, which calls aacDecoder_SetParam(
329 // AAC_DRC_REFERENCE_LEVEL) with a negative value, which then
330 // explicitly disables the feature.
331 if (aacDecoder_SetParam(s->handle, AAC_DRC_REFERENCE_LEVEL, s->drc_level) != AAC_DEC_OK) {
332 av_log(avctx, AV_LOG_ERROR, "Unable to set DRC reference level in the decoder\n");
333 return AVERROR_UNKNOWN;
334 }
335 }
336
337 if (s->drc_heavy != -1) {
338 if (aacDecoder_SetParam(s->handle, AAC_DRC_HEAVY_COMPRESSION, s->drc_heavy) != AAC_DEC_OK) {
339 av_log(avctx, AV_LOG_ERROR, "Unable to set DRC heavy compression in the decoder\n");
340 return AVERROR_UNKNOWN;
341 }
342 }
343
344 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
345 // Setting this parameter to -1 enables the auto behaviour in the library.
346 if (aacDecoder_SetParam(s->handle, AAC_PCM_LIMITER_ENABLE, s->level_limit) != AAC_DEC_OK) {
347 av_log(avctx, AV_LOG_ERROR, "Unable to set in signal level limiting in the decoder\n");
348 return AVERROR_UNKNOWN;
349 }
350 #endif
351
352 #if FDKDEC_VER_AT_LEAST(3, 0) // 3.0.0
353 if (s->drc_effect != -1) {
354 if (aacDecoder_SetParam(s->handle, AAC_UNIDRC_SET_EFFECT, s->drc_effect) != AAC_DEC_OK) {
355 av_log(avctx, AV_LOG_ERROR, "Unable to set DRC effect type in the decoder\n");
356 return AVERROR_UNKNOWN;
357 }
358 }
359 #endif
360
361 #if FDKDEC_VER_AT_LEAST(3, 1) // 3.1.0
362 if (s->album_mode != -1) {
363 if (aacDecoder_SetParam(s->handle, AAC_UNIDRC_ALBUM_MODE, s->album_mode) != AAC_DEC_OK) {
364 av_log(avctx, AV_LOG_ERROR, "Unable to set album mode in the decoder\n");
365 return AVERROR_UNKNOWN;
366 }
367 }
368 #endif
369
370 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
371
372 s->decoder_buffer_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
373 s->decoder_buffer = av_malloc(s->decoder_buffer_size);
374 if (!s->decoder_buffer)
375 return AVERROR(ENOMEM);
376
377 return 0;
378 }
379
fdk_aac_decode_frame(AVCodecContext *avctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)380 static int fdk_aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
381 int *got_frame_ptr, AVPacket *avpkt)
382 {
383 FDKAACDecContext *s = avctx->priv_data;
384 int ret;
385 AAC_DECODER_ERROR err;
386 UINT valid = avpkt->size;
387 UINT flags = 0;
388 int input_offset = 0;
389
390 if (avpkt->size) {
391 err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size, &valid);
392 if (err != AAC_DEC_OK) {
393 av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed: %x\n", err);
394 return AVERROR_INVALIDDATA;
395 }
396 } else {
397 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
398 /* Handle decoder draining */
399 if (s->flush_samples > 0) {
400 flags |= AACDEC_FLUSH;
401 } else {
402 return AVERROR_EOF;
403 }
404 #else
405 return AVERROR_EOF;
406 #endif
407 }
408
409 err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) s->decoder_buffer,
410 s->decoder_buffer_size / sizeof(INT_PCM),
411 flags);
412 if (err == AAC_DEC_NOT_ENOUGH_BITS) {
413 ret = avpkt->size - valid;
414 goto end;
415 }
416 if (err != AAC_DEC_OK) {
417 av_log(avctx, AV_LOG_ERROR,
418 "aacDecoder_DecodeFrame() failed: %x\n", err);
419 ret = AVERROR_UNKNOWN;
420 goto end;
421 }
422
423 if ((ret = get_stream_info(avctx)) < 0)
424 goto end;
425 frame->nb_samples = avctx->frame_size;
426
427 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
428 if (flags & AACDEC_FLUSH) {
429 // Only return the right amount of samples at the end; if calling the
430 // decoder with AACDEC_FLUSH, it will keep returning frames indefinitely.
431 frame->nb_samples = FFMIN(s->flush_samples, frame->nb_samples);
432 av_log(s, AV_LOG_DEBUG, "Returning %d/%d delayed samples.\n",
433 frame->nb_samples, s->flush_samples);
434 s->flush_samples -= frame->nb_samples;
435 } else {
436 // Trim off samples from the start to compensate for extra decoder
437 // delay. We could also just adjust the pts, but this avoids
438 // including the extra samples in the output altogether.
439 if (s->delay_samples) {
440 int drop_samples = FFMIN(s->delay_samples, frame->nb_samples);
441 av_log(s, AV_LOG_DEBUG, "Dropping %d/%d delayed samples.\n",
442 drop_samples, s->delay_samples);
443 s->delay_samples -= drop_samples;
444 frame->nb_samples -= drop_samples;
445 input_offset = drop_samples * avctx->ch_layout.nb_channels;
446 if (frame->nb_samples <= 0)
447 return 0;
448 }
449 }
450 #endif
451
452 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
453 goto end;
454
455 memcpy(frame->extended_data[0], s->decoder_buffer + input_offset,
456 avctx->ch_layout.nb_channels * frame->nb_samples *
457 av_get_bytes_per_sample(avctx->sample_fmt));
458
459 *got_frame_ptr = 1;
460 ret = avpkt->size - valid;
461
462 end:
463 return ret;
464 }
465
fdk_aac_decode_flush(AVCodecContext *avctx)466 static av_cold void fdk_aac_decode_flush(AVCodecContext *avctx)
467 {
468 FDKAACDecContext *s = avctx->priv_data;
469 AAC_DECODER_ERROR err;
470
471 if (!s->handle)
472 return;
473
474 if ((err = aacDecoder_SetParam(s->handle,
475 AAC_TPDEC_CLEAR_BUFFER, 1)) != AAC_DEC_OK)
476 av_log(avctx, AV_LOG_WARNING, "failed to clear buffer when flushing\n");
477 }
478
479 const FFCodec ff_libfdk_aac_decoder = {
480 .p.name = "libfdk_aac",
481 .p.long_name = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
482 .p.type = AVMEDIA_TYPE_AUDIO,
483 .p.id = AV_CODEC_ID_AAC,
484 .priv_data_size = sizeof(FDKAACDecContext),
485 .init = fdk_aac_decode_init,
486 FF_CODEC_DECODE_CB(fdk_aac_decode_frame),
487 .close = fdk_aac_decode_close,
488 .flush = fdk_aac_decode_flush,
489 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF
490 #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
491 | AV_CODEC_CAP_DELAY
492 #endif
493 ,
494 .p.priv_class = &fdk_aac_dec_class,
495 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE |
496 FF_CODEC_CAP_INIT_CLEANUP,
497 .p.wrapper_name = "libfdk",
498 };
499