1/* 2 * ALAC (Apple Lossless Audio Codec) decoder 3 * Copyright (c) 2005 David Hammerton 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * ALAC (Apple Lossless Audio Codec) decoder 25 * @author 2005 David Hammerton 26 * @see http://crazney.net/programs/itunes/alac.html 27 * 28 * Note: This decoder expects a 36-byte QuickTime atom to be 29 * passed through the extradata[_size] fields. This atom is tacked onto 30 * the end of an 'alac' stsd atom and has the following format: 31 * 32 * 32 bits atom size 33 * 32 bits tag ("alac") 34 * 32 bits tag version (0) 35 * 32 bits samples per frame (used when not set explicitly in the frames) 36 * 8 bits compatible version (0) 37 * 8 bits sample size 38 * 8 bits history mult (40) 39 * 8 bits initial history (10) 40 * 8 bits rice param limit (14) 41 * 8 bits channels 42 * 16 bits maxRun (255) 43 * 32 bits max coded frame size (0 means unknown) 44 * 32 bits average bitrate (0 means unknown) 45 * 32 bits samplerate 46 */ 47 48#include <inttypes.h> 49 50#include "libavutil/channel_layout.h" 51#include "libavutil/opt.h" 52#include "avcodec.h" 53#include "get_bits.h" 54#include "bytestream.h" 55#include "codec_internal.h" 56#include "thread.h" 57#include "unary.h" 58#include "mathops.h" 59#include "alac_data.h" 60#include "alacdsp.h" 61 62#define ALAC_EXTRADATA_SIZE 36 63 64typedef struct ALACContext { 65 AVClass *class; 66 AVCodecContext *avctx; 67 GetBitContext gb; 68 int channels; 69 70 int32_t *predict_error_buffer[2]; 71 int32_t *output_samples_buffer[2]; 72 int32_t *extra_bits_buffer[2]; 73 74 uint32_t max_samples_per_frame; 75 uint8_t sample_size; 76 uint8_t rice_history_mult; 77 uint8_t rice_initial_history; 78 uint8_t rice_limit; 79 int sample_rate; 80 81 int extra_bits; /**< number of extra bits beyond 16-bit */ 82 int nb_samples; /**< number of samples in the current frame */ 83 84 int direct_output; 85 int extra_bit_bug; 86 87 ALACDSPContext dsp; 88} ALACContext; 89 90static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps) 91{ 92 unsigned int x = get_unary_0_9(gb); 93 94 if (x > 8) { /* RICE THRESHOLD */ 95 /* use alternative encoding */ 96 x = get_bits_long(gb, bps); 97 } else if (k != 1) { 98 int extrabits = show_bits(gb, k); 99 100 /* multiply x by 2^k - 1, as part of their strange algorithm */ 101 x = (x << k) - x; 102 103 if (extrabits > 1) { 104 x += extrabits - 1; 105 skip_bits(gb, k); 106 } else 107 skip_bits(gb, k - 1); 108 } 109 return x; 110} 111 112static int rice_decompress(ALACContext *alac, int32_t *output_buffer, 113 int nb_samples, int bps, int rice_history_mult) 114{ 115 int i; 116 unsigned int history = alac->rice_initial_history; 117 int sign_modifier = 0; 118 119 for (i = 0; i < nb_samples; i++) { 120 int k; 121 unsigned int x; 122 123 if(get_bits_left(&alac->gb) <= 0) 124 return AVERROR_INVALIDDATA; 125 126 /* calculate rice param and decode next value */ 127 k = av_log2((history >> 9) + 3); 128 k = FFMIN(k, alac->rice_limit); 129 x = decode_scalar(&alac->gb, k, bps); 130 x += sign_modifier; 131 sign_modifier = 0; 132 output_buffer[i] = (x >> 1) ^ -(x & 1); 133 134 /* update the history */ 135 if (x > 0xffff) 136 history = 0xffff; 137 else 138 history += x * rice_history_mult - 139 ((history * rice_history_mult) >> 9); 140 141 /* special case: there may be compressed blocks of 0 */ 142 if ((history < 128) && (i + 1 < nb_samples)) { 143 int block_size; 144 145 /* calculate rice param and decode block size */ 146 k = 7 - av_log2(history) + ((history + 16) >> 6); 147 k = FFMIN(k, alac->rice_limit); 148 block_size = decode_scalar(&alac->gb, k, 16); 149 150 if (block_size > 0) { 151 if (block_size >= nb_samples - i) { 152 av_log(alac->avctx, AV_LOG_ERROR, 153 "invalid zero block size of %d %d %d\n", block_size, 154 nb_samples, i); 155 block_size = nb_samples - i - 1; 156 } 157 memset(&output_buffer[i + 1], 0, 158 block_size * sizeof(*output_buffer)); 159 i += block_size; 160 } 161 if (block_size <= 0xffff) 162 sign_modifier = 1; 163 history = 0; 164 } 165 } 166 return 0; 167} 168 169static inline int sign_only(int v) 170{ 171 return v ? FFSIGN(v) : 0; 172} 173 174static void lpc_prediction(int32_t *error_buffer, uint32_t *buffer_out, 175 int nb_samples, int bps, int16_t *lpc_coefs, 176 int lpc_order, int lpc_quant) 177{ 178 int i; 179 uint32_t *pred = buffer_out; 180 181 /* first sample always copies */ 182 *buffer_out = *error_buffer; 183 184 if (nb_samples <= 1) 185 return; 186 187 if (!lpc_order) { 188 memcpy(&buffer_out[1], &error_buffer[1], 189 (nb_samples - 1) * sizeof(*buffer_out)); 190 return; 191 } 192 193 if (lpc_order == 31) { 194 /* simple 1st-order prediction */ 195 for (i = 1; i < nb_samples; i++) { 196 buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], 197 bps); 198 } 199 return; 200 } 201 202 /* read warm-up samples */ 203 for (i = 1; i <= lpc_order && i < nb_samples; i++) 204 buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps); 205 206 /* NOTE: 4 and 8 are very common cases that could be optimized. */ 207 208 for (; i < nb_samples; i++) { 209 int j; 210 int val = 0; 211 unsigned error_val = error_buffer[i]; 212 int error_sign; 213 int d = *pred++; 214 215 /* LPC prediction */ 216 for (j = 0; j < lpc_order; j++) 217 val += (pred[j] - d) * lpc_coefs[j]; 218 val = (val + (1LL << (lpc_quant - 1))) >> lpc_quant; 219 val += d + error_val; 220 buffer_out[i] = sign_extend(val, bps); 221 222 /* adapt LPC coefficients */ 223 error_sign = sign_only(error_val); 224 if (error_sign) { 225 for (j = 0; j < lpc_order && (int)(error_val * error_sign) > 0; j++) { 226 int sign; 227 val = d - pred[j]; 228 sign = sign_only(val) * error_sign; 229 lpc_coefs[j] -= sign; 230 val *= (unsigned)sign; 231 error_val -= (val >> lpc_quant) * (j + 1U); 232 } 233 } 234 } 235} 236 237static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index, 238 int channels) 239{ 240 ALACContext *alac = avctx->priv_data; 241 int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret; 242 uint32_t output_samples; 243 int i, ch; 244 245 skip_bits(&alac->gb, 4); /* element instance tag */ 246 skip_bits(&alac->gb, 12); /* unused header bits */ 247 248 /* the number of output samples is stored in the frame */ 249 has_size = get_bits1(&alac->gb); 250 251 alac->extra_bits = get_bits(&alac->gb, 2) << 3; 252 bps = alac->sample_size - alac->extra_bits + channels - 1; 253 if (bps > 32) { 254 avpriv_report_missing_feature(avctx, "bps %d", bps); 255 return AVERROR_PATCHWELCOME; 256 } 257 if (bps < 1) 258 return AVERROR_INVALIDDATA; 259 260 /* whether the frame is compressed */ 261 is_compressed = !get_bits1(&alac->gb); 262 263 if (has_size) 264 output_samples = get_bits_long(&alac->gb, 32); 265 else 266 output_samples = alac->max_samples_per_frame; 267 if (!output_samples || output_samples > alac->max_samples_per_frame) { 268 av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %"PRIu32"\n", 269 output_samples); 270 return AVERROR_INVALIDDATA; 271 } 272 if (!alac->nb_samples) { 273 /* get output buffer */ 274 frame->nb_samples = output_samples; 275 if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) 276 return ret; 277 } else if (output_samples != alac->nb_samples) { 278 av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %"PRIu32" != %d\n", 279 output_samples, alac->nb_samples); 280 return AVERROR_INVALIDDATA; 281 } 282 alac->nb_samples = output_samples; 283 if (alac->direct_output) { 284 for (ch = 0; ch < channels; ch++) 285 alac->output_samples_buffer[ch] = (int32_t *)frame->extended_data[ch_index + ch]; 286 } 287 288 if (is_compressed) { 289 int16_t lpc_coefs[2][32]; 290 int lpc_order[2]; 291 int prediction_type[2]; 292 int lpc_quant[2]; 293 int rice_history_mult[2]; 294 295 if (!alac->rice_limit) { 296 avpriv_request_sample(alac->avctx, 297 "Compression with rice limit 0"); 298 return AVERROR(ENOSYS); 299 } 300 301 decorr_shift = get_bits(&alac->gb, 8); 302 decorr_left_weight = get_bits(&alac->gb, 8); 303 304 if (channels == 2 && decorr_left_weight && decorr_shift > 31) 305 return AVERROR_INVALIDDATA; 306 307 for (ch = 0; ch < channels; ch++) { 308 prediction_type[ch] = get_bits(&alac->gb, 4); 309 lpc_quant[ch] = get_bits(&alac->gb, 4); 310 rice_history_mult[ch] = get_bits(&alac->gb, 3); 311 lpc_order[ch] = get_bits(&alac->gb, 5); 312 313 if (lpc_order[ch] >= alac->max_samples_per_frame || !lpc_quant[ch]) 314 return AVERROR_INVALIDDATA; 315 316 /* read the predictor table */ 317 for (i = lpc_order[ch] - 1; i >= 0; i--) 318 lpc_coefs[ch][i] = get_sbits(&alac->gb, 16); 319 } 320 321 if (alac->extra_bits) { 322 for (i = 0; i < alac->nb_samples; i++) { 323 if(get_bits_left(&alac->gb) <= 0) 324 return AVERROR_INVALIDDATA; 325 for (ch = 0; ch < channels; ch++) 326 alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits); 327 } 328 } 329 for (ch = 0; ch < channels; ch++) { 330 int ret=rice_decompress(alac, alac->predict_error_buffer[ch], 331 alac->nb_samples, bps, 332 rice_history_mult[ch] * alac->rice_history_mult / 4); 333 if(ret<0) 334 return ret; 335 336 /* adaptive FIR filter */ 337 if (prediction_type[ch] == 15) { 338 /* Prediction type 15 runs the adaptive FIR twice. 339 * The first pass uses the special-case coef_num = 31, while 340 * the second pass uses the coefs from the bitstream. 341 * 342 * However, this prediction type is not currently used by the 343 * reference encoder. 344 */ 345 lpc_prediction(alac->predict_error_buffer[ch], 346 alac->predict_error_buffer[ch], 347 alac->nb_samples, bps, NULL, 31, 0); 348 } else if (prediction_type[ch] > 0) { 349 av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n", 350 prediction_type[ch]); 351 } 352 lpc_prediction(alac->predict_error_buffer[ch], 353 alac->output_samples_buffer[ch], alac->nb_samples, 354 bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]); 355 } 356 } else { 357 /* not compressed, easy case */ 358 for (i = 0; i < alac->nb_samples; i++) { 359 if(get_bits_left(&alac->gb) <= 0) 360 return AVERROR_INVALIDDATA; 361 for (ch = 0; ch < channels; ch++) { 362 alac->output_samples_buffer[ch][i] = 363 get_sbits_long(&alac->gb, alac->sample_size); 364 } 365 } 366 alac->extra_bits = 0; 367 decorr_shift = 0; 368 decorr_left_weight = 0; 369 } 370 371 if (channels == 2) { 372 if (alac->extra_bits && alac->extra_bit_bug) { 373 alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer, 374 alac->extra_bits, channels, alac->nb_samples); 375 } 376 377 if (decorr_left_weight) { 378 alac->dsp.decorrelate_stereo(alac->output_samples_buffer, alac->nb_samples, 379 decorr_shift, decorr_left_weight); 380 } 381 382 if (alac->extra_bits && !alac->extra_bit_bug) { 383 alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer, 384 alac->extra_bits, channels, alac->nb_samples); 385 } 386 } else if (alac->extra_bits) { 387 alac->dsp.append_extra_bits[0](alac->output_samples_buffer, alac->extra_bits_buffer, 388 alac->extra_bits, channels, alac->nb_samples); 389 } 390 391 switch(alac->sample_size) { 392 case 16: { 393 for (ch = 0; ch < channels; ch++) { 394 int16_t *outbuffer = (int16_t *)frame->extended_data[ch_index + ch]; 395 for (i = 0; i < alac->nb_samples; i++) 396 *outbuffer++ = alac->output_samples_buffer[ch][i]; 397 }} 398 break; 399 case 20: { 400 for (ch = 0; ch < channels; ch++) { 401 for (i = 0; i < alac->nb_samples; i++) 402 alac->output_samples_buffer[ch][i] *= 1U << 12; 403 }} 404 break; 405 case 24: { 406 for (ch = 0; ch < channels; ch++) { 407 for (i = 0; i < alac->nb_samples; i++) 408 alac->output_samples_buffer[ch][i] *= 1U << 8; 409 }} 410 break; 411 } 412 413 return 0; 414} 415 416static int alac_decode_frame(AVCodecContext *avctx, AVFrame *frame, 417 int *got_frame_ptr, AVPacket *avpkt) 418{ 419 ALACContext *alac = avctx->priv_data; 420 enum AlacRawDataBlockType element; 421 int channels; 422 int ch, ret, got_end; 423 424 if ((ret = init_get_bits8(&alac->gb, avpkt->data, avpkt->size)) < 0) 425 return ret; 426 427 got_end = 0; 428 alac->nb_samples = 0; 429 ch = 0; 430 while (get_bits_left(&alac->gb) >= 3) { 431 element = get_bits(&alac->gb, 3); 432 if (element == TYPE_END) { 433 got_end = 1; 434 break; 435 } 436 if (element > TYPE_CPE && element != TYPE_LFE) { 437 avpriv_report_missing_feature(avctx, "Syntax element %d", element); 438 return AVERROR_PATCHWELCOME; 439 } 440 441 channels = (element == TYPE_CPE) ? 2 : 1; 442 if (ch + channels > alac->channels || 443 ff_alac_channel_layout_offsets[alac->channels - 1][ch] + channels > alac->channels) { 444 av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n"); 445 return AVERROR_INVALIDDATA; 446 } 447 448 ret = decode_element(avctx, frame, 449 ff_alac_channel_layout_offsets[alac->channels - 1][ch], 450 channels); 451 if (ret < 0 && get_bits_left(&alac->gb)) 452 return ret; 453 454 ch += channels; 455 } 456 if (!got_end) { 457 av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n"); 458 return AVERROR_INVALIDDATA; 459 } 460 461 if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) { 462 av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", 463 avpkt->size * 8 - get_bits_count(&alac->gb)); 464 } 465 466 if (alac->channels == ch && alac->nb_samples) 467 *got_frame_ptr = 1; 468 else 469 av_log(avctx, AV_LOG_WARNING, "Failed to decode all channels\n"); 470 471 return avpkt->size; 472} 473 474static av_cold int alac_decode_close(AVCodecContext *avctx) 475{ 476 ALACContext *alac = avctx->priv_data; 477 478 int ch; 479 for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) { 480 av_freep(&alac->predict_error_buffer[ch]); 481 if (!alac->direct_output) 482 av_freep(&alac->output_samples_buffer[ch]); 483 av_freep(&alac->extra_bits_buffer[ch]); 484 } 485 486 return 0; 487} 488 489static int allocate_buffers(ALACContext *alac) 490{ 491 int ch; 492 unsigned buf_size = alac->max_samples_per_frame * sizeof(int32_t); 493 unsigned extra_buf_size = buf_size + AV_INPUT_BUFFER_PADDING_SIZE; 494 495 for (ch = 0; ch < 2; ch++) { 496 alac->predict_error_buffer[ch] = NULL; 497 alac->output_samples_buffer[ch] = NULL; 498 alac->extra_bits_buffer[ch] = NULL; 499 } 500 501 for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) { 502 if (!(alac->predict_error_buffer[ch] = av_malloc(buf_size))) 503 return AVERROR(ENOMEM); 504 505 alac->direct_output = alac->sample_size > 16; 506 if (!alac->direct_output) { 507 if (!(alac->output_samples_buffer[ch] = av_malloc(extra_buf_size))) 508 return AVERROR(ENOMEM); 509 } 510 511 if (!(alac->extra_bits_buffer[ch] = av_malloc(extra_buf_size))) 512 return AVERROR(ENOMEM); 513 } 514 return 0; 515} 516 517static int alac_set_info(ALACContext *alac) 518{ 519 GetByteContext gb; 520 521 bytestream2_init(&gb, alac->avctx->extradata, 522 alac->avctx->extradata_size); 523 524 bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4 525 526 alac->max_samples_per_frame = bytestream2_get_be32u(&gb); 527 if (!alac->max_samples_per_frame || 528 alac->max_samples_per_frame > 4096 * 4096) { 529 av_log(alac->avctx, AV_LOG_ERROR, 530 "max samples per frame invalid: %"PRIu32"\n", 531 alac->max_samples_per_frame); 532 return AVERROR_INVALIDDATA; 533 } 534 bytestream2_skipu(&gb, 1); // compatible version 535 alac->sample_size = bytestream2_get_byteu(&gb); 536 alac->rice_history_mult = bytestream2_get_byteu(&gb); 537 alac->rice_initial_history = bytestream2_get_byteu(&gb); 538 alac->rice_limit = bytestream2_get_byteu(&gb); 539 alac->channels = bytestream2_get_byteu(&gb); 540 bytestream2_get_be16u(&gb); // maxRun 541 bytestream2_get_be32u(&gb); // max coded frame size 542 bytestream2_get_be32u(&gb); // average bitrate 543 alac->sample_rate = bytestream2_get_be32u(&gb); 544 545 return 0; 546} 547 548static av_cold int alac_decode_init(AVCodecContext * avctx) 549{ 550 int ret; 551 ALACContext *alac = avctx->priv_data; 552 alac->avctx = avctx; 553 554 /* initialize from the extradata */ 555 if (alac->avctx->extradata_size < ALAC_EXTRADATA_SIZE) { 556 av_log(avctx, AV_LOG_ERROR, "extradata is too small\n"); 557 return AVERROR_INVALIDDATA; 558 } 559 if ((ret = alac_set_info(alac)) < 0) { 560 av_log(avctx, AV_LOG_ERROR, "set_info failed\n"); 561 return ret; 562 } 563 564 switch (alac->sample_size) { 565 case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P; 566 break; 567 case 20: 568 case 24: 569 case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P; 570 break; 571 default: avpriv_request_sample(avctx, "Sample depth %d", alac->sample_size); 572 return AVERROR_PATCHWELCOME; 573 } 574 avctx->bits_per_raw_sample = alac->sample_size; 575 avctx->sample_rate = alac->sample_rate; 576 577 if (alac->channels < 1) { 578 av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n"); 579 if (avctx->ch_layout.nb_channels < 1) 580 return AVERROR(EINVAL); 581 alac->channels = avctx->ch_layout.nb_channels; 582 } 583 if (alac->channels > ALAC_MAX_CHANNELS) { 584 avpriv_report_missing_feature(avctx, "Channel count %d", 585 alac->channels); 586 return AVERROR_PATCHWELCOME; 587 } 588 av_channel_layout_uninit(&avctx->ch_layout); 589 avctx->ch_layout = ff_alac_ch_layouts[alac->channels - 1]; 590 591 if ((ret = allocate_buffers(alac)) < 0) { 592 av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n"); 593 return ret; 594 } 595 596 ff_alacdsp_init(&alac->dsp); 597 598 return 0; 599} 600 601static const AVOption options[] = { 602 { "extra_bits_bug", "Force non-standard decoding process", 603 offsetof(ALACContext, extra_bit_bug), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 604 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM }, 605 { NULL }, 606}; 607 608static const AVClass alac_class = { 609 .class_name = "alac", 610 .item_name = av_default_item_name, 611 .option = options, 612 .version = LIBAVUTIL_VERSION_INT, 613}; 614 615const FFCodec ff_alac_decoder = { 616 .p.name = "alac", 617 .p.long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"), 618 .p.type = AVMEDIA_TYPE_AUDIO, 619 .p.id = AV_CODEC_ID_ALAC, 620 .priv_data_size = sizeof(ALACContext), 621 .init = alac_decode_init, 622 .close = alac_decode_close, 623 FF_CODEC_DECODE_CB(alac_decode_frame), 624 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_CHANNEL_CONF, 625 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 626 .p.priv_class = &alac_class 627}; 628