/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 * Copyright (C) 2012-2015 Erik de Castro Lopo <erikd@mega-nerd.com>
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License") ;
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	 http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

/*
	File:		ALACEncoder.cpp
*/

// build stuff
#define VERBOSE_DEBUG		0
#define DebugMsg			printf

// headers
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sfendian.h"

#include "alac_codec.h"

#include "aglib.h"
#include "dplib.h"
#include "matrixlib.h"

#include "ALACBitUtilities.h"
#include "ALACAudioTypes.h"
#include "EndianPortable.h"

// local boolean type
// NOTE(review): this enum defines the identifiers false/true/bool itself, so it cannot coexist with
// <stdbool.h> (or C23, where they are keywords) -- confirm none of the headers above pull that in
typedef enum
{
	false = 0,
	true = 1
} bool ;

// forward declarations of the file-local encoders defined further down
static void	GetConfig (ALAC_ENCODER *p, ALACSpecificConfig * config) ;

static int32_t	EncodeStereo (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;
static int32_t	EncodeStereoFast (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;
static int32_t	EncodeStereoEscape (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t numSamples) ;
static int32_t	EncodeMono (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;



// Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
// picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
typedef int16_t (*SearchCoefs) [kALACMaxCoefs] ;

// defines/constants
const uint32_t kALACEncoderMagic	= MAKE_MARKER ('d', 'p', 'g', 'e') ;
const uint32_t kMaxSampleSize		= 32 ;			// max allowed bit width is 32
const uint32_t kDefaultMixBits		= 2 ;			// mixBits used by both stereo encoders
const uint32_t kDefaultMixRes		= 0 ;			// mixRes used by EncodeStereoFast ()
const uint32_t kMaxRes				= 4 ;			// upper bound of the mixRes search in EncodeStereo ()
const uint32_t kDefaultNumUV		= 8 ;			// predictor order used when no order search is run
const uint32_t kMinUV				= 4 ;			// smallest predictor order tried by EncodeStereo ()
const uint32_t kMaxUV				= 8 ;			// largest predictor order tried by EncodeStereo ()

// static functions
#if VERBOSE_DEBUG
static void AddFiller (BitBuffer * bits, int32_t numBytes) ;
#endif


/*
	Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
				at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
				Each particular field is accessed via the current channel indx. Note that the channel
				indx increments by two for channel pairs.

	For example:

			C L R 3-channel input		= (ID_CPE << 3) | (ID_SCE)
				indx 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
				indx 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)

			C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
				indx 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
				indx 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
				indx 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
				indx 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
				indx 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
*/
// One packed map per channel count; entry [n - 1] describes an n-channel layout.
// NOTE(review): the 5.1 example above shows ID_LFE in the << 15 slot but the table below stores ID_SCE
// there (and in the 7- and 8-channel entries) -- confirm which one the matching decoder expects.
static const uint32_t	sChannelMaps [kALACMaxChannels] =
{
	ID_SCE,
	ID_CPE,
	(ID_CPE << 3) | (ID_SCE),
	(ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
	(ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
	(ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
	(ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
	(ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
} ;

#if PRAGMA_MARK
#pragma mark -
#endif

/*
	alac_set_fastmode ()
	- store the caller's fast-mode flag in the encoder state
	- alac_encode () compares p->mFastMode against false to choose between EncodeStereo ()
	  and EncodeStereoFast () for 2-channel input
*/
void
alac_set_fastmode (ALAC_ENCODER * p, int32_t fast)
{
	p->mFastMode = fast ;
}


/*
	HEADER SPECIFICATION

		For every segment we adopt the following header:

			1 byte reserved			(always 0)
			1 byte flags			(see below)
			[4 byte frame length]	(optional, see below)
				---Next, the per-segment ALAC parameters---
			1 byte mixBits			(middle-side parameter)
			1 byte mixRes			(middle-side parameter, interpreted as signed char)

			1 byte shiftU			(4 bits modeU, 4 bits denShiftU)
			1 byte filterU			(3 bits pbFactorU, 5 bits numU)
			(numU) shorts			(signed DP coefficients for U channel)
				---Next, 2nd-channel ALAC parameters in case of stereo mode---
			1 byte shiftV			(4 bits modeV, 4 bits denShiftV)
			1 byte filterV			(3 bits pbFactorV, 5 bits numV)
			(numV) shorts			(signed DP coefficients for V channel)
				---After this come the shift-off bytes for (>= 24)-bit data (n-byte
				shift) if indicated---
				---Then comes the AG-compressor bitstream---


	FLAGS
	-----

		The presence of certain flag bits changes the header format such that the parameters might
		not even be sent. The currently defined flags format is:

			0000psse

			where		0	= reserved, must be 0
						p	= 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
						ss	= 2-bit field indicating "number of shift-off bytes ignored by compression"
						e	= 1-bit field indicating "escape"

		The "partial frame" flag means that the following segment is not equal to the frame length specified
		in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
		segments without incurring the 32-bit overhead for each segment.

		The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
		uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
		quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
		uncompressed size. However, by shifting the input values down and running the remaining bits through
		the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
		the shifted-off bytes follow after the parameter section of the header and before the compressed
		bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
		bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
		the shifted off bytes are interleaved.

		The "escape" flag means that this segment was not compressed b/c the compressed size would be
		>= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
		The other header parameter bytes will not be sent.


	PARAMETERS
	----------

		If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:

			4 + (2 + 2 * numU)						(mono mode)
			4 + (2 + 2 * numU) + (2 + 2 * numV)		(stereo mode)

		where the ALAC filter-lengths numU, numV are bounded by a
		constant (in the current source, numU, numV <= NUMCOEPAIRS), and
		this forces an absolute upper bound on header size.

		Each segment-decode process loads up these bytes from the front of the
		local stream, in the above order, then follows with the entropy-encoded
		bits for the given segment.

		To generalize middle-side, there are various mixing modes including middle-side, each lossless,
		as embodied in the mix () and unmix () functions. These functions exploit a generalized middle-side
		transformation:

			u := [(rL + (m-r)R)/m] ;
			v := L - R ;

		where [ ] denotes integer floor. The (lossless) inverse is

			L = u + v - [rv/m] ;
			R = L - v ;

		In the segment header, m and r are encoded in mixBits and mixRes.
		Classical "middle-side" is obtained with m = 2, r = 1, but now
		we have more generalized mixes.

	NOTES
	-----
		The relevance of the ALAC coefficients is explained in detail
		in patent documents.
215*/ 216 217/* 218 EncodeStereo () 219 - encode a channel pair 220*/ 221static int32_t 222EncodeStereo (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) 223{ 224 BitBuffer workBits ; 225 BitBuffer startBits = *bitstream ; // squirrel away copy of current state in case we need to go back and do an escape packet 226 AGParamRec agParams ; 227 uint32_t bits1, bits2 ; 228 uint32_t dilate ; 229 int32_t mixBits, mixRes, maxRes ; 230 uint32_t minBits, minBits1, minBits2 ; 231 uint32_t numU, numV ; 232 uint32_t mode ; 233 uint32_t pbFactor ; 234 uint32_t chanBits ; 235 uint8_t bytesShifted ; 236 SearchCoefs coefsU ; 237 SearchCoefs coefsV ; 238 uint32_t indx ; 239 uint8_t partialFrame ; 240 uint32_t escapeBits ; 241 bool doEscape ; 242 int32_t status = ALAC_noErr ; 243 int32_t bestRes ; 244 245 // make sure we handle this bit-depth before we get going 246 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ; 247 248 // reload coefs pointers for this channel pair 249 // - note that, while you might think they should be re-initialized per block, retaining state across blocks 250 // actually results in better overall compression 251 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using 252 // different coefs for the different passes of "mixRes" results in even better compression 253 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ; 254 coefsV = (SearchCoefs) p->mCoefsV [channelIndex] ; 255 256 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many 257 // so enable 16-bit "shift off" and encode in 17-bit mode 258 // - in addition, 24-bit mode really improves with one byte shifted off 259 if (p->mBitDepth == 32) 260 bytesShifted = 2 ; 261 else if (p->mBitDepth >= 24) 262 bytesShifted = 1 ; 263 else 264 bytesShifted 
= 0 ; 265 266 chanBits = p->mBitDepth - (bytesShifted * 8) + 1 ; 267 268 // flag whether or not this is a partial frame 269 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ; 270 271 // brute-force encode optimization loop 272 // - run over variations of the encoding params to find the best choice 273 mixBits = kDefaultMixBits ; 274 maxRes = kMaxRes ; 275 numU = numV = kDefaultNumUV ; 276 mode = 0 ; 277 pbFactor = 4 ; 278 dilate = 8 ; 279 280 minBits = minBits1 = minBits2 = 1ul << 31 ; 281 282 bestRes = p->mLastMixRes [channelIndex] ; 283 284 for (mixRes = 0 ; mixRes <= maxRes ; mixRes++) 285 { 286 // mix the stereo inputs 287 switch (p->mBitDepth) 288 { 289 case 16: 290 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, mixBits, mixRes) ; 291 break ; 292 case 20: 293 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, mixBits, mixRes) ; 294 break ; 295 case 24: 296 // includes extraction of shifted-off bytes 297 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, 298 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 299 break ; 300 case 32: 301 // includes extraction of shifted-off bytes 302 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, 303 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 304 break ; 305 } 306 307 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ; 308 309 // run the dynamic predictors 310 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ; 311 pc_block (p->mMixBufferV, p->mPredictorV, numSamples / dilate, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ; 312 313 // run the lossless compressor on each channel 314 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ; 315 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ; 316 
RequireNoErr (status, goto Exit ;) ; 317 318 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ; 319 status = dyn_comp (&agParams, p->mPredictorV, &workBits, numSamples / dilate, chanBits, &bits2) ; 320 RequireNoErr (status, goto Exit ;) ; 321 322 // look for best match 323 if ((bits1 + bits2) < minBits1) 324 { 325 minBits1 = bits1 + bits2 ; 326 bestRes = mixRes ; 327 } 328 } 329 330 p->mLastMixRes [channelIndex] = (int16_t) bestRes ; 331 332 // mix the stereo inputs with the current best mixRes 333 mixRes = p->mLastMixRes [channelIndex] ; 334 switch (p->mBitDepth) 335 { 336 case 16: 337 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ; 338 break ; 339 case 20: 340 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ; 341 break ; 342 case 24: 343 // also extracts the shifted off bytes into the shift buffers 344 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 345 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 346 break ; 347 case 32: 348 // also extracts the shifted off bytes into the shift buffers 349 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 350 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 351 break ; 352 } 353 354 // now it's time for the predictor coefficient search loop 355 numU = numV = kMinUV ; 356 minBits1 = minBits2 = 1ul << 31 ; 357 358 for (uint32_t numUV = kMinUV ; numUV <= kMaxUV ; numUV += 4) 359 { 360 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ; 361 362 dilate = 32 ; 363 364 // run the predictor over the same data multiple times to help it converge 365 for (uint32_t converge = 0 ; converge < 8 ; converge++) 366 { 367 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numUV-1], numUV, chanBits, DENSHIFT_DEFAULT) ; 368 pc_block (p->mMixBufferV, p->mPredictorV, numSamples / dilate, coefsV [numUV-1], numUV, 
chanBits, DENSHIFT_DEFAULT) ; 369 } 370 371 dilate = 8 ; 372 373 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ; 374 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ; 375 376 if ((bits1 * dilate + 16 * numUV) < minBits1) 377 { 378 minBits1 = bits1 * dilate + 16 * numUV ; 379 numU = numUV ; 380 } 381 382 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ; 383 status = dyn_comp (&agParams, p->mPredictorV, &workBits, numSamples / dilate, chanBits, &bits2) ; 384 385 if ((bits2 * dilate + 16 * numUV) < minBits2) 386 { 387 minBits2 = bits2 * dilate + 16 * numUV ; 388 numV = numUV ; 389 } 390 } 391 392 // test for escape hatch if best calculated compressed size turns out to be more than the input size 393 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ; 394 if (bytesShifted != 0) 395 minBits += (numSamples * (bytesShifted * 8) * 2) ; 396 397 escapeBits = (numSamples * p->mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */ 398 399 doEscape = (minBits >= escapeBits) ? 
true : false ; 400 401 if (doEscape == false) 402 { 403 // write bitstream header and coefs 404 BitBufferWrite (bitstream, 0, 12) ; 405 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ; 406 if (partialFrame) 407 BitBufferWrite (bitstream, numSamples, 32) ; 408 BitBufferWrite (bitstream, mixBits, 8) ; 409 BitBufferWrite (bitstream, mixRes, 8) ; 410 411 //Assert ((mode < 16) && (DENSHIFT_DEFAULT < 16)) ; 412 //Assert ((pbFactor < 8) && (numU < 32)) ; 413 //Assert ((pbFactor < 8) && (numV < 32)) ; 414 415 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ; 416 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ; 417 for (indx = 0 ; indx < numU ; indx++) 418 BitBufferWrite (bitstream, coefsU [numU - 1][indx], 16) ; 419 420 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ; 421 BitBufferWrite (bitstream, (pbFactor << 5) | numV, 8) ; 422 for (indx = 0 ; indx < numV ; indx++) 423 BitBufferWrite (bitstream, coefsV [numV - 1][indx], 16) ; 424 425 // if shift active, write the interleaved shift buffers 426 if (bytesShifted != 0) 427 { 428 uint32_t bitShift = bytesShifted * 8 ; 429 430 //Assert (bitShift <= 16) ; 431 432 for (indx = 0 ; indx < (numSamples * 2) ; indx += 2) 433 { 434 uint32_t shiftedVal ; 435 436 shiftedVal = ((uint32_t) p->mShiftBufferUV [indx + 0] << bitShift) | (uint32_t) p->mShiftBufferUV [indx + 1] ; 437 BitBufferWrite (bitstream, shiftedVal, bitShift * 2) ; 438 } 439 } 440 441 // run the dynamic predictor and lossless compression for the "left" channel 442 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead 443 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel 444 if (mode == 0) 445 { 446 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ; 447 } 448 else 449 { 450 pc_block (p->mMixBufferU, p->mPredictorV, numSamples, coefsU [numU - 1], numU, chanBits, 
DENSHIFT_DEFAULT) ; 451 pc_block (p->mPredictorV, p->mPredictorU, numSamples, NULL, 31, chanBits, 0) ; 452 } 453 454 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ; 455 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ; 456 RequireNoErr (status, goto Exit ;) ; 457 458 // run the dynamic predictor and lossless compression for the "right" channel 459 if (mode == 0) 460 { 461 pc_block (p->mMixBufferV, p->mPredictorV, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ; 462 } 463 else 464 { 465 pc_block (p->mMixBufferV, p->mPredictorU, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ; 466 pc_block (p->mPredictorU, p->mPredictorV, numSamples, NULL, 31, chanBits, 0) ; 467 } 468 469 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ; 470 status = dyn_comp (&agParams, p->mPredictorV, bitstream, numSamples, chanBits, &bits2) ; 471 RequireNoErr (status, goto Exit ;) ; 472 473 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be, 474 chuck it and do an escape packet 475 */ 476 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ; 477 if (minBits >= escapeBits) 478 { 479 *bitstream = startBits ; // reset bitstream state 480 doEscape = true ; 481 printf ("compressed frame too big: %u vs. 
%u \n", minBits, escapeBits) ; 482 } 483 } 484 485 if (doEscape == true) 486 { 487 /* escape */ 488 status = EncodeStereoEscape (p, bitstream, inputBuffer, stride, numSamples) ; 489 490#if VERBOSE_DEBUG 491 DebugMsg ("escape!: %u vs %u\n", minBits, escapeBits) ; 492#endif 493 } 494 495Exit: 496 return status ; 497} 498 499/* 500 EncodeStereoFast () 501 - encode a channel pair without the search loop for maximum possible speed 502*/ 503static int32_t 504EncodeStereoFast (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) 505{ 506 BitBuffer startBits = *bitstream ; // squirrel away current bit position in case we decide to use escape hatch 507 AGParamRec agParams ; 508 uint32_t bits1, bits2 ; 509 int32_t mixBits, mixRes ; 510 uint32_t minBits, minBits1, minBits2 ; 511 uint32_t numU, numV ; 512 uint32_t mode ; 513 uint32_t pbFactor ; 514 uint32_t chanBits ; 515 uint8_t bytesShifted ; 516 SearchCoefs coefsU ; 517 SearchCoefs coefsV ; 518 uint32_t indx ; 519 uint8_t partialFrame ; 520 uint32_t escapeBits ; 521 bool doEscape ; 522 int32_t status ; 523 524 // make sure we handle this bit-depth before we get going 525 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ; 526 527 // reload coefs pointers for this channel pair 528 // - note that, while you might think they should be re-initialized per block, retaining state across blocks 529 // actually results in better overall compression 530 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using 531 // different coefs for the different passes of "mixRes" results in even better compression 532 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ; 533 coefsV = (SearchCoefs) p->mCoefsV [channelIndex] ; 534 535 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many 536 
// so enable 16-bit "shift off" and encode in 17-bit mode 537 // - in addition, 24-bit mode really improves with one byte shifted off 538 if (p->mBitDepth == 32) 539 bytesShifted = 2 ; 540 else if (p->mBitDepth >= 24) 541 bytesShifted = 1 ; 542 else 543 bytesShifted = 0 ; 544 545 chanBits = p->mBitDepth - (bytesShifted * 8) + 1 ; 546 547 // flag whether or not this is a partial frame 548 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ; 549 550 // set up default encoding parameters for "fast" mode 551 mixBits = kDefaultMixBits ; 552 mixRes = kDefaultMixRes ; 553 numU = numV = kDefaultNumUV ; 554 mode = 0 ; 555 pbFactor = 4 ; 556 557 minBits = minBits1 = minBits2 = 1ul << 31 ; 558 559 // mix the stereo inputs with default mixBits/mixRes 560 switch (p->mBitDepth) 561 { 562 case 16: 563 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ; 564 break ; 565 case 20: 566 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ; 567 break ; 568 case 24: 569 // also extracts the shifted off bytes into the shift buffers 570 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 571 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 572 break ; 573 case 32: 574 // also extracts the shifted off bytes into the shift buffers 575 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 576 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ; 577 break ; 578 } 579 580 /* speculatively write the bitstream assuming the compressed version will be smaller */ 581 582 // write bitstream header and coefs 583 BitBufferWrite (bitstream, 0, 12) ; 584 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ; 585 if (partialFrame) 586 BitBufferWrite (bitstream, numSamples, 32) ; 587 BitBufferWrite (bitstream, mixBits, 8) ; 588 BitBufferWrite (bitstream, mixRes, 8) ; 589 590 //Assert ((mode < 16) && (DENSHIFT_DEFAULT < 16)) ; 591 //Assert ((pbFactor < 8) && (numU < 32)) 
; 592 //Assert ((pbFactor < 8) && (numV < 32)) ; 593 594 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ; 595 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ; 596 for (indx = 0 ; indx < numU ; indx++) 597 BitBufferWrite (bitstream, coefsU [numU - 1][indx], 16) ; 598 599 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ; 600 BitBufferWrite (bitstream, (pbFactor << 5) | numV, 8) ; 601 for (indx = 0 ; indx < numV ; indx++) 602 BitBufferWrite (bitstream, coefsV [numV - 1][indx], 16) ; 603 604 // if shift active, write the interleaved shift buffers 605 if (bytesShifted != 0) 606 { 607 uint32_t bitShift = bytesShifted * 8 ; 608 609 //Assert (bitShift <= 16) ; 610 611 for (indx = 0 ; indx < (numSamples * 2) ; indx += 2) 612 { 613 uint32_t shiftedVal ; 614 615 shiftedVal = ((uint32_t) p->mShiftBufferUV [indx + 0] << bitShift) | (uint32_t) p->mShiftBufferUV [indx + 1] ; 616 BitBufferWrite (bitstream, shiftedVal, bitShift * 2) ; 617 } 618 } 619 620 // run the dynamic predictor and lossless compression for the "left" channel 621 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0 622 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ; 623 624 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ; 625 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ; 626 RequireNoErr (status, goto Exit ;) ; 627 628 // run the dynamic predictor and lossless compression for the "right" channel 629 pc_block (p->mMixBufferV, p->mPredictorV, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ; 630 631 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ; 632 status = dyn_comp (&agParams, p->mPredictorV, bitstream, numSamples, chanBits, &bits2) ; 633 RequireNoErr (status, goto Exit ;) ; 634 635 // do bit requirement 
calculations 636 minBits1 = bits1 + (numU * sizeof (int16_t) * 8) ; 637 minBits2 = bits2 + (numV * sizeof (int16_t) * 8) ; 638 639 // test for escape hatch if best calculated compressed size turns out to be more than the input size 640 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ; 641 if (bytesShifted != 0) 642 minBits += (numSamples * (bytesShifted * 8) * 2) ; 643 644 escapeBits = (numSamples * p->mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */ 645 646 doEscape = (minBits >= escapeBits) ? true : false ; 647 648 if (doEscape == false) 649 { 650 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be, 651 chuck it and do an escape packet 652 */ 653 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ; 654 if (minBits >= escapeBits) 655 { 656 doEscape = true ; 657 printf ("compressed frame too big: %u vs. %u\n", minBits, escapeBits) ; 658 } 659 660 } 661 662 if (doEscape == true) 663 { 664 /* escape */ 665 666 // reset bitstream position since we speculatively wrote the compressed version 667 *bitstream = startBits ; 668 669 // write escape frame 670 status = EncodeStereoEscape (p, bitstream, inputBuffer, stride, numSamples) ; 671 672#if VERBOSE_DEBUG 673 DebugMsg ("escape!: %u vs %u\n", minBits, (numSamples * p->mBitDepth * 2)) ; 674#endif 675 } 676 677Exit: 678 return status ; 679} 680 681/* 682 EncodeStereoEscape () 683 - encode stereo escape frame 684*/ 685static int32_t 686EncodeStereoEscape (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t numSamples) 687{ 688 uint8_t partialFrame ; 689 uint32_t indx ; 690 691 // flag whether or not this is a partial frame 692 partialFrame = (numSamples == p->mFrameSize) ? 
0 : 1 ; 693 694 // write bitstream header 695 BitBufferWrite (bitstream, 0, 12) ; 696 BitBufferWrite (bitstream, (partialFrame << 3) | 1, 4) ; // LSB = 1 means "frame not compressed" 697 if (partialFrame) 698 BitBufferWrite (bitstream, numSamples, 32) ; 699 700 // just copy the input data to the output buffer 701 switch (p->mBitDepth) 702 { 703 case 16: 704 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 705 { 706 BitBufferWrite (bitstream, inputBuffer [indx + 0] >> 16, 16) ; 707 BitBufferWrite (bitstream, inputBuffer [indx + 1] >> 16, 16) ; 708 } 709 break ; 710 case 20: 711 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 712 { 713 BitBufferWrite (bitstream, inputBuffer [indx + 0] >> 12, 16) ; 714 BitBufferWrite (bitstream, inputBuffer [indx + 1] >> 12, 16) ; 715 } 716 break ; 717 case 24: 718 // mix24 () with mixres param = 0 means de-interleave so use it to simplify things 719 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 0, 0, p->mShiftBufferUV, 0) ; 720 for (indx = 0 ; indx < numSamples ; indx++) 721 { 722 BitBufferWrite (bitstream, p->mMixBufferU [indx] >> 8, 24) ; 723 BitBufferWrite (bitstream, p->mMixBufferV [indx] >> 8, 24) ; 724 } 725 break ; 726 case 32: 727 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 728 { 729 BitBufferWrite (bitstream, inputBuffer [indx + 0], 32) ; 730 BitBufferWrite (bitstream, inputBuffer [indx + 1], 32) ; 731 } 732 break ; 733 } 734 735 return ALAC_noErr ; 736} 737 738/* 739 EncodeMono () 740 - encode a mono input buffer 741*/ 742static int32_t 743EncodeMono (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) 744{ 745 BitBuffer startBits = *bitstream ; // squirrel away copy of current state in case we need to go back and do an escape packet 746 AGParamRec agParams ; 747 uint32_t bits1 ; 748 uint32_t numU ; 749 SearchCoefs coefsU ; 750 uint32_t dilate ; 751 uint32_t minBits, 
bestU ; 752 uint32_t minU, maxU ; 753 uint32_t indx, indx2 ; 754 uint8_t bytesShifted ; 755 uint32_t shift ; 756 uint32_t mask ; 757 uint32_t chanBits ; 758 uint8_t pbFactor ; 759 uint8_t partialFrame ; 760 uint32_t escapeBits ; 761 bool doEscape ; 762 int32_t status = ALAC_noErr ; 763 764 765 // make sure we handle this bit-depth before we get going 766 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ; 767 768 // reload coefs array from previous frame 769 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ; 770 771 // pick bit depth for actual encoding 772 // - we lop off the lower byte (s) for 24-/32-bit encodings 773 if (p->mBitDepth == 32) 774 bytesShifted = 2 ; 775 else if (p->mBitDepth >= 24) 776 bytesShifted = 1 ; 777 else 778 bytesShifted = 0 ; 779 780 shift = bytesShifted * 8 ; 781 mask = (1ul << shift) - 1 ; 782 chanBits = p->mBitDepth - (bytesShifted * 8) ; 783 784 // flag whether or not this is a partial frame 785 partialFrame = (numSamples == p->mFrameSize) ? 
0 : 1 ; 786 787 // convert N-bit data to 32-bit for predictor 788 switch (p->mBitDepth) 789 { 790 case 16: 791 // convert 16-bit data to 32-bit for predictor 792 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride) 793 p->mMixBufferU [indx] = inputBuffer [indx2] >> 16 ; 794 break ; 795 796 case 20: 797 // convert 20-bit data to 32-bit for predictor 798 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride) 799 p->mMixBufferU [indx] = inputBuffer [indx2] >> 12 ; 800 break ; 801 case 24: 802 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte (s) 803 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride) 804 { 805 p->mMixBufferU [indx] = inputBuffer [indx2] >> 8 ; 806 p->mShiftBufferUV [indx] = (uint16_t) (p->mMixBufferU [indx] & mask) ; 807 p->mMixBufferU [indx] >>= shift ; 808 } 809 810 break ; 811 case 32: 812 // just copy the 32-bit input data for the predictor and extract the shifted off byte (s) 813 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride) 814 { 815 p->mShiftBufferUV [indx] = (uint16_t) (inputBuffer [indx2] & mask) ; 816 p->mMixBufferU [indx] = inputBuffer [indx2] >> shift ; 817 } 818 break ; 819 } 820 821 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool) 822 // - run over variations of the encoding params to find the best choice 823 minU = 4 ; 824 maxU = 8 ; 825 minBits = 1ul << 31 ; 826 pbFactor = 4 ; 827 828 bestU = minU ; 829 830 for (numU = minU ; numU <= maxU ; numU += 4) 831 { 832 BitBuffer workBits ; 833 uint32_t numBits ; 834 835 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ; 836 837 dilate = 32 ; 838 for (uint32_t converge = 0 ; converge < 7 ; converge++) 839 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ; 840 841 dilate = 8 ; 842 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU 
[numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ; 843 844 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ; 845 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ; 846 RequireNoErr (status, goto Exit ;) ; 847 848 numBits = (dilate * bits1) + (16 * numU) ; 849 if (numBits < minBits) 850 { 851 bestU = numU ; 852 minBits = numBits ; 853 } 854 } 855 856 // test for escape hatch if best calculated compressed size turns out to be more than the input size 857 // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU 858 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ; 859 if (bytesShifted != 0) 860 minBits += (numSamples * (bytesShifted * 8)) ; 861 862 escapeBits = (numSamples * p->mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */ 863 864 doEscape = (minBits >= escapeBits) ? true : false ; 865 866 if (doEscape == false) 867 { 868 // write bitstream header 869 BitBufferWrite (bitstream, 0, 12) ; 870 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ; 871 if (partialFrame) 872 BitBufferWrite (bitstream, numSamples, 32) ; 873 BitBufferWrite (bitstream, 0, 16) ; // mixBits = mixRes = 0 874 875 // write the params and predictor coefs 876 numU = bestU ; 877 BitBufferWrite (bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8) ; // modeU = 0 878 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ; 879 for (indx = 0 ; indx < numU ; indx++) 880 BitBufferWrite (bitstream, coefsU [numU-1][indx], 16) ; 881 882 // if shift active, write the interleaved shift buffers 883 if (bytesShifted != 0) 884 { 885 for (indx = 0 ; indx < numSamples ; indx++) 886 BitBufferWrite (bitstream, p->mShiftBufferUV [indx], shift) ; 887 } 888 889 // run the dynamic predictor with the best result 890 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU-1], numU, chanBits, 
DENSHIFT_DEFAULT) ; 891 892 // do lossless compression 893 set_standard_ag_params (&agParams, numSamples, numSamples) ; 894 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ; 895 //AssertNoErr (status) ; 896 897 898 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be, 899 chuck it and do an escape packet 900 */ 901 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ; 902 if (minBits >= escapeBits) 903 { 904 *bitstream = startBits ; // reset bitstream state 905 doEscape = true ; 906 printf ("compressed frame too big: %u vs. %u\n", minBits, escapeBits) ; 907 } 908 } 909 910 if (doEscape == true) 911 { 912 // write bitstream header and coefs 913 BitBufferWrite (bitstream, 0, 12) ; 914 BitBufferWrite (bitstream, (partialFrame << 3) | 1, 4) ; // LSB = 1 means "frame not compressed" 915 if (partialFrame) 916 BitBufferWrite (bitstream, numSamples, 32) ; 917 918 // just copy the input data to the output buffer 919 switch (p->mBitDepth) 920 { 921 case 16: 922 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 923 BitBufferWrite (bitstream, inputBuffer [indx] >> 16, 16) ; 924 break ; 925 case 20: 926 // convert 20-bit data to 32-bit for simplicity 927 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 928 BitBufferWrite (bitstream, inputBuffer [indx] >> 12, 20) ; 929 break ; 930 case 24: 931 // convert 24-bit data to 32-bit for simplicity 932 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride) 933 { 934 p->mMixBufferU [indx] = inputBuffer [indx2] >> 8 ; 935 BitBufferWrite (bitstream, p->mMixBufferU [indx], 24) ; 936 } 937 break ; 938 case 32: 939 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride) 940 BitBufferWrite (bitstream, inputBuffer [indx], 32) ; 941 break ; 942 } 943#if VERBOSE_DEBUG 944 DebugMsg ("escape!: %u vs %u\n", minBits, (numSamples * p->mBitDepth)) ; 945#endif 946 } 947 948Exit: 949 
return status ; 950} 951 952#if PRAGMA_MARK 953#pragma mark - 954#endif 955 956/* 957 Encode () 958 - encode the next block of samples 959*/ 960int32_t 961alac_encode (ALAC_ENCODER *p, uint32_t numSamples, 962 const int32_t * theReadBuffer, unsigned char * theWriteBuffer, uint32_t * ioNumBytes) 963{ 964 uint32_t outputSize ; 965 BitBuffer bitstream ; 966 int32_t status ; 967 uint32_t numChannels = p->mNumChannels ; 968 969 // make sure we handle this bit-depth before we get going 970 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ; 971 972 // create a bit buffer structure pointing to our output buffer 973 BitBufferInit (&bitstream, theWriteBuffer, p->mMaxOutputBytes) ; 974 975 if (numChannels == 2) 976 { 977 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0 978 BitBufferWrite (&bitstream, ID_CPE, 3) ; 979 BitBufferWrite (&bitstream, 0, 4) ; 980 981 // encode stereo input buffer 982 if (p->mFastMode == false) 983 status = EncodeStereo (p, &bitstream, theReadBuffer, 2, 0, numSamples) ; 984 else 985 status = EncodeStereoFast (p, &bitstream, theReadBuffer, 2, 0, numSamples) ; 986 RequireNoErr (status, goto Exit ;) ; 987 } 988 else if (numChannels == 1) 989 { 990 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0 991 BitBufferWrite (&bitstream, ID_SCE, 3) ; 992 BitBufferWrite (&bitstream, 0, 4) ; 993 994 // encode mono input buffer 995 status = EncodeMono (p, &bitstream, theReadBuffer, 1, 0, numSamples) ; 996 RequireNoErr (status, goto Exit ;) ; 997 } 998 else 999 { 1000 const int32_t * inputBuffer ; 1001 uint32_t tag ; 1002 uint32_t channelIndex ; 1003 uint8_t stereoElementTag ; 1004 uint8_t monoElementTag ; 1005 uint8_t lfeElementTag ; 1006 1007 inputBuffer = theReadBuffer ; 1008 1009 stereoElementTag = 0 ; 1010 monoElementTag = 0 ; 1011 lfeElementTag = 0 ; 1012 1013 for (channelIndex = 0 ; channelIndex < 
numChannels ;) 1014 { 1015 tag = (sChannelMaps [numChannels - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3) ; 1016 1017 BitBufferWrite (&bitstream, tag, 3) ; 1018 switch (tag) 1019 { 1020 case ID_SCE: 1021 // mono 1022 BitBufferWrite (&bitstream, monoElementTag, 4) ; 1023 1024 status = EncodeMono (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ; 1025 1026 inputBuffer += 1 ; 1027 channelIndex++ ; 1028 monoElementTag++ ; 1029 break ; 1030 1031 case ID_CPE: 1032 // stereo 1033 BitBufferWrite (&bitstream, stereoElementTag, 4) ; 1034 1035 status = EncodeStereo (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ; 1036 1037 inputBuffer += 2 ; 1038 channelIndex += 2 ; 1039 stereoElementTag++ ; 1040 break ; 1041 1042 case ID_LFE: 1043 // LFE channel (subwoofer) 1044 BitBufferWrite (&bitstream, lfeElementTag, 4) ; 1045 1046 status = EncodeMono (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ; 1047 1048 inputBuffer += 1 ; 1049 channelIndex++ ; 1050 lfeElementTag++ ; 1051 break ; 1052 1053 default: 1054 printf ("That ain't right! 
(%u)\n", tag) ; 1055 status = kALAC_ParamError ; 1056 goto Exit ; 1057 } 1058 1059 RequireNoErr (status, goto Exit ;) ; 1060 } 1061 } 1062 1063#if VERBOSE_DEBUG 1064{ 1065 // if there is room left in the output buffer, add some random fill data to test decoder 1066 int32_t bitsLeft ; 1067 int32_t bytesLeft ; 1068 1069 bitsLeft = BitBufferGetPosition (&bitstream) - 3 ; // - 3 for ID_END tag 1070 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8) ; 1071 1072 if ((bytesLeft > 20) && ((bytesLeft & 0x4u) != 0)) 1073 AddFiller (&bitstream, bytesLeft) ; 1074} 1075#endif 1076 1077 // add 3-bit frame end tag: ID_END 1078 BitBufferWrite (&bitstream, ID_END, 3) ; 1079 1080 // byte-align the output data 1081 BitBufferByteAlign (&bitstream, true) ; 1082 1083 outputSize = BitBufferGetPosition (&bitstream) / 8 ; 1084 //Assert (outputSize <= mMaxOutputBytes) ; 1085 1086 1087 // all good, let iTunes know what happened and remember the total number of input sample frames 1088 *ioNumBytes = outputSize ; 1089 //mEncodedFrames += encodeMsg->numInputSamples ; 1090 1091 // gather encoding stats 1092 p->mTotalBytesGenerated += outputSize ; 1093 p->mMaxFrameBytes = MAX (p->mMaxFrameBytes, outputSize) ; 1094 1095 status = ALAC_noErr ; 1096 1097Exit: 1098 return status ; 1099} 1100 1101 1102#if PRAGMA_MARK 1103#pragma mark - 1104#endif 1105 1106/* 1107 GetConfig () 1108*/ 1109void 1110GetConfig (ALAC_ENCODER *p, ALACSpecificConfig * config) 1111{ 1112 config->frameLength = Swap32NtoB (p->mFrameSize) ; 1113 config->compatibleVersion = (uint8_t) kALACCompatibleVersion ; 1114 config->bitDepth = (uint8_t) p->mBitDepth ; 1115 config->pb = (uint8_t) PB0 ; 1116 config->kb = (uint8_t) KB0 ; 1117 config->mb = (uint8_t) MB0 ; 1118 config->numChannels = (uint8_t) p->mNumChannels ; 1119 config->maxRun = Swap16NtoB ((uint16_t) MAX_RUN_DEFAULT) ; 1120 config->maxFrameBytes = Swap32NtoB (p->mMaxFrameBytes) ; 1121 config->avgBitRate = Swap32NtoB (p->mAvgBitRate) ; 1122 config->sampleRate = Swap32NtoB 
(p->mOutputSampleRate) ; 1123} 1124 1125uint32_t 1126alac_get_magic_cookie_size (uint32_t inNumChannels) 1127{ 1128 if (inNumChannels > 2) 1129 { 1130 return sizeof (ALACSpecificConfig) + kChannelAtomSize + sizeof (ALACAudioChannelLayout) ; 1131 } 1132 else 1133 { 1134 return sizeof (ALACSpecificConfig) ; 1135 } 1136} 1137 1138void 1139alac_get_magic_cookie (ALAC_ENCODER *p, void * outCookie, uint32_t * ioSize) 1140{ 1141 ALACSpecificConfig theConfig = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ; 1142 ALACAudioChannelLayout theChannelLayout = { 0, 0, 0 } ; 1143 uint8_t theChannelAtom [kChannelAtomSize] = { 0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0 } ; 1144 uint32_t theCookieSize = sizeof (ALACSpecificConfig) ; 1145 uint8_t * theCookiePointer = (uint8_t *) outCookie ; 1146 1147 GetConfig (p, &theConfig) ; 1148 if (theConfig.numChannels > 2) 1149 { 1150 theChannelLayout.mChannelLayoutTag = Swap32NtoB (ALACChannelLayoutTags [theConfig.numChannels - 1]) ; 1151 theCookieSize += (sizeof (ALACAudioChannelLayout) + kChannelAtomSize) ; 1152 } 1153 if (*ioSize >= theCookieSize) 1154 { 1155 memcpy (theCookiePointer, &theConfig, sizeof (ALACSpecificConfig)) ; 1156 theChannelAtom [3] = (sizeof (ALACAudioChannelLayout) + kChannelAtomSize) ; 1157 if (theConfig.numChannels > 2) 1158 { 1159 theCookiePointer += sizeof (ALACSpecificConfig) ; 1160 memcpy (theCookiePointer, theChannelAtom, kChannelAtomSize) ; 1161 theCookiePointer += kChannelAtomSize ; 1162 memcpy (theCookiePointer, &theChannelLayout, sizeof (ALACAudioChannelLayout)) ; 1163 } 1164 *ioSize = theCookieSize ; 1165 } 1166 else 1167 { 1168 *ioSize = 0 ; // no incomplete cookies 1169 } 1170} 1171 1172/* 1173 alac_encoder_init () 1174 - initialize the encoder component with the current config 1175*/ 1176int32_t 1177alac_encoder_init (ALAC_ENCODER *p, uint32_t samplerate, uint32_t channels, uint32_t format_flags, uint32_t frameSize) 1178{ 1179 int32_t status ; 1180 1181 p->mFrameSize = (frameSize > 0 && frameSize <= 
ALAC_FRAME_LENGTH) ? frameSize : ALAC_FRAME_LENGTH ; 1182 1183 p->mOutputSampleRate = samplerate ; 1184 p->mNumChannels = channels ; 1185 switch (format_flags) 1186 { 1187 case 1: 1188 p->mBitDepth = 16 ; 1189 break ; 1190 case 2: 1191 p->mBitDepth = 20 ; 1192 break ; 1193 case 3: 1194 p->mBitDepth = 24 ; 1195 break ; 1196 case 4: 1197 p->mBitDepth = 32 ; 1198 break ; 1199 default: 1200 break ; 1201 } 1202 1203 // set up default encoding parameters and state 1204 // - note: mFrameSize is set in the constructor or via alac_set_frame_size () which must be called before this routine 1205 for (uint32_t indx = 0 ; indx < kALACMaxChannels ; indx++) 1206 p->mLastMixRes [indx] = kDefaultMixRes ; 1207 1208 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1) 1209 // but note that this can be bigger than the input size! 1210 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation 1211 p->mMaxOutputBytes = p->mFrameSize * p->mNumChannels * ((10 + kMaxSampleSize) / 8) + 1 ; 1212 1213 status = ALAC_noErr ; 1214 1215 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio 1216 for (int32_t channel = 0 ; channel < (int32_t) p->mNumChannels ; channel++) 1217 { 1218 for (int32_t search = 0 ; search < kALACMaxSearches ; search++) 1219 { 1220 init_coefs (p->mCoefsU [channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs) ; 1221 init_coefs (p->mCoefsV [channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs) ; 1222 } 1223 } 1224 1225 return status ; 1226} 1227 1228/* 1229 alac_get_source_format () 1230 - given the input format, return one of our supported formats 1231*/ 1232void 1233alac_get_source_format (ALAC_ENCODER *p, const AudioFormatDescription * source, AudioFormatDescription * output) 1234{ 1235 (void) output ; 1236 // default is 16-bit native endian 1237 // - note: for float input we assume that's coming from one of our 
decoders (mp3, aac) so it only makes sense 1238 // to encode to 16-bit since the source was lossy in the first place 1239 // - note: if not a supported bit depth, find the closest supported bit depth to the input one 1240 if ((source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) || (source->mBitsPerChannel <= 16)) 1241 p->mBitDepth = 16 ; 1242 else if (source->mBitsPerChannel <= 20) 1243 p->mBitDepth = 20 ; 1244 else if (source->mBitsPerChannel <= 24) 1245 p->mBitDepth = 24 ; 1246 else 1247 p->mBitDepth = 32 ; 1248 1249 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels 1250 // and sample rate were specified when we were configured 1251 /* 1252 MakeUncompressedAudioFormat (mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output) ; 1253 */ 1254} 1255 1256 1257 1258#if VERBOSE_DEBUG 1259 1260#if PRAGMA_MARK 1261#pragma mark - 1262#endif 1263 1264/* 1265 AddFiller () 1266 - add fill and data stream elements to the bitstream to test the decoder 1267*/ 1268static void AddFiller (BitBuffer * bits, int32_t numBytes) 1269{ 1270 uint8_t tag ; 1271 int32_t indx ; 1272 1273 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements 1274 numBytes -= 6 ; 1275 if (numBytes <= 0) 1276 return ; 1277 1278 // randomly pick Fill or Data Stream Element based on numBytes requested 1279 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE ; 1280 1281 BitBufferWrite (bits, tag, 3) ; 1282 if (tag == ID_FIL) 1283 { 1284 // can't write more than 269 bytes in a fill element 1285 numBytes = (numBytes > 269) ? 
269 : numBytes ; 1286 1287 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size 1288 if (numBytes >= 15) 1289 { 1290 uint16_t extensionSize ; 1291 1292 BitBufferWrite (bits, 15, 4) ; 1293 1294 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax 1295 // - otherwise, there's no way to represent 15 1296 // - for example, to really mean 15 bytes you must encode extensionSize = 1 1297 // - why it's not like data stream elements I have no idea 1298 extensionSize = (numBytes - 15) + 1 ; 1299 //Assert (extensionSize <= 255) ; 1300 BitBufferWrite (bits, extensionSize, 8) ; 1301 } 1302 else 1303 BitBufferWrite (bits, numBytes, 4) ; 1304 1305 BitBufferWrite (bits, 0x10, 8) ; // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000 1306 for (indx = 0 ; indx < (numBytes - 1) ; indx++) 1307 BitBufferWrite (bits, 0xa5, 8) ; // fill_byte = b10100101 = 0xa5 1308 } 1309 else 1310 { 1311 // can't write more than 510 bytes in a data stream element 1312 numBytes = (numBytes > 510) ? 510 : numBytes ; 1313 1314 BitBufferWrite (bits, 0, 4) ; // element instance tag 1315 BitBufferWrite (bits, 1, 1) ; // byte-align flag = true 1316 1317 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size 1318 if (numBytes >= 255) 1319 { 1320 BitBufferWrite (bits, 255, 8) ; 1321 BitBufferWrite (bits, numBytes - 255, 8) ; 1322 } 1323 else 1324 BitBufferWrite (bits, numBytes, 8) ; 1325 1326 BitBufferByteAlign (bits, true) ; // byte-align with zeros 1327 1328 for (indx = 0 ; indx < numBytes ; indx++) 1329 BitBufferWrite (bits, 0x5a, 8) ; 1330 } 1331} 1332 1333#endif /* VERBOSE_DEBUG */ 1334