1/* 2 * AAC encoder 3 * Copyright (C) 2008 Konstantin Shishkov 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * AAC encoder 25 */ 26 27/*********************************** 28 * TODOs: 29 * add sane pulse detection 30 ***********************************/ 31#include <float.h> 32 33#include "libavutil/channel_layout.h" 34#include "libavutil/libm.h" 35#include "libavutil/float_dsp.h" 36#include "libavutil/opt.h" 37#include "avcodec.h" 38#include "codec_internal.h" 39#include "encode.h" 40#include "put_bits.h" 41#include "mpeg4audio.h" 42#include "sinewin.h" 43#include "profiles.h" 44#include "version.h" 45 46#include "aac.h" 47#include "aactab.h" 48#include "aacenc.h" 49#include "aacenctab.h" 50#include "aacenc_utils.h" 51 52#include "psymodel.h" 53 54static void put_pce(PutBitContext *pb, AVCodecContext *avctx) 55{ 56 int i, j; 57 AACEncContext *s = avctx->priv_data; 58 AACPCEInfo *pce = &s->pce; 59 const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; 60 const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; 61 62 put_bits(pb, 4, 0); 63 64 put_bits(pb, 2, avctx->profile); 65 put_bits(pb, 4, s->samplerate_index); 66 67 put_bits(pb, 4, pce->num_ele[0]); /* Front */ 68 put_bits(pb, 4, pce->num_ele[1]); /* Side */ 69 put_bits(pb, 4, pce->num_ele[2]); /* Back */ 70 put_bits(pb, 2, pce->num_ele[3]); /* LFE */ 71 put_bits(pb, 3, 0); /* Assoc data */ 72 put_bits(pb, 4, 0); /* CCs */ 73 74 put_bits(pb, 1, 0); /* Stereo mixdown */ 75 put_bits(pb, 1, 0); /* Mono mixdown */ 76 put_bits(pb, 1, 0); /* Something else */ 77 78 for (i = 0; i < 4; i++) { 79 for (j = 0; j < pce->num_ele[i]; j++) { 80 if (i < 3) 81 put_bits(pb, 1, pce->pairing[i][j]); 82 put_bits(pb, 4, pce->index[i][j]); 83 } 84 } 85 86 align_put_bits(pb); 87 put_bits(pb, 8, strlen(aux_data)); 88 ff_put_string(pb, aux_data, 0); 89} 90 91/** 92 * Make AAC audio config object. 93 * @see 1.6.2.1 "Syntax - AudioSpecificConfig" 94 */ 95static int put_audio_specific_config(AVCodecContext *avctx) 96{ 97 PutBitContext pb; 98 AACEncContext *s = avctx->priv_data; 99 int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0)); 100 const int max_size = 32; 101 102 avctx->extradata = av_mallocz(max_size); 103 if (!avctx->extradata) 104 return AVERROR(ENOMEM); 105 106 init_put_bits(&pb, avctx->extradata, max_size); 107 put_bits(&pb, 5, s->profile+1); //profile 108 put_bits(&pb, 4, s->samplerate_index); //sample rate index 109 put_bits(&pb, 4, channels); 110 //GASpecificConfig 111 put_bits(&pb, 1, 0); //frame length - 1024 samples 112 put_bits(&pb, 1, 0); //does not depend on core coder 113 put_bits(&pb, 1, 0); //is not extension 114 if (s->needs_pce) 115 put_pce(&pb, avctx); 116 117 //Explicitly Mark SBR absent 118 put_bits(&pb, 11, 0x2b7); //sync extension 119 put_bits(&pb, 5, AOT_SBR); 120 put_bits(&pb, 1, 0); 121 flush_put_bits(&pb); 122 avctx->extradata_size = put_bytes_output(&pb); 123 124 return 0; 125} 126 127void ff_quantize_band_cost_cache_init(struct AACEncContext *s) 128{ 129 ++s->quantize_band_cost_cache_generation; 130 if (s->quantize_band_cost_cache_generation == 0) { 131 memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache)); 132 s->quantize_band_cost_cache_generation = 1; 133 } 134} 135 136#define WINDOW_FUNC(type) \ 137static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \ 138 SingleChannelElement *sce, \ 139 const float *audio) 140 141WINDOW_FUNC(only_long) 142{ 143 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; 144 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 145 float *out = sce->ret_buf; 146 147 fdsp->vector_fmul (out, audio, lwindow, 1024); 148 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024); 149} 150 151WINDOW_FUNC(long_start) 152{ 153 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 154 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 155 float *out = sce->ret_buf; 156 157 fdsp->vector_fmul(out, audio, lwindow, 1024); 158 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448); 159 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128); 160 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448); 161} 162 163WINDOW_FUNC(long_stop) 164{ 165 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; 166 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 167 float *out = sce->ret_buf; 168 169 memset(out, 0, sizeof(out[0]) * 448); 170 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128); 171 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448); 172 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024); 173} 174 175WINDOW_FUNC(eight_short) 176{ 177 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 178 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 179 const float *in = audio + 448; 180 float *out = sce->ret_buf; 181 int w; 182 183 for (w = 0; w < 8; w++) { 184 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128); 185 out += 128; 186 in += 128; 187 fdsp->vector_fmul_reverse(out, in, swindow, 128); 188 out += 128; 189 } 190} 191 192static void (*const apply_window[4])(AVFloatDSPContext *fdsp, 193 SingleChannelElement *sce, 194 const float *audio) = { 195 [ONLY_LONG_SEQUENCE] = apply_only_long_window, 196 [LONG_START_SEQUENCE] = apply_long_start_window, 197 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window, 198 [LONG_STOP_SEQUENCE] = apply_long_stop_window 199}; 200 201static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, 202 float *audio) 203{ 204 int i; 205 const float *output = sce->ret_buf; 206 207 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio); 208 209 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) 210 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output); 211 else 212 for (i = 0; i < 1024; i += 128) 213 s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2); 214 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024); 215 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs)); 216} 217 218/** 219 * Encode ics_info element. 220 * @see Table 4.6 (syntax of ics_info) 221 */ 222static void put_ics_info(AACEncContext *s, IndividualChannelStream *info) 223{ 224 int w; 225 226 put_bits(&s->pb, 1, 0); // ics_reserved bit 227 put_bits(&s->pb, 2, info->window_sequence[0]); 228 put_bits(&s->pb, 1, info->use_kb_window[0]); 229 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 230 put_bits(&s->pb, 6, info->max_sfb); 231 put_bits(&s->pb, 1, !!info->predictor_present); 232 } else { 233 put_bits(&s->pb, 4, info->max_sfb); 234 for (w = 1; w < 8; w++) 235 put_bits(&s->pb, 1, !info->group_len[w]); 236 } 237} 238 239/** 240 * Encode MS data. 241 * @see 4.6.8.1 "Joint Coding - M/S Stereo" 242 */ 243static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe) 244{ 245 int i, w; 246 247 put_bits(pb, 2, cpe->ms_mode); 248 if (cpe->ms_mode == 1) 249 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]) 250 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++) 251 put_bits(pb, 1, cpe->ms_mask[w*16 + i]); 252} 253 254/** 255 * Produce integer coefficients from scalefactors provided by the model. 256 */ 257static void adjust_frame_information(ChannelElement *cpe, int chans) 258{ 259 int i, w, w2, g, ch; 260 int maxsfb, cmaxsfb; 261 262 for (ch = 0; ch < chans; ch++) { 263 IndividualChannelStream *ics = &cpe->ch[ch].ics; 264 maxsfb = 0; 265 cpe->ch[ch].pulse.num_pulse = 0; 266 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { 267 for (w2 = 0; w2 < ics->group_len[w]; w2++) { 268 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--) 269 ; 270 maxsfb = FFMAX(maxsfb, cmaxsfb); 271 } 272 } 273 ics->max_sfb = maxsfb; 274 275 //adjust zero bands for window groups 276 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { 277 for (g = 0; g < ics->max_sfb; g++) { 278 i = 1; 279 for (w2 = w; w2 < w + ics->group_len[w]; w2++) { 280 if (!cpe->ch[ch].zeroes[w2*16 + g]) { 281 i = 0; 282 break; 283 } 284 } 285 cpe->ch[ch].zeroes[w*16 + g] = i; 286 } 287 } 288 } 289 290 if (chans > 1 && cpe->common_window) { 291 IndividualChannelStream *ics0 = &cpe->ch[0].ics; 292 IndividualChannelStream *ics1 = &cpe->ch[1].ics; 293 int msc = 0; 294 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb); 295 ics1->max_sfb = ics0->max_sfb; 296 for (w = 0; w < ics0->num_windows*16; w += 16) 297 for (i = 0; i < ics0->max_sfb; i++) 298 if (cpe->ms_mask[w+i]) 299 msc++; 300 if (msc == 0 || ics0->max_sfb == 0) 301 cpe->ms_mode = 0; 302 else 303 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2; 304 } 305} 306 307static void apply_intensity_stereo(ChannelElement *cpe) 308{ 309 int w, w2, g, i; 310 IndividualChannelStream *ics = &cpe->ch[0].ics; 311 if (!cpe->common_window) 312 return; 313 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { 314 for (w2 = 0; w2 < ics->group_len[w]; w2++) { 315 int start = (w+w2) * 128; 316 for (g = 0; g < ics->num_swb; g++) { 317 int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14); 318 float scale = cpe->ch[0].is_ener[w*16+g]; 319 if (!cpe->is_mask[w*16 + g]) { 320 start += ics->swb_sizes[g]; 321 continue; 322 } 323 if (cpe->ms_mask[w*16 + g]) 324 p *= -1; 325 for (i = 0; i < ics->swb_sizes[g]; i++) { 326 float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale; 327 cpe->ch[0].coeffs[start+i] = sum; 328 cpe->ch[1].coeffs[start+i] = 0.0f; 329 } 330 start += ics->swb_sizes[g]; 331 } 332 } 333 } 334} 335 336static void apply_mid_side_stereo(ChannelElement *cpe) 337{ 338 int w, w2, g, i; 339 IndividualChannelStream *ics = &cpe->ch[0].ics; 340 if (!cpe->common_window) 341 return; 342 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { 343 for (w2 = 0; w2 < ics->group_len[w]; w2++) { 344 int start = (w+w2) * 128; 345 for (g = 0; g < ics->num_swb; g++) { 346 /* ms_mask can be used for other purposes in PNS and I/S, 347 * so must not apply M/S if any band uses either, even if 348 * ms_mask is set. 349 */ 350 if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g] 351 || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT 352 || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) { 353 start += ics->swb_sizes[g]; 354 continue; 355 } 356 for (i = 0; i < ics->swb_sizes[g]; i++) { 357 float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f; 358 float R = L - cpe->ch[1].coeffs[start+i]; 359 cpe->ch[0].coeffs[start+i] = L; 360 cpe->ch[1].coeffs[start+i] = R; 361 } 362 start += ics->swb_sizes[g]; 363 } 364 } 365 } 366} 367 368/** 369 * Encode scalefactor band coding type. 370 */ 371static void encode_band_info(AACEncContext *s, SingleChannelElement *sce) 372{ 373 int w; 374 375 if (s->coder->set_special_band_scalefactors) 376 s->coder->set_special_band_scalefactors(s, sce); 377 378 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 379 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda); 380} 381 382/** 383 * Encode scalefactors. 384 */ 385static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, 386 SingleChannelElement *sce) 387{ 388 int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET; 389 int off_is = 0, noise_flag = 1; 390 int i, w; 391 392 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 393 for (i = 0; i < sce->ics.max_sfb; i++) { 394 if (!sce->zeroes[w*16 + i]) { 395 if (sce->band_type[w*16 + i] == NOISE_BT) { 396 diff = sce->sf_idx[w*16 + i] - off_pns; 397 off_pns = sce->sf_idx[w*16 + i]; 398 if (noise_flag-- > 0) { 399 put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE); 400 continue; 401 } 402 } else if (sce->band_type[w*16 + i] == INTENSITY_BT || 403 sce->band_type[w*16 + i] == INTENSITY_BT2) { 404 diff = sce->sf_idx[w*16 + i] - off_is; 405 off_is = sce->sf_idx[w*16 + i]; 406 } else { 407 diff = sce->sf_idx[w*16 + i] - off_sf; 408 off_sf = sce->sf_idx[w*16 + i]; 409 } 410 diff += SCALE_DIFF_ZERO; 411 av_assert0(diff >= 0 && diff <= 120); 412 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]); 413 } 414 } 415 } 416} 417 418/** 419 * Encode pulse data. 420 */ 421static void encode_pulses(AACEncContext *s, Pulse *pulse) 422{ 423 int i; 424 425 put_bits(&s->pb, 1, !!pulse->num_pulse); 426 if (!pulse->num_pulse) 427 return; 428 429 put_bits(&s->pb, 2, pulse->num_pulse - 1); 430 put_bits(&s->pb, 6, pulse->start); 431 for (i = 0; i < pulse->num_pulse; i++) { 432 put_bits(&s->pb, 5, pulse->pos[i]); 433 put_bits(&s->pb, 4, pulse->amp[i]); 434 } 435} 436 437/** 438 * Encode spectral coefficients processed by psychoacoustic model. 439 */ 440static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) 441{ 442 int start, i, w, w2; 443 444 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 445 start = 0; 446 for (i = 0; i < sce->ics.max_sfb; i++) { 447 if (sce->zeroes[w*16 + i]) { 448 start += sce->ics.swb_sizes[i]; 449 continue; 450 } 451 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) { 452 s->coder->quantize_and_encode_band(s, &s->pb, 453 &sce->coeffs[start + w2*128], 454 NULL, sce->ics.swb_sizes[i], 455 sce->sf_idx[w*16 + i], 456 sce->band_type[w*16 + i], 457 s->lambda, 458 sce->ics.window_clipping[w]); 459 } 460 start += sce->ics.swb_sizes[i]; 461 } 462 } 463} 464 465/** 466 * Downscale spectral coefficients for near-clipping windows to avoid artifacts 467 */ 468static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce) 469{ 470 int start, i, j, w; 471 472 if (sce->ics.clip_avoidance_factor < 1.0f) { 473 for (w = 0; w < sce->ics.num_windows; w++) { 474 start = 0; 475 for (i = 0; i < sce->ics.max_sfb; i++) { 476 float *swb_coeffs = &sce->coeffs[start + w*128]; 477 for (j = 0; j < sce->ics.swb_sizes[i]; j++) 478 swb_coeffs[j] *= sce->ics.clip_avoidance_factor; 479 start += sce->ics.swb_sizes[i]; 480 } 481 } 482 } 483} 484 485/** 486 * Encode one channel of audio data. 487 */ 488static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, 489 SingleChannelElement *sce, 490 int common_window) 491{ 492 put_bits(&s->pb, 8, sce->sf_idx[0]); 493 if (!common_window) { 494 put_ics_info(s, &sce->ics); 495 if (s->coder->encode_main_pred) 496 s->coder->encode_main_pred(s, sce); 497 if (s->coder->encode_ltp_info) 498 s->coder->encode_ltp_info(s, sce, 0); 499 } 500 encode_band_info(s, sce); 501 encode_scale_factors(avctx, s, sce); 502 encode_pulses(s, &sce->pulse); 503 put_bits(&s->pb, 1, !!sce->tns.present); 504 if (s->coder->encode_tns_info) 505 s->coder->encode_tns_info(s, sce); 506 put_bits(&s->pb, 1, 0); //ssr 507 encode_spectral_coeffs(s, sce); 508 return 0; 509} 510 511/** 512 * Write some auxiliary information about the created AAC file. 513 */ 514static void put_bitstream_info(AACEncContext *s, const char *name) 515{ 516 int i, namelen, padbits; 517 518 namelen = strlen(name) + 2; 519 put_bits(&s->pb, 3, TYPE_FIL); 520 put_bits(&s->pb, 4, FFMIN(namelen, 15)); 521 if (namelen >= 15) 522 put_bits(&s->pb, 8, namelen - 14); 523 put_bits(&s->pb, 4, 0); //extension type - filler 524 padbits = -put_bits_count(&s->pb) & 7; 525 align_put_bits(&s->pb); 526 for (i = 0; i < namelen - 2; i++) 527 put_bits(&s->pb, 8, name[i]); 528 put_bits(&s->pb, 12 - padbits, 0); 529} 530 531/* 532 * Copy input samples. 533 * Channels are reordered from libavcodec's default order to AAC order. 534 */ 535static void copy_input_samples(AACEncContext *s, const AVFrame *frame) 536{ 537 int ch; 538 int end = 2048 + (frame ? frame->nb_samples : 0); 539 const uint8_t *channel_map = s->reorder_map; 540 541 /* copy and remap input samples */ 542 for (ch = 0; ch < s->channels; ch++) { 543 /* copy last 1024 samples of previous frame to the start of the current frame */ 544 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0])); 545 546 /* copy new samples and zero any remaining samples */ 547 if (frame) { 548 memcpy(&s->planar_samples[ch][2048], 549 frame->extended_data[channel_map[ch]], 550 frame->nb_samples * sizeof(s->planar_samples[0][0])); 551 } 552 memset(&s->planar_samples[ch][end], 0, 553 (3072 - end) * sizeof(s->planar_samples[0][0])); 554 } 555} 556 557static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 558 const AVFrame *frame, int *got_packet_ptr) 559{ 560 AACEncContext *s = avctx->priv_data; 561 float **samples = s->planar_samples, *samples2, *la, *overlap; 562 ChannelElement *cpe; 563 SingleChannelElement *sce; 564 IndividualChannelStream *ics; 565 int i, its, ch, w, chans, tag, start_ch, ret, frame_bits; 566 int target_bits, rate_bits, too_many_bits, too_few_bits; 567 int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0; 568 int chan_el_counter[4]; 569 FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; 570 571 /* add current frame to queue */ 572 if (frame) { 573 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) 574 return ret; 575 } else { 576 if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count)) 577 return 0; 578 } 579 580 copy_input_samples(s, frame); 581 if (s->psypp) 582 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels); 583 584 if (!avctx->frame_number) 585 return 0; 586 587 start_ch = 0; 588 for (i = 0; i < s->chan_map[0]; i++) { 589 FFPsyWindowInfo* wi = windows + start_ch; 590 tag = s->chan_map[i+1]; 591 chans = tag == TYPE_CPE ? 2 : 1; 592 cpe = &s->cpe[i]; 593 for (ch = 0; ch < chans; ch++) { 594 int k; 595 float clip_avoidance_factor; 596 sce = &cpe->ch[ch]; 597 ics = &sce->ics; 598 s->cur_channel = start_ch + ch; 599 overlap = &samples[s->cur_channel][0]; 600 samples2 = overlap + 1024; 601 la = samples2 + (448+64); 602 if (!frame) 603 la = NULL; 604 if (tag == TYPE_LFE) { 605 wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE; 606 wi[ch].window_shape = 0; 607 wi[ch].num_windows = 1; 608 wi[ch].grouping[0] = 1; 609 wi[ch].clipping[0] = 0; 610 611 /* Only the lowest 12 coefficients are used in a LFE channel. 612 * The expression below results in only the bottom 8 coefficients 613 * being used for 11.025kHz to 16kHz sample rates. 614 */ 615 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3; 616 } else { 617 wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel, 618 ics->window_sequence[0]); 619 } 620 ics->window_sequence[1] = ics->window_sequence[0]; 621 ics->window_sequence[0] = wi[ch].window_type[0]; 622 ics->use_kb_window[1] = ics->use_kb_window[0]; 623 ics->use_kb_window[0] = wi[ch].window_shape; 624 ics->num_windows = wi[ch].num_windows; 625 ics->swb_sizes = s->psy.bands [ics->num_windows == 8]; 626 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8]; 627 ics->max_sfb = FFMIN(ics->max_sfb, ics->num_swb); 628 ics->swb_offset = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ? 629 ff_swb_offset_128 [s->samplerate_index]: 630 ff_swb_offset_1024[s->samplerate_index]; 631 ics->tns_max_bands = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ? 632 ff_tns_max_bands_128 [s->samplerate_index]: 633 ff_tns_max_bands_1024[s->samplerate_index]; 634 635 for (w = 0; w < ics->num_windows; w++) 636 ics->group_len[w] = wi[ch].grouping[w]; 637 638 /* Calculate input sample maximums and evaluate clipping risk */ 639 clip_avoidance_factor = 0.0f; 640 for (w = 0; w < ics->num_windows; w++) { 641 const float *wbuf = overlap + w * 128; 642 const int wlen = 2048 / ics->num_windows; 643 float max = 0; 644 int j; 645 /* mdct input is 2 * output */ 646 for (j = 0; j < wlen; j++) 647 max = FFMAX(max, fabsf(wbuf[j])); 648 wi[ch].clipping[w] = max; 649 } 650 for (w = 0; w < ics->num_windows; w++) { 651 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) { 652 ics->window_clipping[w] = 1; 653 clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]); 654 } else { 655 ics->window_clipping[w] = 0; 656 } 657 } 658 if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) { 659 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor; 660 } else { 661 ics->clip_avoidance_factor = 1.0f; 662 } 663 664 apply_window_and_mdct(s, sce, overlap); 665 666 if (s->options.ltp && s->coder->update_ltp) { 667 s->coder->update_ltp(s, sce); 668 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]); 669 s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf); 670 } 671 672 for (k = 0; k < 1024; k++) { 673 if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation 674 av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n"); 675 return AVERROR(EINVAL); 676 } 677 } 678 avoid_clipping(s, sce); 679 } 680 start_ch += chans; 681 } 682 if ((ret = ff_alloc_packet(avctx, avpkt, 8192 * s->channels)) < 0) 683 return ret; 684 frame_bits = its = 0; 685 do { 686 init_put_bits(&s->pb, avpkt->data, avpkt->size); 687 688 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) 689 put_bitstream_info(s, LIBAVCODEC_IDENT); 690 start_ch = 0; 691 target_bits = 0; 692 memset(chan_el_counter, 0, sizeof(chan_el_counter)); 693 for (i = 0; i < s->chan_map[0]; i++) { 694 FFPsyWindowInfo* wi = windows + start_ch; 695 const float *coeffs[2]; 696 tag = s->chan_map[i+1]; 697 chans = tag == TYPE_CPE ? 2 : 1; 698 cpe = &s->cpe[i]; 699 cpe->common_window = 0; 700 memset(cpe->is_mask, 0, sizeof(cpe->is_mask)); 701 memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask)); 702 put_bits(&s->pb, 3, tag); 703 put_bits(&s->pb, 4, chan_el_counter[tag]++); 704 for (ch = 0; ch < chans; ch++) { 705 sce = &cpe->ch[ch]; 706 coeffs[ch] = sce->coeffs; 707 sce->ics.predictor_present = 0; 708 sce->ics.ltp.present = 0; 709 memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used)); 710 memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used)); 711 memset(&sce->tns, 0, sizeof(TemporalNoiseShaping)); 712 for (w = 0; w < 128; w++) 713 if (sce->band_type[w] > RESERVED_BT) 714 sce->band_type[w] = 0; 715 } 716 s->psy.bitres.alloc = -1; 717 s->psy.bitres.bits = s->last_frame_pb_count / s->channels; 718 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi); 719 if (s->psy.bitres.alloc > 0) { 720 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */ 721 target_bits += s->psy.bitres.alloc 722 * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120)); 723 s->psy.bitres.alloc /= chans; 724 } 725 s->cur_type = tag; 726 for (ch = 0; ch < chans; ch++) { 727 s->cur_channel = start_ch + ch; 728 if (s->options.pns && s->coder->mark_pns) 729 s->coder->mark_pns(s, avctx, &cpe->ch[ch]); 730 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); 731 } 732 if (chans > 1 733 && wi[0].window_type[0] == wi[1].window_type[0] 734 && wi[0].window_shape == wi[1].window_shape) { 735 736 cpe->common_window = 1; 737 for (w = 0; w < wi[0].num_windows; w++) { 738 if (wi[0].grouping[w] != wi[1].grouping[w]) { 739 cpe->common_window = 0; 740 break; 741 } 742 } 743 } 744 for (ch = 0; ch < chans; ch++) { /* TNS and PNS */ 745 sce = &cpe->ch[ch]; 746 s->cur_channel = start_ch + ch; 747 if (s->options.tns && s->coder->search_for_tns) 748 s->coder->search_for_tns(s, sce); 749 if (s->options.tns && s->coder->apply_tns_filt) 750 s->coder->apply_tns_filt(s, sce); 751 if (sce->tns.present) 752 tns_mode = 1; 753 if (s->options.pns && s->coder->search_for_pns) 754 s->coder->search_for_pns(s, avctx, sce); 755 } 756 s->cur_channel = start_ch; 757 if (s->options.intensity_stereo) { /* Intensity Stereo */ 758 if (s->coder->search_for_is) 759 s->coder->search_for_is(s, avctx, cpe); 760 if (cpe->is_mode) is_mode = 1; 761 apply_intensity_stereo(cpe); 762 } 763 if (s->options.pred) { /* Prediction */ 764 for (ch = 0; ch < chans; ch++) { 765 sce = &cpe->ch[ch]; 766 s->cur_channel = start_ch + ch; 767 if (s->options.pred && s->coder->search_for_pred) 768 s->coder->search_for_pred(s, sce); 769 if (cpe->ch[ch].ics.predictor_present) pred_mode = 1; 770 } 771 if (s->coder->adjust_common_pred) 772 s->coder->adjust_common_pred(s, cpe); 773 for (ch = 0; ch < chans; ch++) { 774 sce = &cpe->ch[ch]; 775 s->cur_channel = start_ch + ch; 776 if (s->options.pred && s->coder->apply_main_pred) 777 s->coder->apply_main_pred(s, sce); 778 } 779 s->cur_channel = start_ch; 780 } 781 if (s->options.mid_side) { /* Mid/Side stereo */ 782 if (s->options.mid_side == -1 && s->coder->search_for_ms) 783 s->coder->search_for_ms(s, cpe); 784 else if (cpe->common_window) 785 memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask)); 786 apply_mid_side_stereo(cpe); 787 } 788 adjust_frame_information(cpe, chans); 789 if (s->options.ltp) { /* LTP */ 790 for (ch = 0; ch < chans; ch++) { 791 sce = &cpe->ch[ch]; 792 s->cur_channel = start_ch + ch; 793 if (s->coder->search_for_ltp) 794 s->coder->search_for_ltp(s, sce, cpe->common_window); 795 if (sce->ics.ltp.present) pred_mode = 1; 796 } 797 s->cur_channel = start_ch; 798 if (s->coder->adjust_common_ltp) 799 s->coder->adjust_common_ltp(s, cpe); 800 } 801 if (chans == 2) { 802 put_bits(&s->pb, 1, cpe->common_window); 803 if (cpe->common_window) { 804 put_ics_info(s, &cpe->ch[0].ics); 805 if (s->coder->encode_main_pred) 806 s->coder->encode_main_pred(s, &cpe->ch[0]); 807 if (s->coder->encode_ltp_info) 808 s->coder->encode_ltp_info(s, &cpe->ch[0], 1); 809 encode_ms_info(&s->pb, cpe); 810 if (cpe->ms_mode) ms_mode = 1; 811 } 812 } 813 for (ch = 0; ch < chans; ch++) { 814 s->cur_channel = start_ch + ch; 815 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window); 816 } 817 start_ch += chans; 818 } 819 820 if (avctx->flags & AV_CODEC_FLAG_QSCALE) { 821 /* When using a constant Q-scale, don't mess with lambda */ 822 break; 823 } 824 825 /* rate control stuff 826 * allow between the nominal bitrate, and what psy's bit reservoir says to target 827 * but drift towards the nominal bitrate always 828 */ 829 frame_bits = put_bits_count(&s->pb); 830 rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate; 831 rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3); 832 too_many_bits = FFMAX(target_bits, rate_bits); 833 too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3); 834 too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits); 835 836 /* When using ABR, be strict (but only for increasing) */ 837 too_few_bits = too_few_bits - too_few_bits/8; 838 too_many_bits = too_many_bits + too_many_bits/2; 839 840 if ( its == 0 /* for steady-state Q-scale tracking */ 841 || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits)) 842 || frame_bits >= 6144 * s->channels - 3 ) 843 { 844 float ratio = ((float)rate_bits) / frame_bits; 845 846 if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) { 847 /* 848 * This path is for steady-state Q-scale tracking 849 * When frame bits fall within the stable range, we still need to adjust 850 * lambda to maintain it like so in a stable fashion (large jumps in lambda 851 * create artifacts and should be avoided), but slowly 852 */ 853 ratio = sqrtf(sqrtf(ratio)); 854 ratio = av_clipf(ratio, 0.9f, 1.1f); 855 } else { 856 /* Not so fast though */ 857 ratio = sqrtf(ratio); 858 } 859 s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f); 860 861 /* Keep iterating if we must reduce and lambda is in the sky */ 862 if (ratio > 0.9f && ratio < 1.1f) { 863 break; 864 } else { 865 if (is_mode || ms_mode || tns_mode || pred_mode) { 866 for (i = 0; i < s->chan_map[0]; i++) { 867 // Must restore coeffs 868 chans = tag == TYPE_CPE ? 2 : 1; 869 cpe = &s->cpe[i]; 870 for (ch = 0; ch < chans; ch++) 871 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs)); 872 } 873 } 874 its++; 875 } 876 } else { 877 break; 878 } 879 } while (1); 880 881 if (s->options.ltp && s->coder->ltp_insert_new_frame) 882 s->coder->ltp_insert_new_frame(s); 883 884 put_bits(&s->pb, 3, TYPE_END); 885 flush_put_bits(&s->pb); 886 887 s->last_frame_pb_count = put_bits_count(&s->pb); 888 avpkt->size = put_bytes_output(&s->pb); 889 890 s->lambda_sum += s->lambda; 891 s->lambda_count++; 892 893 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts, 894 &avpkt->duration); 895 896 *got_packet_ptr = 1; 897 return 0; 898} 899 900static av_cold int aac_encode_end(AVCodecContext *avctx) 901{ 902 AACEncContext *s = avctx->priv_data; 903 904 av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN); 905 906 ff_mdct_end(&s->mdct1024); 907 ff_mdct_end(&s->mdct128); 908 ff_psy_end(&s->psy); 909 ff_lpc_end(&s->lpc); 910 if (s->psypp) 911 ff_psy_preprocess_end(s->psypp); 912 av_freep(&s->buffer.samples); 913 av_freep(&s->cpe); 914 av_freep(&s->fdsp); 915 ff_af_queue_close(&s->afq); 916 return 0; 917} 918 919static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s) 920{ 921 int ret = 0; 922 923 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); 924 if (!s->fdsp) 925 return AVERROR(ENOMEM); 926 927 // window init 928 ff_aac_float_common_init(); 929 930 if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0) 931 return ret; 932 if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0) 933 return ret; 934 935 return 0; 936} 937 938static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s) 939{ 940 int ch; 941 if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) || 942 !FF_ALLOCZ_TYPED_ARRAY(s->cpe, s->chan_map[0])) 943 return AVERROR(ENOMEM); 944 945 for(ch = 0; ch < s->channels; ch++) 946 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch; 947 948 return 0; 949} 950 951static av_cold int aac_encode_init(AVCodecContext *avctx) 952{ 953 AACEncContext *s = avctx->priv_data; 954 int i, ret = 0; 955 const uint8_t *sizes[2]; 956 uint8_t grouping[AAC_MAX_CHANNELS]; 957 int lengths[2]; 958 959 /* Constants */ 960 s->last_frame_pb_count = 0; 961 avctx->frame_size = 1024; 962 avctx->initial_padding = 1024; 963 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120; 964 965 /* Channel map and unspecified bitrate guessing */ 966 s->channels = avctx->ch_layout.nb_channels; 967 968 s->needs_pce = 1; 969 for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) { 970 if (!av_channel_layout_compare(&avctx->ch_layout, &aac_normal_chan_layouts[i])) { 971 s->needs_pce = s->options.pce; 972 break; 973 } 974 } 975 976 if (s->needs_pce) { 977 char buf[64]; 978 for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++) 979 if (!av_channel_layout_compare(&avctx->ch_layout, &aac_pce_configs[i].layout)) 980 break; 981 av_channel_layout_describe(&avctx->ch_layout, buf, sizeof(buf)); 982 if (i == FF_ARRAY_ELEMS(aac_pce_configs)) { 983 av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout \"%s\"\n", buf); 984 return AVERROR(EINVAL); 985 } 986 av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf); 987 s->pce = aac_pce_configs[i]; 988 s->reorder_map = s->pce.reorder_map; 989 s->chan_map = s->pce.config_map; 990 } else { 991 s->reorder_map = aac_chan_maps[s->channels - 1]; 992 s->chan_map = aac_chan_configs[s->channels - 1]; 993 } 994 995 if (!avctx->bit_rate) { 996 for (i = 1; i <= s->chan_map[0]; i++) { 997 avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */ 998 s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */ 999 69000 ; /* SCE */ 1000 } 1001 } 1002 1003 /* Samplerate */ 1004 for (i = 0; i < 16; i++) 1005 if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i]) 1006 break; 1007 s->samplerate_index = i; 1008 ERROR_IF(s->samplerate_index == 16 || 1009 s->samplerate_index >= ff_aac_swb_size_1024_len || 1010 s->samplerate_index >= ff_aac_swb_size_128_len, 1011 "Unsupported sample rate %d\n", avctx->sample_rate); 1012 1013 /* Bitrate limiting */ 1014 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels, 1015 "Too many bits %f > %d per frame requested, clamping to max\n", 1016 1024.0 * avctx->bit_rate / avctx->sample_rate, 1017 6144 * s->channels); 1018 avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate, 1019 avctx->bit_rate); 1020 1021 /* Profile and option setting */ 1022 avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW : 1023 avctx->profile; 1024 for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++) 1025 if (avctx->profile == aacenc_profiles[i]) 1026 break; 1027 if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) { 1028 avctx->profile = FF_PROFILE_AAC_LOW; 1029 ERROR_IF(s->options.pred, 1030 "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n"); 1031 ERROR_IF(s->options.ltp, 1032 "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n"); 1033 WARN_IF(s->options.pns, 1034 "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n"); 1035 s->options.pns = 0; 1036 } else if (avctx->profile == FF_PROFILE_AAC_LTP) { 1037 s->options.ltp = 1; 1038 ERROR_IF(s->options.pred, 1039 "Main prediction unavailable in the \"aac_ltp\" profile\n"); 1040 } else if (avctx->profile == FF_PROFILE_AAC_MAIN) { 1041 s->options.pred = 1; 1042 ERROR_IF(s->options.ltp, 1043 "LTP prediction unavailable in the \"aac_main\" profile\n"); 1044 } else if (s->options.ltp) { 1045 avctx->profile = FF_PROFILE_AAC_LTP; 1046 WARN_IF(1, 1047 "Chainging profile to \"aac_ltp\"\n"); 1048 ERROR_IF(s->options.pred, 1049 "Main prediction unavailable in the \"aac_ltp\" profile\n"); 1050 } else if (s->options.pred) { 1051 avctx->profile = FF_PROFILE_AAC_MAIN; 1052 WARN_IF(1, 1053 "Chainging profile to \"aac_main\"\n"); 1054 ERROR_IF(s->options.ltp, 1055 "LTP prediction unavailable in the \"aac_main\" profile\n"); 1056 } 1057 s->profile = avctx->profile; 1058 1059 /* Coder limitations */ 1060 s->coder = &ff_aac_coders[s->options.coder]; 1061 if (s->options.coder == AAC_CODER_ANMR) { 1062 ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL, 1063 "The ANMR coder is considered experimental, add -strict -2 to enable!\n"); 1064 s->options.intensity_stereo = 0; 1065 s->options.pns = 0; 1066 } 1067 ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL, 1068 "The LPT profile requires experimental compliance, add -strict -2 to enable!\n"); 1069 1070 /* M/S introduces horrible artifacts with multichannel files, this is temporary */ 1071 if (s->channels > 3) 1072 s->options.mid_side = 0; 1073 1074 if ((ret = dsp_init(avctx, s)) < 0) 1075 return ret; 1076 1077 if ((ret = alloc_buffers(avctx, s)) < 0) 1078 return ret; 1079 1080 if ((ret = put_audio_specific_config(avctx))) 1081 return ret; 1082 1083 sizes[0] = ff_aac_swb_size_1024[s->samplerate_index]; 1084 sizes[1] = ff_aac_swb_size_128[s->samplerate_index]; 1085 lengths[0] = ff_aac_num_swb_1024[s->samplerate_index]; 1086 lengths[1] = ff_aac_num_swb_128[s->samplerate_index]; 1087 for (i = 0; i < s->chan_map[0]; i++) 1088 grouping[i] = s->chan_map[i + 1] == TYPE_CPE; 1089 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, 1090 s->chan_map[0], grouping)) < 0) 1091 return ret; 1092 s->psypp = ff_psy_preprocess_init(avctx); 1093 ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON); 1094 s->random_state = 0x1f2e3d4c; 1095 1096 s->abs_pow34 = abs_pow34_v; 1097 s->quant_bands = quantize_bands; 1098 1099#if ARCH_X86 1100 ff_aac_dsp_init_x86(s); 1101#endif 1102 1103#if HAVE_MIPSDSP 1104 ff_aac_coder_init_mips(s); 1105#endif 1106 1107 ff_af_queue_init(avctx, &s->afq); 1108 ff_aac_tableinit(); 1109 1110 return 0; 1111} 1112 1113#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM 1114static const AVOption aacenc_options[] = { 1115 {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"}, 1116 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, 1117 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, 1118 {"fast", "Default fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"}, 1119 {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS}, 1120 {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, 1121 {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, 1122 {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, 1123 {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, 1124 {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, 1125 {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, 1126 FF_AAC_PROFILE_OPTS 1127 {NULL} 1128}; 1129 1130static const AVClass aacenc_class = { 1131 .class_name = "AAC encoder", 1132 .item_name = av_default_item_name, 1133 .option = aacenc_options, 1134 .version = LIBAVUTIL_VERSION_INT, 1135}; 1136 1137static const FFCodecDefault aac_encode_defaults[] = { 1138 { "b", "0" }, 1139 { NULL } 1140}; 1141 1142const FFCodec ff_aac_encoder = { 1143 .p.name = "aac", 1144 .p.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"), 1145 .p.type = AVMEDIA_TYPE_AUDIO, 1146 .p.id = AV_CODEC_ID_AAC, 1147 .priv_data_size = sizeof(AACEncContext), 1148 .init = aac_encode_init, 1149 FF_CODEC_ENCODE_CB(aac_encode_frame), 1150 .close = aac_encode_end, 1151 .defaults = aac_encode_defaults, 1152 .p.supported_samplerates = ff_mpeg4audio_sample_rates, 1153 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 1154 .p.capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY, 1155 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP, 1156 AV_SAMPLE_FMT_NONE }, 1157 .p.priv_class = &aacenc_class, 1158}; 1159