xref: /third_party/ffmpeg/libavcodec/aacenc.c (revision cabdff1a)
1/*
2 * AAC encoder
3 * Copyright (C) 2008 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * AAC encoder
25 */
26
27/***********************************
28 *              TODOs:
29 * add sane pulse detection
30 ***********************************/
31#include <float.h>
32
33#include "libavutil/channel_layout.h"
34#include "libavutil/libm.h"
35#include "libavutil/float_dsp.h"
36#include "libavutil/opt.h"
37#include "avcodec.h"
38#include "codec_internal.h"
39#include "encode.h"
40#include "put_bits.h"
41#include "mpeg4audio.h"
42#include "sinewin.h"
43#include "profiles.h"
44#include "version.h"
45
46#include "aac.h"
47#include "aactab.h"
48#include "aacenc.h"
49#include "aacenctab.h"
50#include "aacenc_utils.h"
51
52#include "psymodel.h"
53
54static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
55{
56    int i, j;
57    AACEncContext *s = avctx->priv_data;
58    AACPCEInfo *pce = &s->pce;
59    const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
60    const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
61
62    put_bits(pb, 4, 0);
63
64    put_bits(pb, 2, avctx->profile);
65    put_bits(pb, 4, s->samplerate_index);
66
67    put_bits(pb, 4, pce->num_ele[0]); /* Front */
68    put_bits(pb, 4, pce->num_ele[1]); /* Side */
69    put_bits(pb, 4, pce->num_ele[2]); /* Back */
70    put_bits(pb, 2, pce->num_ele[3]); /* LFE */
71    put_bits(pb, 3, 0); /* Assoc data */
72    put_bits(pb, 4, 0); /* CCs */
73
74    put_bits(pb, 1, 0); /* Stereo mixdown */
75    put_bits(pb, 1, 0); /* Mono mixdown */
76    put_bits(pb, 1, 0); /* Something else */
77
78    for (i = 0; i < 4; i++) {
79        for (j = 0; j < pce->num_ele[i]; j++) {
80            if (i < 3)
81                put_bits(pb, 1, pce->pairing[i][j]);
82            put_bits(pb, 4, pce->index[i][j]);
83        }
84    }
85
86    align_put_bits(pb);
87    put_bits(pb, 8, strlen(aux_data));
88    ff_put_string(pb, aux_data, 0);
89}
90
91/**
92 * Make AAC audio config object.
93 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
94 */
95static int put_audio_specific_config(AVCodecContext *avctx)
96{
97    PutBitContext pb;
98    AACEncContext *s = avctx->priv_data;
99    int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
100    const int max_size = 32;
101
102    avctx->extradata = av_mallocz(max_size);
103    if (!avctx->extradata)
104        return AVERROR(ENOMEM);
105
106    init_put_bits(&pb, avctx->extradata, max_size);
107    put_bits(&pb, 5, s->profile+1); //profile
108    put_bits(&pb, 4, s->samplerate_index); //sample rate index
109    put_bits(&pb, 4, channels);
110    //GASpecificConfig
111    put_bits(&pb, 1, 0); //frame length - 1024 samples
112    put_bits(&pb, 1, 0); //does not depend on core coder
113    put_bits(&pb, 1, 0); //is not extension
114    if (s->needs_pce)
115        put_pce(&pb, avctx);
116
117    //Explicitly Mark SBR absent
118    put_bits(&pb, 11, 0x2b7); //sync extension
119    put_bits(&pb, 5,  AOT_SBR);
120    put_bits(&pb, 1,  0);
121    flush_put_bits(&pb);
122    avctx->extradata_size = put_bytes_output(&pb);
123
124    return 0;
125}
126
127void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
128{
129    ++s->quantize_band_cost_cache_generation;
130    if (s->quantize_band_cost_cache_generation == 0) {
131        memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
132        s->quantize_band_cost_cache_generation = 1;
133    }
134}
135
/*
 * Expands to the signature of a static windowing routine named
 * apply_<type>_window; the function body follows the macro invocation.
 */
#define WINDOW_FUNC(type)                                            \
static void apply_ ## type ## _window(AVFloatDSPContext *fdsp,       \
                                      SingleChannelElement *sce,     \
                                      const float *audio)
140
141WINDOW_FUNC(only_long)
142{
143    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
144    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
145    float *out = sce->ret_buf;
146
147    fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
148    fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
149}
150
151WINDOW_FUNC(long_start)
152{
153    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
154    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
155    float *out = sce->ret_buf;
156
157    fdsp->vector_fmul(out, audio, lwindow, 1024);
158    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
159    fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
160    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
161}
162
163WINDOW_FUNC(long_stop)
164{
165    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
166    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
167    float *out = sce->ret_buf;
168
169    memset(out, 0, sizeof(out[0]) * 448);
170    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
171    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
172    fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
173}
174
175WINDOW_FUNC(eight_short)
176{
177    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
178    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
179    const float *in = audio + 448;
180    float *out = sce->ret_buf;
181    int w;
182
183    for (w = 0; w < 8; w++) {
184        fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
185        out += 128;
186        in  += 128;
187        fdsp->vector_fmul_reverse(out, in, swindow, 128);
188        out += 128;
189    }
190}
191
192static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
193                                     SingleChannelElement *sce,
194                                     const float *audio) = {
195    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
196    [LONG_START_SEQUENCE]  = apply_long_start_window,
197    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
198    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
199};
200
201static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
202                                  float *audio)
203{
204    int i;
205    const float *output = sce->ret_buf;
206
207    apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
208
209    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
210        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
211    else
212        for (i = 0; i < 1024; i += 128)
213            s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
214    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
215    memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
216}
217
218/**
219 * Encode ics_info element.
220 * @see Table 4.6 (syntax of ics_info)
221 */
222static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
223{
224    int w;
225
226    put_bits(&s->pb, 1, 0);                // ics_reserved bit
227    put_bits(&s->pb, 2, info->window_sequence[0]);
228    put_bits(&s->pb, 1, info->use_kb_window[0]);
229    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
230        put_bits(&s->pb, 6, info->max_sfb);
231        put_bits(&s->pb, 1, !!info->predictor_present);
232    } else {
233        put_bits(&s->pb, 4, info->max_sfb);
234        for (w = 1; w < 8; w++)
235            put_bits(&s->pb, 1, !info->group_len[w]);
236    }
237}
238
239/**
240 * Encode MS data.
241 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
242 */
243static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
244{
245    int i, w;
246
247    put_bits(pb, 2, cpe->ms_mode);
248    if (cpe->ms_mode == 1)
249        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
250            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
251                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
252}
253
254/**
255 * Produce integer coefficients from scalefactors provided by the model.
256 */
257static void adjust_frame_information(ChannelElement *cpe, int chans)
258{
259    int i, w, w2, g, ch;
260    int maxsfb, cmaxsfb;
261
262    for (ch = 0; ch < chans; ch++) {
263        IndividualChannelStream *ics = &cpe->ch[ch].ics;
264        maxsfb = 0;
265        cpe->ch[ch].pulse.num_pulse = 0;
266        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
267            for (w2 =  0; w2 < ics->group_len[w]; w2++) {
268                for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
269                    ;
270                maxsfb = FFMAX(maxsfb, cmaxsfb);
271            }
272        }
273        ics->max_sfb = maxsfb;
274
275        //adjust zero bands for window groups
276        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
277            for (g = 0; g < ics->max_sfb; g++) {
278                i = 1;
279                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
280                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
281                        i = 0;
282                        break;
283                    }
284                }
285                cpe->ch[ch].zeroes[w*16 + g] = i;
286            }
287        }
288    }
289
290    if (chans > 1 && cpe->common_window) {
291        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
292        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
293        int msc = 0;
294        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
295        ics1->max_sfb = ics0->max_sfb;
296        for (w = 0; w < ics0->num_windows*16; w += 16)
297            for (i = 0; i < ics0->max_sfb; i++)
298                if (cpe->ms_mask[w+i])
299                    msc++;
300        if (msc == 0 || ics0->max_sfb == 0)
301            cpe->ms_mode = 0;
302        else
303            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
304    }
305}
306
307static void apply_intensity_stereo(ChannelElement *cpe)
308{
309    int w, w2, g, i;
310    IndividualChannelStream *ics = &cpe->ch[0].ics;
311    if (!cpe->common_window)
312        return;
313    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
314        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
315            int start = (w+w2) * 128;
316            for (g = 0; g < ics->num_swb; g++) {
317                int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
318                float scale = cpe->ch[0].is_ener[w*16+g];
319                if (!cpe->is_mask[w*16 + g]) {
320                    start += ics->swb_sizes[g];
321                    continue;
322                }
323                if (cpe->ms_mask[w*16 + g])
324                    p *= -1;
325                for (i = 0; i < ics->swb_sizes[g]; i++) {
326                    float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
327                    cpe->ch[0].coeffs[start+i] = sum;
328                    cpe->ch[1].coeffs[start+i] = 0.0f;
329                }
330                start += ics->swb_sizes[g];
331            }
332        }
333    }
334}
335
336static void apply_mid_side_stereo(ChannelElement *cpe)
337{
338    int w, w2, g, i;
339    IndividualChannelStream *ics = &cpe->ch[0].ics;
340    if (!cpe->common_window)
341        return;
342    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
343        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
344            int start = (w+w2) * 128;
345            for (g = 0; g < ics->num_swb; g++) {
346                /* ms_mask can be used for other purposes in PNS and I/S,
347                 * so must not apply M/S if any band uses either, even if
348                 * ms_mask is set.
349                 */
350                if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
351                    || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
352                    || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
353                    start += ics->swb_sizes[g];
354                    continue;
355                }
356                for (i = 0; i < ics->swb_sizes[g]; i++) {
357                    float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
358                    float R = L - cpe->ch[1].coeffs[start+i];
359                    cpe->ch[0].coeffs[start+i] = L;
360                    cpe->ch[1].coeffs[start+i] = R;
361                }
362                start += ics->swb_sizes[g];
363            }
364        }
365    }
366}
367
368/**
369 * Encode scalefactor band coding type.
370 */
371static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
372{
373    int w;
374
375    if (s->coder->set_special_band_scalefactors)
376        s->coder->set_special_band_scalefactors(s, sce);
377
378    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
379        s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
380}
381
382/**
383 * Encode scalefactors.
384 */
385static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
386                                 SingleChannelElement *sce)
387{
388    int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
389    int off_is = 0, noise_flag = 1;
390    int i, w;
391
392    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
393        for (i = 0; i < sce->ics.max_sfb; i++) {
394            if (!sce->zeroes[w*16 + i]) {
395                if (sce->band_type[w*16 + i] == NOISE_BT) {
396                    diff = sce->sf_idx[w*16 + i] - off_pns;
397                    off_pns = sce->sf_idx[w*16 + i];
398                    if (noise_flag-- > 0) {
399                        put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
400                        continue;
401                    }
402                } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
403                           sce->band_type[w*16 + i] == INTENSITY_BT2) {
404                    diff = sce->sf_idx[w*16 + i] - off_is;
405                    off_is = sce->sf_idx[w*16 + i];
406                } else {
407                    diff = sce->sf_idx[w*16 + i] - off_sf;
408                    off_sf = sce->sf_idx[w*16 + i];
409                }
410                diff += SCALE_DIFF_ZERO;
411                av_assert0(diff >= 0 && diff <= 120);
412                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
413            }
414        }
415    }
416}
417
418/**
419 * Encode pulse data.
420 */
421static void encode_pulses(AACEncContext *s, Pulse *pulse)
422{
423    int i;
424
425    put_bits(&s->pb, 1, !!pulse->num_pulse);
426    if (!pulse->num_pulse)
427        return;
428
429    put_bits(&s->pb, 2, pulse->num_pulse - 1);
430    put_bits(&s->pb, 6, pulse->start);
431    for (i = 0; i < pulse->num_pulse; i++) {
432        put_bits(&s->pb, 5, pulse->pos[i]);
433        put_bits(&s->pb, 4, pulse->amp[i]);
434    }
435}
436
437/**
438 * Encode spectral coefficients processed by psychoacoustic model.
439 */
440static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
441{
442    int start, i, w, w2;
443
444    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
445        start = 0;
446        for (i = 0; i < sce->ics.max_sfb; i++) {
447            if (sce->zeroes[w*16 + i]) {
448                start += sce->ics.swb_sizes[i];
449                continue;
450            }
451            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
452                s->coder->quantize_and_encode_band(s, &s->pb,
453                                                   &sce->coeffs[start + w2*128],
454                                                   NULL, sce->ics.swb_sizes[i],
455                                                   sce->sf_idx[w*16 + i],
456                                                   sce->band_type[w*16 + i],
457                                                   s->lambda,
458                                                   sce->ics.window_clipping[w]);
459            }
460            start += sce->ics.swb_sizes[i];
461        }
462    }
463}
464
465/**
466 * Downscale spectral coefficients for near-clipping windows to avoid artifacts
467 */
468static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
469{
470    int start, i, j, w;
471
472    if (sce->ics.clip_avoidance_factor < 1.0f) {
473        for (w = 0; w < sce->ics.num_windows; w++) {
474            start = 0;
475            for (i = 0; i < sce->ics.max_sfb; i++) {
476                float *swb_coeffs = &sce->coeffs[start + w*128];
477                for (j = 0; j < sce->ics.swb_sizes[i]; j++)
478                    swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
479                start += sce->ics.swb_sizes[i];
480            }
481        }
482    }
483}
484
485/**
486 * Encode one channel of audio data.
487 */
488static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
489                                     SingleChannelElement *sce,
490                                     int common_window)
491{
492    put_bits(&s->pb, 8, sce->sf_idx[0]);
493    if (!common_window) {
494        put_ics_info(s, &sce->ics);
495        if (s->coder->encode_main_pred)
496            s->coder->encode_main_pred(s, sce);
497        if (s->coder->encode_ltp_info)
498            s->coder->encode_ltp_info(s, sce, 0);
499    }
500    encode_band_info(s, sce);
501    encode_scale_factors(avctx, s, sce);
502    encode_pulses(s, &sce->pulse);
503    put_bits(&s->pb, 1, !!sce->tns.present);
504    if (s->coder->encode_tns_info)
505        s->coder->encode_tns_info(s, sce);
506    put_bits(&s->pb, 1, 0); //ssr
507    encode_spectral_coeffs(s, sce);
508    return 0;
509}
510
511/**
512 * Write some auxiliary information about the created AAC file.
513 */
514static void put_bitstream_info(AACEncContext *s, const char *name)
515{
516    int i, namelen, padbits;
517
518    namelen = strlen(name) + 2;
519    put_bits(&s->pb, 3, TYPE_FIL);
520    put_bits(&s->pb, 4, FFMIN(namelen, 15));
521    if (namelen >= 15)
522        put_bits(&s->pb, 8, namelen - 14);
523    put_bits(&s->pb, 4, 0); //extension type - filler
524    padbits = -put_bits_count(&s->pb) & 7;
525    align_put_bits(&s->pb);
526    for (i = 0; i < namelen - 2; i++)
527        put_bits(&s->pb, 8, name[i]);
528    put_bits(&s->pb, 12 - padbits, 0);
529}
530
531/*
532 * Copy input samples.
533 * Channels are reordered from libavcodec's default order to AAC order.
534 */
535static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
536{
537    int ch;
538    int end = 2048 + (frame ? frame->nb_samples : 0);
539    const uint8_t *channel_map = s->reorder_map;
540
541    /* copy and remap input samples */
542    for (ch = 0; ch < s->channels; ch++) {
543        /* copy last 1024 samples of previous frame to the start of the current frame */
544        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
545
546        /* copy new samples and zero any remaining samples */
547        if (frame) {
548            memcpy(&s->planar_samples[ch][2048],
549                   frame->extended_data[channel_map[ch]],
550                   frame->nb_samples * sizeof(s->planar_samples[0][0]));
551        }
552        memset(&s->planar_samples[ch][end], 0,
553               (3072 - end) * sizeof(s->planar_samples[0][0]));
554    }
555}
556
557static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
558                            const AVFrame *frame, int *got_packet_ptr)
559{
560    AACEncContext *s = avctx->priv_data;
561    float **samples = s->planar_samples, *samples2, *la, *overlap;
562    ChannelElement *cpe;
563    SingleChannelElement *sce;
564    IndividualChannelStream *ics;
565    int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
566    int target_bits, rate_bits, too_many_bits, too_few_bits;
567    int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
568    int chan_el_counter[4];
569    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
570
571    /* add current frame to queue */
572    if (frame) {
573        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
574            return ret;
575    } else {
576        if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
577            return 0;
578    }
579
580    copy_input_samples(s, frame);
581    if (s->psypp)
582        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
583
584    if (!avctx->frame_number)
585        return 0;
586
587    start_ch = 0;
588    for (i = 0; i < s->chan_map[0]; i++) {
589        FFPsyWindowInfo* wi = windows + start_ch;
590        tag      = s->chan_map[i+1];
591        chans    = tag == TYPE_CPE ? 2 : 1;
592        cpe      = &s->cpe[i];
593        for (ch = 0; ch < chans; ch++) {
594            int k;
595            float clip_avoidance_factor;
596            sce = &cpe->ch[ch];
597            ics = &sce->ics;
598            s->cur_channel = start_ch + ch;
599            overlap  = &samples[s->cur_channel][0];
600            samples2 = overlap + 1024;
601            la       = samples2 + (448+64);
602            if (!frame)
603                la = NULL;
604            if (tag == TYPE_LFE) {
605                wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
606                wi[ch].window_shape   = 0;
607                wi[ch].num_windows    = 1;
608                wi[ch].grouping[0]    = 1;
609                wi[ch].clipping[0]    = 0;
610
611                /* Only the lowest 12 coefficients are used in a LFE channel.
612                 * The expression below results in only the bottom 8 coefficients
613                 * being used for 11.025kHz to 16kHz sample rates.
614                 */
615                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
616            } else {
617                wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
618                                              ics->window_sequence[0]);
619            }
620            ics->window_sequence[1] = ics->window_sequence[0];
621            ics->window_sequence[0] = wi[ch].window_type[0];
622            ics->use_kb_window[1]   = ics->use_kb_window[0];
623            ics->use_kb_window[0]   = wi[ch].window_shape;
624            ics->num_windows        = wi[ch].num_windows;
625            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
626            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
627            ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
628            ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
629                                        ff_swb_offset_128 [s->samplerate_index]:
630                                        ff_swb_offset_1024[s->samplerate_index];
631            ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
632                                        ff_tns_max_bands_128 [s->samplerate_index]:
633                                        ff_tns_max_bands_1024[s->samplerate_index];
634
635            for (w = 0; w < ics->num_windows; w++)
636                ics->group_len[w] = wi[ch].grouping[w];
637
638            /* Calculate input sample maximums and evaluate clipping risk */
639            clip_avoidance_factor = 0.0f;
640            for (w = 0; w < ics->num_windows; w++) {
641                const float *wbuf = overlap + w * 128;
642                const int wlen = 2048 / ics->num_windows;
643                float max = 0;
644                int j;
645                /* mdct input is 2 * output */
646                for (j = 0; j < wlen; j++)
647                    max = FFMAX(max, fabsf(wbuf[j]));
648                wi[ch].clipping[w] = max;
649            }
650            for (w = 0; w < ics->num_windows; w++) {
651                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
652                    ics->window_clipping[w] = 1;
653                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
654                } else {
655                    ics->window_clipping[w] = 0;
656                }
657            }
658            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
659                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
660            } else {
661                ics->clip_avoidance_factor = 1.0f;
662            }
663
664            apply_window_and_mdct(s, sce, overlap);
665
666            if (s->options.ltp && s->coder->update_ltp) {
667                s->coder->update_ltp(s, sce);
668                apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
669                s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
670            }
671
672            for (k = 0; k < 1024; k++) {
673                if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
674                    av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
675                    return AVERROR(EINVAL);
676                }
677            }
678            avoid_clipping(s, sce);
679        }
680        start_ch += chans;
681    }
682    if ((ret = ff_alloc_packet(avctx, avpkt, 8192 * s->channels)) < 0)
683        return ret;
684    frame_bits = its = 0;
685    do {
686        init_put_bits(&s->pb, avpkt->data, avpkt->size);
687
688        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
689            put_bitstream_info(s, LIBAVCODEC_IDENT);
690        start_ch = 0;
691        target_bits = 0;
692        memset(chan_el_counter, 0, sizeof(chan_el_counter));
693        for (i = 0; i < s->chan_map[0]; i++) {
694            FFPsyWindowInfo* wi = windows + start_ch;
695            const float *coeffs[2];
696            tag      = s->chan_map[i+1];
697            chans    = tag == TYPE_CPE ? 2 : 1;
698            cpe      = &s->cpe[i];
699            cpe->common_window = 0;
700            memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
701            memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
702            put_bits(&s->pb, 3, tag);
703            put_bits(&s->pb, 4, chan_el_counter[tag]++);
704            for (ch = 0; ch < chans; ch++) {
705                sce = &cpe->ch[ch];
706                coeffs[ch] = sce->coeffs;
707                sce->ics.predictor_present = 0;
708                sce->ics.ltp.present = 0;
709                memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
710                memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
711                memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
712                for (w = 0; w < 128; w++)
713                    if (sce->band_type[w] > RESERVED_BT)
714                        sce->band_type[w] = 0;
715            }
716            s->psy.bitres.alloc = -1;
717            s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
718            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
719            if (s->psy.bitres.alloc > 0) {
720                /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
721                target_bits += s->psy.bitres.alloc
722                    * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
723                s->psy.bitres.alloc /= chans;
724            }
725            s->cur_type = tag;
726            for (ch = 0; ch < chans; ch++) {
727                s->cur_channel = start_ch + ch;
728                if (s->options.pns && s->coder->mark_pns)
729                    s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
730                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
731            }
732            if (chans > 1
733                && wi[0].window_type[0] == wi[1].window_type[0]
734                && wi[0].window_shape   == wi[1].window_shape) {
735
736                cpe->common_window = 1;
737                for (w = 0; w < wi[0].num_windows; w++) {
738                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
739                        cpe->common_window = 0;
740                        break;
741                    }
742                }
743            }
744            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
745                sce = &cpe->ch[ch];
746                s->cur_channel = start_ch + ch;
747                if (s->options.tns && s->coder->search_for_tns)
748                    s->coder->search_for_tns(s, sce);
749                if (s->options.tns && s->coder->apply_tns_filt)
750                    s->coder->apply_tns_filt(s, sce);
751                if (sce->tns.present)
752                    tns_mode = 1;
753                if (s->options.pns && s->coder->search_for_pns)
754                    s->coder->search_for_pns(s, avctx, sce);
755            }
756            s->cur_channel = start_ch;
757            if (s->options.intensity_stereo) { /* Intensity Stereo */
758                if (s->coder->search_for_is)
759                    s->coder->search_for_is(s, avctx, cpe);
760                if (cpe->is_mode) is_mode = 1;
761                apply_intensity_stereo(cpe);
762            }
763            if (s->options.pred) { /* Prediction */
764                for (ch = 0; ch < chans; ch++) {
765                    sce = &cpe->ch[ch];
766                    s->cur_channel = start_ch + ch;
767                    if (s->options.pred && s->coder->search_for_pred)
768                        s->coder->search_for_pred(s, sce);
769                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
770                }
771                if (s->coder->adjust_common_pred)
772                    s->coder->adjust_common_pred(s, cpe);
773                for (ch = 0; ch < chans; ch++) {
774                    sce = &cpe->ch[ch];
775                    s->cur_channel = start_ch + ch;
776                    if (s->options.pred && s->coder->apply_main_pred)
777                        s->coder->apply_main_pred(s, sce);
778                }
779                s->cur_channel = start_ch;
780            }
781            if (s->options.mid_side) { /* Mid/Side stereo */
782                if (s->options.mid_side == -1 && s->coder->search_for_ms)
783                    s->coder->search_for_ms(s, cpe);
784                else if (cpe->common_window)
785                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
786                apply_mid_side_stereo(cpe);
787            }
788            adjust_frame_information(cpe, chans);
789            if (s->options.ltp) { /* LTP */
790                for (ch = 0; ch < chans; ch++) {
791                    sce = &cpe->ch[ch];
792                    s->cur_channel = start_ch + ch;
793                    if (s->coder->search_for_ltp)
794                        s->coder->search_for_ltp(s, sce, cpe->common_window);
795                    if (sce->ics.ltp.present) pred_mode = 1;
796                }
797                s->cur_channel = start_ch;
798                if (s->coder->adjust_common_ltp)
799                    s->coder->adjust_common_ltp(s, cpe);
800            }
801            if (chans == 2) {
802                put_bits(&s->pb, 1, cpe->common_window);
803                if (cpe->common_window) {
804                    put_ics_info(s, &cpe->ch[0].ics);
805                    if (s->coder->encode_main_pred)
806                        s->coder->encode_main_pred(s, &cpe->ch[0]);
807                    if (s->coder->encode_ltp_info)
808                        s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
809                    encode_ms_info(&s->pb, cpe);
810                    if (cpe->ms_mode) ms_mode = 1;
811                }
812            }
813            for (ch = 0; ch < chans; ch++) {
814                s->cur_channel = start_ch + ch;
815                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
816            }
817            start_ch += chans;
818        }
819
820        if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
821            /* When using a constant Q-scale, don't mess with lambda */
822            break;
823        }
824
825        /* rate control stuff
826         * allow between the nominal bitrate, and what psy's bit reservoir says to target
827         * but drift towards the nominal bitrate always
828         */
829        frame_bits = put_bits_count(&s->pb);
830        rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
831        rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
832        too_many_bits = FFMAX(target_bits, rate_bits);
833        too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
834        too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
835
836        /* When using ABR, be strict (but only for increasing) */
837        too_few_bits = too_few_bits - too_few_bits/8;
838        too_many_bits = too_many_bits + too_many_bits/2;
839
840        if (   its == 0 /* for steady-state Q-scale tracking */
841            || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
842            || frame_bits >= 6144 * s->channels - 3  )
843        {
844            float ratio = ((float)rate_bits) / frame_bits;
845
846            if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
847                /*
848                 * This path is for steady-state Q-scale tracking
849                 * When frame bits fall within the stable range, we still need to adjust
850                 * lambda to maintain it like so in a stable fashion (large jumps in lambda
851                 * create artifacts and should be avoided), but slowly
852                 */
853                ratio = sqrtf(sqrtf(ratio));
854                ratio = av_clipf(ratio, 0.9f, 1.1f);
855            } else {
856                /* Not so fast though */
857                ratio = sqrtf(ratio);
858            }
859            s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
860
861            /* Keep iterating if we must reduce and lambda is in the sky */
862            if (ratio > 0.9f && ratio < 1.1f) {
863                break;
864            } else {
865                if (is_mode || ms_mode || tns_mode || pred_mode) {
866                    for (i = 0; i < s->chan_map[0]; i++) {
867                        // Must restore coeffs
868                        chans = tag == TYPE_CPE ? 2 : 1;
869                        cpe = &s->cpe[i];
870                        for (ch = 0; ch < chans; ch++)
871                            memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
872                    }
873                }
874                its++;
875            }
876        } else {
877            break;
878        }
879    } while (1);
880
881    if (s->options.ltp && s->coder->ltp_insert_new_frame)
882        s->coder->ltp_insert_new_frame(s);
883
884    put_bits(&s->pb, 3, TYPE_END);
885    flush_put_bits(&s->pb);
886
887    s->last_frame_pb_count = put_bits_count(&s->pb);
888    avpkt->size            = put_bytes_output(&s->pb);
889
890    s->lambda_sum += s->lambda;
891    s->lambda_count++;
892
893    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
894                       &avpkt->duration);
895
896    *got_packet_ptr = 1;
897    return 0;
898}
899
900static av_cold int aac_encode_end(AVCodecContext *avctx)
901{
902    AACEncContext *s = avctx->priv_data;
903
904    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
905
906    ff_mdct_end(&s->mdct1024);
907    ff_mdct_end(&s->mdct128);
908    ff_psy_end(&s->psy);
909    ff_lpc_end(&s->lpc);
910    if (s->psypp)
911        ff_psy_preprocess_end(s->psypp);
912    av_freep(&s->buffer.samples);
913    av_freep(&s->cpe);
914    av_freep(&s->fdsp);
915    ff_af_queue_close(&s->afq);
916    return 0;
917}
918
919static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
920{
921    int ret = 0;
922
923    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
924    if (!s->fdsp)
925        return AVERROR(ENOMEM);
926
927    // window init
928    ff_aac_float_common_init();
929
930    if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
931        return ret;
932    if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
933        return ret;
934
935    return 0;
936}
937
938static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
939{
940    int ch;
941    if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) ||
942        !FF_ALLOCZ_TYPED_ARRAY(s->cpe,            s->chan_map[0]))
943        return AVERROR(ENOMEM);
944
945    for(ch = 0; ch < s->channels; ch++)
946        s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
947
948    return 0;
949}
950
951static av_cold int aac_encode_init(AVCodecContext *avctx)
952{
953    AACEncContext *s = avctx->priv_data;
954    int i, ret = 0;
955    const uint8_t *sizes[2];
956    uint8_t grouping[AAC_MAX_CHANNELS];
957    int lengths[2];
958
959    /* Constants */
960    s->last_frame_pb_count = 0;
961    avctx->frame_size = 1024;
962    avctx->initial_padding = 1024;
963    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
964
965    /* Channel map and unspecified bitrate guessing */
966    s->channels = avctx->ch_layout.nb_channels;
967
968    s->needs_pce = 1;
969    for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
970        if (!av_channel_layout_compare(&avctx->ch_layout, &aac_normal_chan_layouts[i])) {
971            s->needs_pce = s->options.pce;
972            break;
973        }
974    }
975
976    if (s->needs_pce) {
977        char buf[64];
978        for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
979            if (!av_channel_layout_compare(&avctx->ch_layout, &aac_pce_configs[i].layout))
980                break;
981        av_channel_layout_describe(&avctx->ch_layout, buf, sizeof(buf));
982        if (i == FF_ARRAY_ELEMS(aac_pce_configs)) {
983            av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout \"%s\"\n", buf);
984            return AVERROR(EINVAL);
985        }
986        av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
987        s->pce = aac_pce_configs[i];
988        s->reorder_map = s->pce.reorder_map;
989        s->chan_map = s->pce.config_map;
990    } else {
991        s->reorder_map = aac_chan_maps[s->channels - 1];
992        s->chan_map = aac_chan_configs[s->channels - 1];
993    }
994
995    if (!avctx->bit_rate) {
996        for (i = 1; i <= s->chan_map[0]; i++) {
997            avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
998                               s->chan_map[i] == TYPE_LFE ? 16000  : /* LFE  */
999                                                            69000  ; /* SCE  */
1000        }
1001    }
1002
1003    /* Samplerate */
1004    for (i = 0; i < 16; i++)
1005        if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
1006            break;
1007    s->samplerate_index = i;
1008    ERROR_IF(s->samplerate_index == 16 ||
1009             s->samplerate_index >= ff_aac_swb_size_1024_len ||
1010             s->samplerate_index >= ff_aac_swb_size_128_len,
1011             "Unsupported sample rate %d\n", avctx->sample_rate);
1012
1013    /* Bitrate limiting */
1014    WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1015             "Too many bits %f > %d per frame requested, clamping to max\n",
1016             1024.0 * avctx->bit_rate / avctx->sample_rate,
1017             6144 * s->channels);
1018    avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1019                                     avctx->bit_rate);
1020
1021    /* Profile and option setting */
1022    avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
1023                     avctx->profile;
1024    for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1025        if (avctx->profile == aacenc_profiles[i])
1026            break;
1027    if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
1028        avctx->profile = FF_PROFILE_AAC_LOW;
1029        ERROR_IF(s->options.pred,
1030                 "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1031        ERROR_IF(s->options.ltp,
1032                 "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1033        WARN_IF(s->options.pns,
1034                "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1035        s->options.pns = 0;
1036    } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
1037        s->options.ltp = 1;
1038        ERROR_IF(s->options.pred,
1039                 "Main prediction unavailable in the \"aac_ltp\" profile\n");
1040    } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
1041        s->options.pred = 1;
1042        ERROR_IF(s->options.ltp,
1043                 "LTP prediction unavailable in the \"aac_main\" profile\n");
1044    } else if (s->options.ltp) {
1045        avctx->profile = FF_PROFILE_AAC_LTP;
1046        WARN_IF(1,
1047                "Chainging profile to \"aac_ltp\"\n");
1048        ERROR_IF(s->options.pred,
1049                 "Main prediction unavailable in the \"aac_ltp\" profile\n");
1050    } else if (s->options.pred) {
1051        avctx->profile = FF_PROFILE_AAC_MAIN;
1052        WARN_IF(1,
1053                "Chainging profile to \"aac_main\"\n");
1054        ERROR_IF(s->options.ltp,
1055                 "LTP prediction unavailable in the \"aac_main\" profile\n");
1056    }
1057    s->profile = avctx->profile;
1058
1059    /* Coder limitations */
1060    s->coder = &ff_aac_coders[s->options.coder];
1061    if (s->options.coder == AAC_CODER_ANMR) {
1062        ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1063                 "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1064        s->options.intensity_stereo = 0;
1065        s->options.pns = 0;
1066    }
1067    ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1068             "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
1069
1070    /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1071    if (s->channels > 3)
1072        s->options.mid_side = 0;
1073
1074    if ((ret = dsp_init(avctx, s)) < 0)
1075        return ret;
1076
1077    if ((ret = alloc_buffers(avctx, s)) < 0)
1078        return ret;
1079
1080    if ((ret = put_audio_specific_config(avctx)))
1081        return ret;
1082
1083    sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
1084    sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
1085    lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1086    lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
1087    for (i = 0; i < s->chan_map[0]; i++)
1088        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1089    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1090                           s->chan_map[0], grouping)) < 0)
1091        return ret;
1092    s->psypp = ff_psy_preprocess_init(avctx);
1093    ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
1094    s->random_state = 0x1f2e3d4c;
1095
1096    s->abs_pow34   = abs_pow34_v;
1097    s->quant_bands = quantize_bands;
1098
1099#if ARCH_X86
1100    ff_aac_dsp_init_x86(s);
1101#endif
1102
1103#if HAVE_MIPSDSP
1104    ff_aac_coder_init_mips(s);
1105#endif
1106
1107    ff_af_queue_init(avctx, &s->afq);
1108    ff_aac_tableinit();
1109
1110    return 0;
1111}
1112
1113#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1114static const AVOption aacenc_options[] = {
1115    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_FAST}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
1116        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1117        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1118        {"fast",     "Default fast search",       0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
1119    {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1120    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1121    {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1122    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1123    {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1124    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1125    {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1126    FF_AAC_PROFILE_OPTS
1127    {NULL}
1128};
1129
1130static const AVClass aacenc_class = {
1131    .class_name = "AAC encoder",
1132    .item_name  = av_default_item_name,
1133    .option     = aacenc_options,
1134    .version    = LIBAVUTIL_VERSION_INT,
1135};
1136
1137static const FFCodecDefault aac_encode_defaults[] = {
1138    { "b", "0" },
1139    { NULL }
1140};
1141
1142const FFCodec ff_aac_encoder = {
1143    .p.name         = "aac",
1144    .p.long_name    = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
1145    .p.type         = AVMEDIA_TYPE_AUDIO,
1146    .p.id           = AV_CODEC_ID_AAC,
1147    .priv_data_size = sizeof(AACEncContext),
1148    .init           = aac_encode_init,
1149    FF_CODEC_ENCODE_CB(aac_encode_frame),
1150    .close          = aac_encode_end,
1151    .defaults       = aac_encode_defaults,
1152    .p.supported_samplerates = ff_mpeg4audio_sample_rates,
1153    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1154    .p.capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
1155    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
1156                                                     AV_SAMPLE_FMT_NONE },
1157    .p.priv_class   = &aacenc_class,
1158};
1159