xref: /third_party/ffmpeg/libavcodec/aacenc.h (revision cabdff1a)
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#ifndef AVCODEC_AACENC_H
23#define AVCODEC_AACENC_H
24
25#include "libavutil/channel_layout.h"
26#include "libavutil/float_dsp.h"
27#include "libavutil/mem_internal.h"
28
29#include "avcodec.h"
30#include "put_bits.h"
31
32#include "aac.h"
33#include "audio_frame_queue.h"
34#include "psymodel.h"
35
36#include "lpc.h"
37
/**
 * Identifiers of the selectable coders.
 *
 * The enumerators are consecutive starting at 0, so AAC_CODER_NB (which is
 * not a coder itself) equals the number of coders declared before it.
 * NOTE(review): presumably these values index ff_aac_coders[] and are stored
 * in AACEncOptions.coder -- confirm against the encoder implementation.
 */
typedef enum AACCoder {
    AAC_CODER_ANMR = 0,
    AAC_CODER_TWOLOOP,
    AAC_CODER_FAST,

    AAC_CODER_NB,           ///< number of coders; must stay last
} AACCoder;
45
/**
 * User-selectable encoder settings.
 *
 * Every member is a plain int. The accepted values and their meaning
 * (on/off, automatic, ...) are defined where the options are declared and
 * consumed, which is outside this header.
 */
typedef struct AACEncOptions {
    int coder;              ///< coder selection (presumably an AACCoder value -- confirm)
    int pns;                ///< PNS setting
    int tns;                ///< TNS setting
    int ltp;                ///< LTP setting
    int pce;                ///< PCE (Program Configuration Element) setting
    int pred;               ///< prediction setting
    int mid_side;           ///< mid/side stereo setting
    int intensity_stereo;   ///< intensity stereo setting
} AACEncOptions;
56
57struct AACEncContext;
58
59typedef struct AACCoefficientsEncoder {
60    void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s,
61                                  SingleChannelElement *sce, const float lambda);
62    void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
63                                     int win, int group_len, const float lambda);
64    void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size,
65                                     int scale_idx, int cb, const float lambda, int rtz);
66    void (*encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce);
67    void (*encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
68    void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
69    void (*adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe);
70    void (*adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe);
71    void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
72    void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
73    void (*update_ltp)(struct AACEncContext *s, SingleChannelElement *sce);
74    void (*ltp_insert_new_frame)(struct AACEncContext *s);
75    void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
76    void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
77    void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
78    void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
79    void (*search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
80    void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
81    void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
82    void (*search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce);
83} AACCoefficientsEncoder;
84
85extern const AACCoefficientsEncoder ff_aac_coders[];
86
/**
 * One slot of the quantize_band_cost memoization area
 * (AACEncContext.quantize_band_cost_cache).
 *
 * NOTE(review): the field roles below are inferred from their names and from
 * the parameters of quantize_and_encode_band(); confirm against the code
 * that fills and reads the cache.
 */
typedef struct AACQuantizeBandCostCacheEntry {
    float rd;               ///< cached cost value (presumably rate-distortion)
    float energy;           ///< cached energy value
    int bits;               ///< cached bit count
    char cb;                ///< codebook the entry was computed for (presumably)
    char rtz;               ///< rtz argument the entry was computed for (presumably)
    uint16_t generation;    ///< presumably compared with quantize_band_cost_cache_generation to detect stale entries
} AACQuantizeBandCostCacheEntry;
95
96typedef struct AACPCEInfo {
97    AVChannelLayout layout;
98    int num_ele[4];                              ///< front, side, back, lfe
99    int pairing[3][8];                           ///< front, side, back
100    int index[4][8];                             ///< front, side, back, lfe
101    uint8_t config_map[16];                      ///< configs the encoder's channel specific settings
102    uint8_t reorder_map[16];                     ///< maps channels from lavc to aac order
103} AACPCEInfo;
104
105/**
106 * List of PCE (Program Configuration Element) for the channel layouts listed
107 * in channel_layout.h
108 *
109 * For those wishing in the future to add other layouts:
110 *
111 * - num_ele: number of elements in each group of front, side, back, lfe channels
112 *            (an element is of type SCE (single channel), CPE (channel pair) for
113 *            the first 3 groups; and is LFE for LFE group).
114 *
115 * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
116 *
117 * - index: there are three independent indices for SCE, CPE and LFE;
118 *     they are incremented irrespective of the group to which the element belongs;
119 *     they are not reset when going from one group to another
120 *
121 *     Example: for 7.0 channel layout,
122 *        .pairing = { { 1, 0 }, { 1 }, { 1 }, }, (3 CPE and 1 SCE in front group)
123 *        .index = { { 0, 0 }, { 1 }, { 2 }, },
124 *               (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
125 *
126 *     The index order impacts the channel ordering. But is otherwise arbitrary
127 *     (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
128 *
129 *     Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
130 *     SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
131 *     which at this time requires that indices fully cover some range starting
132 *     from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
133 *
134 * - config_map: total number of elements and their types. Beware, the way the
135 *               types are ordered impacts the final channel ordering.
136 *
137 * - reorder_map: reorders the channels.
138 *
139 */
140static const AACPCEInfo aac_pce_configs[] = {
141    {
142        .layout = AV_CHANNEL_LAYOUT_MONO,
143        .num_ele = { 1, 0, 0, 0 },
144        .pairing = { { 0 }, },
145        .index = { { 0 }, },
146        .config_map = { 1, TYPE_SCE, },
147        .reorder_map = { 0 },
148    },
149    {
150        .layout = AV_CHANNEL_LAYOUT_STEREO,
151        .num_ele = { 1, 0, 0, 0 },
152        .pairing = { { 1 }, },
153        .index = { { 0 }, },
154        .config_map = { 1, TYPE_CPE, },
155        .reorder_map = { 0, 1 },
156    },
157    {
158        .layout = AV_CHANNEL_LAYOUT_2POINT1,
159        .num_ele = { 1, 0, 0, 1 },
160        .pairing = { { 1 }, },
161        .index = { { 0 },{ 0 },{ 0 },{ 0 } },
162        .config_map = { 2, TYPE_CPE, TYPE_LFE },
163        .reorder_map = { 0, 1, 2 },
164    },
165    {
166        .layout = AV_CHANNEL_LAYOUT_2_1,
167        .num_ele = { 1, 0, 1, 0 },
168        .pairing = { { 1 },{ 0 },{ 0 } },
169        .index = { { 0 },{ 0 },{ 0 }, },
170        .config_map = { 2, TYPE_CPE, TYPE_SCE },
171        .reorder_map = { 0, 1, 2 },
172    },
173    {
174        .layout = AV_CHANNEL_LAYOUT_SURROUND,
175        .num_ele = { 2, 0, 0, 0 },
176        .pairing = { { 1, 0 }, },
177        .index = { { 0, 0 }, },
178        .config_map = { 2, TYPE_CPE, TYPE_SCE, },
179        .reorder_map = { 0, 1, 2 },
180    },
181    {
182        .layout = AV_CHANNEL_LAYOUT_3POINT1,
183        .num_ele = { 2, 0, 0, 1 },
184        .pairing = { { 1, 0 }, },
185        .index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
186        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_LFE },
187        .reorder_map = { 0, 1, 2, 3 },
188    },
189    {
190        .layout = AV_CHANNEL_LAYOUT_4POINT0,
191        .num_ele = { 2, 0, 1, 0 },
192        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
193        .index = { { 0, 0 }, { 0 }, { 1 } },
194        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_SCE },
195        .reorder_map = {  0, 1, 2, 3 },
196    },
197    {
198        .layout = AV_CHANNEL_LAYOUT_4POINT1,
199        .num_ele = { 2, 1, 1, 0 },
200        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
201        .index = { { 0, 0 }, { 1 }, { 2 }, { 0 } },
202        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_SCE },
203        .reorder_map = { 0, 1, 2, 3, 4 },
204    },
205    {
206        .layout = AV_CHANNEL_LAYOUT_2_2,
207        .num_ele = { 1, 1, 0, 0 },
208        .pairing = { { 1 }, { 1 }, },
209        .index = { { 0 }, { 1 }, },
210        .config_map = { 2, TYPE_CPE, TYPE_CPE },
211        .reorder_map = { 0, 1, 2, 3 },
212    },
213    {
214        .layout = AV_CHANNEL_LAYOUT_QUAD,
215        .num_ele = { 1, 0, 1, 0 },
216        .pairing = { { 1 }, { 0 }, { 1 }, },
217        .index = { { 0 }, { 0 }, { 1 } },
218        .config_map = { 2, TYPE_CPE, TYPE_CPE },
219        .reorder_map = { 0, 1, 2, 3 },
220    },
221    {
222        .layout = AV_CHANNEL_LAYOUT_5POINT0,
223        .num_ele = { 2, 1, 0, 0 },
224        .pairing = { { 1, 0 }, { 1 }, },
225        .index = { { 0, 0 }, { 1 } },
226        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
227        .reorder_map = { 0, 1, 2, 3, 4 },
228    },
229    {
230        .layout = AV_CHANNEL_LAYOUT_5POINT1,
231        .num_ele = { 2, 1, 1, 0 },
232        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
233        .index = { { 0, 0 }, { 1 }, { 1 } },
234        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
235        .reorder_map = { 0, 1, 2, 3, 4, 5 },
236    },
237    {
238        .layout = AV_CHANNEL_LAYOUT_5POINT0_BACK,
239        .num_ele = { 2, 0, 1, 0 },
240        .pairing = { { 1, 0 }, { 0 }, { 1 } },
241        .index = { { 0, 0 }, { 0 }, { 1 } },
242        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
243        .reorder_map = { 0, 1, 2, 3, 4 },
244    },
245    {
246        .layout = AV_CHANNEL_LAYOUT_5POINT1_BACK,
247        .num_ele = { 2, 1, 1, 0 },
248        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
249        .index = { { 0, 0 }, { 1 }, { 1 } },
250        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
251        .reorder_map = { 0, 1, 2, 3, 4, 5 },
252    },
253    {
254        .layout = AV_CHANNEL_LAYOUT_6POINT0,
255        .num_ele = { 2, 1, 1, 0 },
256        .pairing = { { 1, 0 }, { 1 }, { 0 }, },
257        .index = { { 0, 0 }, { 1 }, { 1 } },
258        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
259        .reorder_map = { 0, 1, 2, 3, 4, 5 },
260    },
261    {
262        .layout = AV_CHANNEL_LAYOUT_6POINT0_FRONT,
263        .num_ele = { 2, 1, 0, 0 },
264        .pairing = { { 1, 1 }, { 1 } },
265        .index = { { 1, 0 }, { 2 }, },
266        .config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, },
267        .reorder_map = { 0, 1, 2, 3, 4, 5 },
268    },
269    {
270        .layout = AV_CHANNEL_LAYOUT_HEXAGONAL,
271        .num_ele = { 2, 0, 2, 0 },
272        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
273        .index = { { 0, 0 },{ 0 },{ 1, 1 } },
274        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, },
275        .reorder_map = { 0, 1, 2, 3, 4, 5 },
276    },
277    {
278        .layout = AV_CHANNEL_LAYOUT_6POINT1,
279        .num_ele = { 2, 1, 2, 0 },
280        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
281        .index = { { 0, 0 },{ 1 },{ 1, 2 } },
282        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
283        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
284    },
285    {
286        .layout = AV_CHANNEL_LAYOUT_6POINT1_BACK,
287        .num_ele = { 2, 1, 2, 0 },
288        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
289        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
290        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
291        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
292    },
293    {
294        .layout = AV_CHANNEL_LAYOUT_6POINT1_FRONT,
295        .num_ele = { 2, 1, 2, 0 },
296        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
297        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
298        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
299        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
300    },
301    {
302        .layout = AV_CHANNEL_LAYOUT_7POINT0,
303        .num_ele = { 2, 1, 1, 0 },
304        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
305        .index = { { 0, 0 }, { 1 }, { 2 }, },
306        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
307        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
308    },
309    {
310        .layout = AV_CHANNEL_LAYOUT_7POINT0_FRONT,
311        .num_ele = { 2, 1, 1, 0 },
312        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
313        .index = { { 0, 0 }, { 1 }, { 2 }, },
314        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
315        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
316    },
317    {
318        .layout = AV_CHANNEL_LAYOUT_7POINT1,
319        .num_ele = { 2, 1, 2, 0 },
320        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
321        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
322        .config_map = { 5, TYPE_CPE, TYPE_SCE,  TYPE_SCE, TYPE_CPE, TYPE_CPE },
323        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
324    },
325    {
326        .layout = AV_CHANNEL_LAYOUT_7POINT1_WIDE,
327        .num_ele = { 2, 1, 2, 0 },
328        .pairing = { { 1, 0 }, { 0 },{  1, 1 }, },
329        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
330        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
331        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
332    },
333    {
334        .layout = AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK,
335        .num_ele = { 2, 1, 2, 0 },
336        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
337        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
338        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
339        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
340    },
341    {
342        .layout = AV_CHANNEL_LAYOUT_OCTAGONAL,
343        .num_ele = { 2, 1, 2, 0 },
344        .pairing = { { 1, 0 }, { 1 }, { 1, 0 }, },
345        .index = { { 0, 0 }, { 1 }, { 2, 1 } },
346        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
347        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
348    },
349    {   /* Meant for order 2/mixed ambisonics */
350        .layout = { .order = AV_CHANNEL_ORDER_NATIVE, .nb_channels = 9,
351                    .u.mask = AV_CH_LAYOUT_OCTAGONAL | AV_CH_TOP_CENTER },
352        .num_ele = { 2, 2, 2, 0 },
353        .pairing = { { 1, 0 }, { 1, 0 }, { 1, 0 }, },
354        .index = { { 0, 0 }, { 1, 1 }, { 2, 2 } },
355        .config_map = { 6, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
356        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8 },
357    },
358    {   /* Meant for order 2/mixed ambisonics */
359        .layout = { .order = AV_CHANNEL_ORDER_NATIVE, .nb_channels = 10,
360                    .u.mask = AV_CH_LAYOUT_6POINT0_FRONT | AV_CH_BACK_CENTER |
361                              AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_TOP_CENTER },
362        .num_ele = { 2, 2, 2, 0 },
363        .pairing = { { 1, 1 }, { 1, 0 }, { 1, 0 }, },
364        .index = { { 0, 1 }, { 2, 0 }, { 3, 1 } },
365        .config_map = { 6, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
366        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
367    },
368    {
369        .layout = AV_CHANNEL_LAYOUT_HEXADECAGONAL,
370        .num_ele = { 4, 2, 4, 0 },
371        .pairing = { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, },
372        .index = { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } },
373        .config_map = { 10, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
374        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
375    },
376};
377
378/**
379 * AAC encoder context
380 */
381typedef struct AACEncContext {
382    AVClass *av_class;
383    AACEncOptions options;                       ///< encoding options
384    PutBitContext pb;
385    FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
386    FFTContext mdct128;                          ///< short (128 samples) frame transform context
387    AVFloatDSPContext *fdsp;
388    AACPCEInfo pce;                              ///< PCE data, if needed
389    float *planar_samples[16];                   ///< saved preprocessed input
390
391    int profile;                                 ///< copied from avctx
392    int needs_pce;                               ///< flag for non-standard layout
393    LPCContext lpc;                              ///< used by TNS
394    int samplerate_index;                        ///< MPEG-4 samplerate index
395    int channels;                                ///< channel count
396    const uint8_t *reorder_map;                  ///< lavc to aac reorder map
397    const uint8_t *chan_map;                     ///< channel configuration map
398
399    ChannelElement *cpe;                         ///< channel elements
400    FFPsyContext psy;
401    struct FFPsyPreprocessContext* psypp;
402    const AACCoefficientsEncoder *coder;
403    int cur_channel;                             ///< current channel for coder context
404    int random_state;
405    float lambda;
406    int last_frame_pb_count;                     ///< number of bits for the previous frame
407    float lambda_sum;                            ///< sum(lambda), for Qvg reporting
408    int lambda_count;                            ///< count(lambda), for Qvg reporting
409    enum RawDataBlockType cur_type;              ///< channel group type cur_channel belongs to
410
411    AudioFrameQueue afq;
412    DECLARE_ALIGNED(16, int,   qcoefs)[96];      ///< quantized coefficients
413    DECLARE_ALIGNED(32, float, scoefs)[1024];    ///< scaled coefficients
414
415    uint16_t quantize_band_cost_cache_generation;
416    AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< memoization area for quantize_band_cost
417
418    void (*abs_pow34)(float *out, const float *in, const int size);
419    void (*quant_bands)(int *out, const float *in, const float *scaled,
420                        int size, int is_signed, int maxval, const float Q34,
421                        const float rounding);
422
423    struct {
424        float *samples;
425    } buffer;
426} AACEncContext;
427
428void ff_aac_dsp_init_x86(AACEncContext *s);
429void ff_aac_coder_init_mips(AACEncContext *c);
430void ff_quantize_band_cost_cache_init(struct AACEncContext *s);
431
432
433#endif /* AVCODEC_AACENC_H */
434