xref: /third_party/ffmpeg/libavcodec/aaccoder.c (revision cabdff1a)
1/*
2 * AAC coefficients encoder
3 * Copyright (C) 2008-2009 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * AAC coefficients encoder
25 */
26
27/***********************************
28 *              TODOs:
29 * speedup quantizer selection
30 * add sane pulse detection
31 ***********************************/
32
33#include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34
35#include <float.h>
36
37#include "libavutil/mathematics.h"
38#include "mathops.h"
39#include "avcodec.h"
40#include "put_bits.h"
41#include "aac.h"
42#include "aacenc.h"
43#include "aactab.h"
44#include "aacenctab.h"
45#include "aacenc_utils.h"
46#include "aacenc_quantization.h"
47
48#include "aacenc_is.h"
49#include "aacenc_tns.h"
50#include "aacenc_ltp.h"
51#include "aacenc_pred.h"
52
53#include "libavcodec/aaccoder_twoloop.h"
54
55/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
56 * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
57#define NOISE_SPREAD_THRESHOLD 0.9f
58
59/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
60 * replace low energy non zero bands */
61#define NOISE_LAMBDA_REPLACE 1.948f
62
63#include "libavcodec/aaccoder_trellis.h"
64
/**
 * One node of the codebook-selection trellis used by
 * encode_window_bands_info(): the cheapest known way of coding all bands up
 * to a given scalefactor band when that band uses a given codebook.
 */
typedef struct BandCodingPath {
    int   prev_idx; ///< codebook index this node was reached from, -1 if there is none
    float cost;     ///< accumulated cost of the path ending in this node
    int   run;      ///< number of consecutive bands coded with this codebook so far
} BandCodingPath;
73
74/**
75 * Encode band info for single window group bands.
76 */
77static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
78                                     int win, int group_len, const float lambda)
79{
80    BandCodingPath path[120][CB_TOT_ALL];
81    int w, swb, cb, start, size;
82    int i, j;
83    const int max_sfb  = sce->ics.max_sfb;
84    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
85    const int run_esc  = (1 << run_bits) - 1;
86    int idx, ppos, count;
87    int stackrun[120], stackcb[120], stack_len;
88    float next_minrd = INFINITY;
89    int next_mincb = 0;
90
91    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
92    start = win*128;
93    for (cb = 0; cb < CB_TOT_ALL; cb++) {
94        path[0][cb].cost     = 0.0f;
95        path[0][cb].prev_idx = -1;
96        path[0][cb].run      = 0;
97    }
98    for (swb = 0; swb < max_sfb; swb++) {
99        size = sce->ics.swb_sizes[swb];
100        if (sce->zeroes[win*16 + swb]) {
101            for (cb = 0; cb < CB_TOT_ALL; cb++) {
102                path[swb+1][cb].prev_idx = cb;
103                path[swb+1][cb].cost     = path[swb][cb].cost;
104                path[swb+1][cb].run      = path[swb][cb].run + 1;
105            }
106        } else {
107            float minrd = next_minrd;
108            int mincb = next_mincb;
109            next_minrd = INFINITY;
110            next_mincb = 0;
111            for (cb = 0; cb < CB_TOT_ALL; cb++) {
112                float cost_stay_here, cost_get_here;
113                float rd = 0.0f;
114                if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
115                    cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
116                    path[swb+1][cb].prev_idx = -1;
117                    path[swb+1][cb].cost     = INFINITY;
118                    path[swb+1][cb].run      = path[swb][cb].run + 1;
119                    continue;
120                }
121                for (w = 0; w < group_len; w++) {
122                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
123                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
124                                             &s->scoefs[start + w*128], size,
125                                             sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
126                                             lambda / band->threshold, INFINITY, NULL, NULL, 0);
127                }
128                cost_stay_here = path[swb][cb].cost + rd;
129                cost_get_here  = minrd              + rd + run_bits + 4;
130                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
131                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
132                    cost_stay_here += run_bits;
133                if (cost_get_here < cost_stay_here) {
134                    path[swb+1][cb].prev_idx = mincb;
135                    path[swb+1][cb].cost     = cost_get_here;
136                    path[swb+1][cb].run      = 1;
137                } else {
138                    path[swb+1][cb].prev_idx = cb;
139                    path[swb+1][cb].cost     = cost_stay_here;
140                    path[swb+1][cb].run      = path[swb][cb].run + 1;
141                }
142                if (path[swb+1][cb].cost < next_minrd) {
143                    next_minrd = path[swb+1][cb].cost;
144                    next_mincb = cb;
145                }
146            }
147        }
148        start += sce->ics.swb_sizes[swb];
149    }
150
151    //convert resulting path from backward-linked list
152    stack_len = 0;
153    idx       = 0;
154    for (cb = 1; cb < CB_TOT_ALL; cb++)
155        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
156            idx = cb;
157    ppos = max_sfb;
158    while (ppos > 0) {
159        av_assert1(idx >= 0);
160        cb = idx;
161        stackrun[stack_len] = path[ppos][cb].run;
162        stackcb [stack_len] = cb;
163        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
164        ppos -= path[ppos][cb].run;
165        stack_len++;
166    }
167    //perform actual band info encoding
168    start = 0;
169    for (i = stack_len - 1; i >= 0; i--) {
170        cb = aac_cb_out_map[stackcb[i]];
171        put_bits(&s->pb, 4, cb);
172        count = stackrun[i];
173        memset(sce->zeroes + win*16 + start, !cb, count);
174        //XXX: memset when band_type is also uint8_t
175        for (j = 0; j < count; j++) {
176            sce->band_type[win*16 + start] = cb;
177            start++;
178        }
179        while (count >= run_esc) {
180            put_bits(&s->pb, run_bits, run_esc);
181            count -= run_esc;
182        }
183        put_bits(&s->pb, run_bits, count);
184    }
185}
186
187
/**
 * One node of the scalefactor trellis built by search_for_quantizers_anmr().
 */
typedef struct TrellisPath {
    float cost; ///< accumulated cost of the cheapest path reaching this node
    int   prev; ///< state in the previous stage this node was reached from, negative if none
} TrellisPath;

/** Number of trellis stages; stage 0 is the initial state, bands start at stage 1. */
#define TRELLIS_STAGES 121
/** Number of scalefactor states per trellis stage. */
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)
195
196static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
197{
198    int w, g;
199    int prevscaler_n = -255, prevscaler_i = 0;
200    int bands = 0;
201
202    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
203        for (g = 0; g < sce->ics.num_swb; g++) {
204            if (sce->zeroes[w*16+g])
205                continue;
206            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
207                sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
208                bands++;
209            } else if (sce->band_type[w*16+g] == NOISE_BT) {
210                sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
211                if (prevscaler_n == -255)
212                    prevscaler_n = sce->sf_idx[w*16+g];
213                bands++;
214            }
215        }
216    }
217
218    if (!bands)
219        return;
220
221    /* Clip the scalefactor indices */
222    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
223        for (g = 0; g < sce->ics.num_swb; g++) {
224            if (sce->zeroes[w*16+g])
225                continue;
226            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
227                sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
228            } else if (sce->band_type[w*16+g] == NOISE_BT) {
229                sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
230            }
231        }
232    }
233}
234
235static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
236                                       SingleChannelElement *sce,
237                                       const float lambda)
238{
239    int q, w, w2, g, start = 0;
240    int i, j;
241    int idx;
242    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
243    int bandaddr[TRELLIS_STAGES];
244    int minq;
245    float mincost;
246    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
247    int q0, q1, qcnt = 0;
248
249    for (i = 0; i < 1024; i++) {
250        float t = fabsf(sce->coeffs[i]);
251        if (t > 0.0f) {
252            q0f = FFMIN(q0f, t);
253            q1f = FFMAX(q1f, t);
254            qnrgf += t*t;
255            qcnt++;
256        }
257    }
258
259    if (!qcnt) {
260        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
261        memset(sce->zeroes, 1, sizeof(sce->zeroes));
262        return;
263    }
264
265    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
266    q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
267    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
268    q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
269    if (q1 - q0 > 60) {
270        int q0low  = q0;
271        int q1high = q1;
272        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
273        int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
274        q1 = qnrg + 30;
275        q0 = qnrg - 30;
276        if (q0 < q0low) {
277            q1 += q0low - q0;
278            q0  = q0low;
279        } else if (q1 > q1high) {
280            q0 -= q1 - q1high;
281            q1  = q1high;
282        }
283    }
284    // q0 == q1 isn't really a legal situation
285    if (q0 == q1) {
286        // the following is indirect but guarantees q1 != q0 && q1 near q0
287        q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
288        q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
289    }
290
291    for (i = 0; i < TRELLIS_STATES; i++) {
292        paths[0][i].cost    = 0.0f;
293        paths[0][i].prev    = -1;
294    }
295    for (j = 1; j < TRELLIS_STAGES; j++) {
296        for (i = 0; i < TRELLIS_STATES; i++) {
297            paths[j][i].cost    = INFINITY;
298            paths[j][i].prev    = -2;
299        }
300    }
301    idx = 1;
302    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
303    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
304        start = w*128;
305        for (g = 0; g < sce->ics.num_swb; g++) {
306            const float *coefs = &sce->coeffs[start];
307            float qmin, qmax;
308            int nz = 0;
309
310            bandaddr[idx] = w * 16 + g;
311            qmin = INT_MAX;
312            qmax = 0.0f;
313            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
314                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
315                if (band->energy <= band->threshold || band->threshold == 0.0f) {
316                    sce->zeroes[(w+w2)*16+g] = 1;
317                    continue;
318                }
319                sce->zeroes[(w+w2)*16+g] = 0;
320                nz = 1;
321                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
322                    float t = fabsf(coefs[w2*128+i]);
323                    if (t > 0.0f)
324                        qmin = FFMIN(qmin, t);
325                    qmax = FFMAX(qmax, t);
326                }
327            }
328            if (nz) {
329                int minscale, maxscale;
330                float minrd = INFINITY;
331                float maxval;
332                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
333                minscale = coef2minsf(qmin);
334                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
335                maxscale = coef2maxsf(qmax);
336                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
337                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
338                if (minscale == maxscale) {
339                    maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
340                    minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
341                }
342                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
343                for (q = minscale; q < maxscale; q++) {
344                    float dist = 0;
345                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
346                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
347                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
348                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
349                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
350                    }
351                    minrd = FFMIN(minrd, dist);
352
353                    for (i = 0; i < q1 - q0; i++) {
354                        float cost;
355                        cost = paths[idx - 1][i].cost + dist
356                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
357                        if (cost < paths[idx][q].cost) {
358                            paths[idx][q].cost    = cost;
359                            paths[idx][q].prev    = i;
360                        }
361                    }
362                }
363            } else {
364                for (q = 0; q < q1 - q0; q++) {
365                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
366                    paths[idx][q].prev = q;
367                }
368            }
369            sce->zeroes[w*16+g] = !nz;
370            start += sce->ics.swb_sizes[g];
371            idx++;
372        }
373    }
374    idx--;
375    mincost = paths[idx][0].cost;
376    minq    = 0;
377    for (i = 1; i < TRELLIS_STATES; i++) {
378        if (paths[idx][i].cost < mincost) {
379            mincost = paths[idx][i].cost;
380            minq = i;
381        }
382    }
383    while (idx) {
384        sce->sf_idx[bandaddr[idx]] = minq + q0;
385        minq = FFMAX(paths[idx][minq].prev, 0);
386        idx--;
387    }
388    //set the same quantizers inside window groups
389    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
390        for (g = 0; g < sce->ics.num_swb; g++)
391            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
392                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
393}
394
395static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
396                                       SingleChannelElement *sce,
397                                       const float lambda)
398{
399    int start = 0, i, w, w2, g;
400    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->ch_layout.nb_channels * (lambda / 120.f);
401    float dists[128] = { 0 }, uplims[128] = { 0 };
402    float maxvals[128];
403    int fflag, minscaler;
404    int its  = 0;
405    int allz = 0;
406    float minthr = INFINITY;
407
408    // for values above this the decoder might end up in an endless loop
409    // due to always having more bits than what can be encoded.
410    destbits = FFMIN(destbits, 5800);
411    //some heuristic to determine initial quantizers will reduce search time
412    //determine zero bands and upper limits
413    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
414        start = 0;
415        for (g = 0; g < sce->ics.num_swb; g++) {
416            int nz = 0;
417            float uplim = 0.0f;
418            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
419                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
420                uplim += band->threshold;
421                if (band->energy <= band->threshold || band->threshold == 0.0f) {
422                    sce->zeroes[(w+w2)*16+g] = 1;
423                    continue;
424                }
425                nz = 1;
426            }
427            uplims[w*16+g] = uplim *512;
428            sce->band_type[w*16+g] = 0;
429            sce->zeroes[w*16+g] = !nz;
430            if (nz)
431                minthr = FFMIN(minthr, uplim);
432            allz |= nz;
433            start += sce->ics.swb_sizes[g];
434        }
435    }
436    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
437        for (g = 0; g < sce->ics.num_swb; g++) {
438            if (sce->zeroes[w*16+g]) {
439                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
440                continue;
441            }
442            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
443        }
444    }
445
446    if (!allz)
447        return;
448    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
449    ff_quantize_band_cost_cache_init(s);
450
451    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
452        start = w*128;
453        for (g = 0; g < sce->ics.num_swb; g++) {
454            const float *scaled = s->scoefs + start;
455            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
456            start += sce->ics.swb_sizes[g];
457        }
458    }
459
460    //perform two-loop search
461    //outer loop - improve quality
462    do {
463        int tbits, qstep;
464        minscaler = sce->sf_idx[0];
465        //inner loop - quantize spectrum to fit into given number of bits
466        qstep = its ? 1 : 32;
467        do {
468            int prev = -1;
469            tbits = 0;
470            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
471                start = w*128;
472                for (g = 0; g < sce->ics.num_swb; g++) {
473                    const float *coefs = sce->coeffs + start;
474                    const float *scaled = s->scoefs + start;
475                    int bits = 0;
476                    int cb;
477                    float dist = 0.0f;
478
479                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
480                        start += sce->ics.swb_sizes[g];
481                        continue;
482                    }
483                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
484                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
485                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
486                        int b;
487                        dist += quantize_band_cost_cached(s, w + w2, g,
488                                                          coefs + w2*128,
489                                                          scaled + w2*128,
490                                                          sce->ics.swb_sizes[g],
491                                                          sce->sf_idx[w*16+g],
492                                                          cb, 1.0f, INFINITY,
493                                                          &b, NULL, 0);
494                        bits += b;
495                    }
496                    dists[w*16+g] = dist - bits;
497                    if (prev != -1) {
498                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
499                    }
500                    tbits += bits;
501                    start += sce->ics.swb_sizes[g];
502                    prev = sce->sf_idx[w*16+g];
503                }
504            }
505            if (tbits > destbits) {
506                for (i = 0; i < 128; i++)
507                    if (sce->sf_idx[i] < 218 - qstep)
508                        sce->sf_idx[i] += qstep;
509            } else {
510                for (i = 0; i < 128; i++)
511                    if (sce->sf_idx[i] > 60 - qstep)
512                        sce->sf_idx[i] -= qstep;
513            }
514            qstep >>= 1;
515            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
516                qstep = 1;
517        } while (qstep);
518
519        fflag = 0;
520        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
521
522        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
523            for (g = 0; g < sce->ics.num_swb; g++) {
524                int prevsc = sce->sf_idx[w*16+g];
525                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
526                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
527                        sce->sf_idx[w*16+g]--;
528                    else //Try to make sure there is some energy in every band
529                        sce->sf_idx[w*16+g]-=2;
530                }
531                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
532                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
533                if (sce->sf_idx[w*16+g] != prevsc)
534                    fflag = 1;
535                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
536            }
537        }
538        its++;
539    } while (fflag && its < 10);
540}
541
542static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
543{
544    FFPsyBand *band;
545    int w, g, w2, i;
546    int wlen = 1024 / sce->ics.num_windows;
547    int bandwidth, cutoff;
548    float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
549    float *NOR34 = &s->scoefs[3*128];
550    uint8_t nextband[128];
551    const float lambda = s->lambda;
552    const float freq_mult = avctx->sample_rate*0.5f/wlen;
553    const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
554    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
555    const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
556    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
557
558    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
559        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)
560        * (lambda / 120.f);
561
562    /** Keep this in sync with twoloop's cutoff selection */
563    float rate_bandwidth_multiplier = 1.5f;
564    int prev = -1000, prev_sf = -1;
565    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
566        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
567        : (avctx->bit_rate / avctx->ch_layout.nb_channels);
568
569    frame_bit_rate *= 1.15f;
570
571    if (avctx->cutoff > 0) {
572        bandwidth = avctx->cutoff;
573    } else {
574        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
575    }
576
577    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
578
579    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
580    ff_init_nextband_map(sce, nextband);
581    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
582        int wstart = w*128;
583        for (g = 0; g < sce->ics.num_swb; g++) {
584            int noise_sfi;
585            float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
586            float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
587            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
588            float min_energy = -1.0f, max_energy = 0.0f;
589            const int start = wstart+sce->ics.swb_offset[g];
590            const float freq = (start-wstart)*freq_mult;
591            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
592            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
593                if (!sce->zeroes[w*16+g])
594                    prev_sf = sce->sf_idx[w*16+g];
595                continue;
596            }
597            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
598                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
599                sfb_energy += band->energy;
600                spread     = FFMIN(spread, band->spread);
601                threshold  += band->threshold;
602                if (!w2) {
603                    min_energy = max_energy = band->energy;
604                } else {
605                    min_energy = FFMIN(min_energy, band->energy);
606                    max_energy = FFMAX(max_energy, band->energy);
607                }
608            }
609
610            /* Ramps down at ~8000Hz and loosens the dist threshold */
611            dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
612
613            /* PNS is acceptable when all of these are true:
614             * 1. high spread energy (noise-like band)
615             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
616             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
617             *
618             * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
619             */
620            if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
621                ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
622                (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
623                min_energy < pns_transient_energy_r * max_energy ) {
624                sce->pns_ener[w*16+g] = sfb_energy;
625                if (!sce->zeroes[w*16+g])
626                    prev_sf = sce->sf_idx[w*16+g];
627                continue;
628            }
629
630            pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
631            noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
632            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
633            if (prev != -1000) {
634                int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
635                if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
636                    if (!sce->zeroes[w*16+g])
637                        prev_sf = sce->sf_idx[w*16+g];
638                    continue;
639                }
640            }
641            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
642                float band_energy, scale, pns_senergy;
643                const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
644                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
645                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
646                    s->random_state  = lcg_random(s->random_state);
647                    PNS[i] = s->random_state;
648                }
649                band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
650                scale = noise_amp/sqrtf(band_energy);
651                s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
652                pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
653                pns_energy += pns_senergy;
654                s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
655                s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]);
656                dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
657                                            NOR34,
658                                            sce->ics.swb_sizes[g],
659                                            sce->sf_idx[(w+w2)*16+g],
660                                            sce->band_alt[(w+w2)*16+g],
661                                            lambda/band->threshold, INFINITY, NULL, NULL, 0);
662                /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
663                dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
664            }
665            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
666                dist2 += 5;
667            } else {
668                dist2 += 9;
669            }
670            energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
671            sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
672            if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
673                sce->band_type[w*16+g] = NOISE_BT;
674                sce->zeroes[w*16+g] = 0;
675                prev = noise_sfi;
676            } else {
677                if (!sce->zeroes[w*16+g])
678                    prev_sf = sce->sf_idx[w*16+g];
679            }
680        }
681    }
682}
683
684static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
685{
686    FFPsyBand *band;
687    int w, g, w2;
688    int wlen = 1024 / sce->ics.num_windows;
689    int bandwidth, cutoff;
690    const float lambda = s->lambda;
691    const float freq_mult = avctx->sample_rate*0.5f/wlen;
692    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
693    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
694
695    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
696        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)
697        * (lambda / 120.f);
698
699    /** Keep this in sync with twoloop's cutoff selection */
700    float rate_bandwidth_multiplier = 1.5f;
701    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
702        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
703        : (avctx->bit_rate / avctx->ch_layout.nb_channels);
704
705    frame_bit_rate *= 1.15f;
706
707    if (avctx->cutoff > 0) {
708        bandwidth = avctx->cutoff;
709    } else {
710        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
711    }
712
713    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
714
715    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
716    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
717        for (g = 0; g < sce->ics.num_swb; g++) {
718            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
719            float min_energy = -1.0f, max_energy = 0.0f;
720            const int start = sce->ics.swb_offset[g];
721            const float freq = start*freq_mult;
722            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
723            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
724                sce->can_pns[w*16+g] = 0;
725                continue;
726            }
727            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
728                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
729                sfb_energy += band->energy;
730                spread     = FFMIN(spread, band->spread);
731                threshold  += band->threshold;
732                if (!w2) {
733                    min_energy = max_energy = band->energy;
734                } else {
735                    min_energy = FFMIN(min_energy, band->energy);
736                    max_energy = FFMAX(max_energy, band->energy);
737                }
738            }
739
740            /* PNS is acceptable when all of these are true:
741             * 1. high spread energy (noise-like band)
742             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
743             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
744             */
745            sce->pns_ener[w*16+g] = sfb_energy;
746            if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
747                sce->can_pns[w*16+g] = 0;
748            } else {
749                sce->can_pns[w*16+g] = 1;
750            }
751        }
752    }
753}
754
755static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
756{
757    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
758    uint8_t nextband0[128], nextband1[128];
759    float *M   = s->scoefs + 128*0, *S   = s->scoefs + 128*1;
760    float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3;
761    float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5;
762    const float lambda = s->lambda;
763    const float mslambda = FFMIN(1.0f, lambda / 120.f);
764    SingleChannelElement *sce0 = &cpe->ch[0];
765    SingleChannelElement *sce1 = &cpe->ch[1];
766    if (!cpe->common_window)
767        return;
768
769    /** Scout out next nonzero bands */
770    ff_init_nextband_map(sce0, nextband0);
771    ff_init_nextband_map(sce1, nextband1);
772
773    prev_mid = sce0->sf_idx[0];
774    prev_side = sce1->sf_idx[0];
775    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
776        start = 0;
777        for (g = 0; g < sce0->ics.num_swb; g++) {
778            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
779            if (!cpe->is_mask[w*16+g])
780                cpe->ms_mask[w*16+g] = 0;
781            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
782                float Mmax = 0.0f, Smax = 0.0f;
783
784                /* Must compute mid/side SF and book for the whole window group */
785                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
786                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
787                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
788                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
789                        S[i] =  M[i]
790                              - sce1->coeffs[start+(w+w2)*128+i];
791                    }
792                    s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
793                    s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
794                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
795                        Mmax = FFMAX(Mmax, M34[i]);
796                        Smax = FFMAX(Smax, S34[i]);
797                    }
798                }
799
800                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
801                    float dist1 = 0.0f, dist2 = 0.0f;
802                    int B0 = 0, B1 = 0;
803                    int minidx;
804                    int mididx, sididx;
805                    int midcb, sidcb;
806
807                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
808                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
809                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
810                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
811                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
812                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
813                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
814                        continue;
815                    }
816
817                    midcb = find_min_book(Mmax, mididx);
818                    sidcb = find_min_book(Smax, sididx);
819
820                    /* No CB can be zero */
821                    midcb = FFMAX(1,midcb);
822                    sidcb = FFMAX(1,sidcb);
823
824                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
825                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
826                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
827                        float minthr = FFMIN(band0->threshold, band1->threshold);
828                        int b1,b2,b3,b4;
829                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
830                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
831                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
832                            S[i] =  M[i]
833                                  - sce1->coeffs[start+(w+w2)*128+i];
834                        }
835
836                        s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
837                        s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
838                        s->abs_pow34(M34, M,                         sce0->ics.swb_sizes[g]);
839                        s->abs_pow34(S34, S,                         sce0->ics.swb_sizes[g]);
840                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
841                                                    L34,
842                                                    sce0->ics.swb_sizes[g],
843                                                    sce0->sf_idx[w*16+g],
844                                                    sce0->band_type[w*16+g],
845                                                    lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0);
846                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
847                                                    R34,
848                                                    sce1->ics.swb_sizes[g],
849                                                    sce1->sf_idx[w*16+g],
850                                                    sce1->band_type[w*16+g],
851                                                    lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0);
852                        dist2 += quantize_band_cost(s, M,
853                                                    M34,
854                                                    sce0->ics.swb_sizes[g],
855                                                    mididx,
856                                                    midcb,
857                                                    lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0);
858                        dist2 += quantize_band_cost(s, S,
859                                                    S34,
860                                                    sce1->ics.swb_sizes[g],
861                                                    sididx,
862                                                    sidcb,
863                                                    mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0);
864                        B0 += b1+b2;
865                        B1 += b3+b4;
866                        dist1 -= b1+b2;
867                        dist2 -= b3+b4;
868                    }
869                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
870                    if (cpe->ms_mask[w*16+g]) {
871                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
872                            sce0->sf_idx[w*16+g] = mididx;
873                            sce1->sf_idx[w*16+g] = sididx;
874                            sce0->band_type[w*16+g] = midcb;
875                            sce1->band_type[w*16+g] = sidcb;
876                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
877                            /* ms_mask unneeded, and it confuses some decoders */
878                            cpe->ms_mask[w*16+g] = 0;
879                        }
880                        break;
881                    } else if (B1 > B0) {
882                        /* More boost won't fix this */
883                        break;
884                    }
885                }
886            }
887            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
888                prev_mid = sce0->sf_idx[w*16+g];
889            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
890                prev_side = sce1->sf_idx[w*16+g];
891            start += sce0->ics.swb_sizes[g];
892        }
893    }
894}
895
896const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
897    [AAC_CODER_ANMR] = {
898        search_for_quantizers_anmr,
899        encode_window_bands_info,
900        quantize_and_encode_band,
901        ff_aac_encode_tns_info,
902        ff_aac_encode_ltp_info,
903        ff_aac_encode_main_pred,
904        ff_aac_adjust_common_pred,
905        ff_aac_adjust_common_ltp,
906        ff_aac_apply_main_pred,
907        ff_aac_apply_tns,
908        ff_aac_update_ltp,
909        ff_aac_ltp_insert_new_frame,
910        set_special_band_scalefactors,
911        search_for_pns,
912        mark_pns,
913        ff_aac_search_for_tns,
914        ff_aac_search_for_ltp,
915        search_for_ms,
916        ff_aac_search_for_is,
917        ff_aac_search_for_pred,
918    },
919    [AAC_CODER_TWOLOOP] = {
920        search_for_quantizers_twoloop,
921        codebook_trellis_rate,
922        quantize_and_encode_band,
923        ff_aac_encode_tns_info,
924        ff_aac_encode_ltp_info,
925        ff_aac_encode_main_pred,
926        ff_aac_adjust_common_pred,
927        ff_aac_adjust_common_ltp,
928        ff_aac_apply_main_pred,
929        ff_aac_apply_tns,
930        ff_aac_update_ltp,
931        ff_aac_ltp_insert_new_frame,
932        set_special_band_scalefactors,
933        search_for_pns,
934        mark_pns,
935        ff_aac_search_for_tns,
936        ff_aac_search_for_ltp,
937        search_for_ms,
938        ff_aac_search_for_is,
939        ff_aac_search_for_pred,
940    },
941    [AAC_CODER_FAST] = {
942        search_for_quantizers_fast,
943        codebook_trellis_rate,
944        quantize_and_encode_band,
945        ff_aac_encode_tns_info,
946        ff_aac_encode_ltp_info,
947        ff_aac_encode_main_pred,
948        ff_aac_adjust_common_pred,
949        ff_aac_adjust_common_ltp,
950        ff_aac_apply_main_pred,
951        ff_aac_apply_tns,
952        ff_aac_update_ltp,
953        ff_aac_ltp_insert_new_frame,
954        set_special_band_scalefactors,
955        search_for_pns,
956        mark_pns,
957        ff_aac_search_for_tns,
958        ff_aac_search_for_ltp,
959        search_for_ms,
960        ff_aac_search_for_is,
961        ff_aac_search_for_pred,
962    },
963};
964