/*
 * AAC coefficients encoder
 * Copyright (C) 2008-2009 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC coefficients encoder
 *
 * This file provides the per-coder callbacks collected in ff_aac_coders[]:
 * scalefactor ("quantizer") searches, band codebook section encoding,
 * PNS marking/search and mid/side decision. Most per-band arrays are
 * indexed as [window*16 + band] and coefficient arrays as
 * [window*128 + offset], as can be seen throughout the code below.
 */

/***********************************
 *              TODOs:
 * speedup quantizer selection
 * add sane pulse detection
 ***********************************/

#include "libavutil/libm.h" // brought forward to work around cygwin header breakage

#include <float.h>

#include "libavutil/mathematics.h"
#include "mathops.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
#include "aacenc_quantization.h"

#include "aacenc_is.h"
#include "aacenc_tns.h"
#include "aacenc_ltp.h"
#include "aacenc_pred.h"

#include "libavcodec/aaccoder_twoloop.h"

/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
 * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
#define NOISE_SPREAD_THRESHOLD 0.9f

/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
 * replace low energy non zero bands */
#define NOISE_LAMBDA_REPLACE 1.948f

/* NOTE(review): this header is included only after the two NOISE_* macros
 * above are defined; presumably it uses them - confirm before reordering. */
#include "libavcodec/aaccoder_trellis.h"

/**
 * structure used in optimal codebook search
 *
 * One entry exists per (band position, codebook) pair in
 * encode_window_bands_info().
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;      ///< number of consecutive bands coded with this codebook so far
} BandCodingPath;

/**
 * Encode band info for single window group bands.
 *
 * Searches, band by band, for the cheapest assignment of codebooks to the
 * first sce->ics.max_sfb bands of the group, then writes the result to
 * s->pb as a sequence of sections (4-bit codebook followed by a run length
 * in run_bits-wide pieces) and stores the chosen codebooks in
 * sce->band_type[] and the matching zero flags in sce->zeroes[].
 *
 * @param s         encoder context; s->scoefs is overwritten with the
 *                  abs_pow34() image of sce->coeffs, s->pb receives the bits
 * @param sce       channel element whose band info is encoded and updated
 * @param win       index of the first window of the group (used as win*128
 *                  into the coefficients and win*16 into the band arrays)
 * @param group_len number of windows in the group
 * @param lambda    rate/distortion weight; each band is evaluated with
 *                  lambda / band->threshold
 */
static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                     int win, int group_len, const float lambda)
{
    BandCodingPath path[120][CB_TOT_ALL];
    int w, swb, cb, start, size;
    int i, j;
    const int max_sfb  = sce->ics.max_sfb;
    /* run lengths use 5-bit pieces when there is a single window, 3-bit otherwise */
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
    /* an all-ones piece is the escape: more run-length pieces follow */
    const int run_esc  = (1 << run_bits) - 1;
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    float next_minrd = INFINITY;
    int next_mincb = 0;

    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
    start = win*128;
    /* stage 0: every codebook is a free starting point */
    for (cb = 0; cb < CB_TOT_ALL; cb++) {
        path[0][cb].cost     = 0.0f;
        path[0][cb].prev_idx = -1;
        path[0][cb].run      = 0;
    }
    for (swb = 0; swb < max_sfb; swb++) {
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
            /* zeroed band: extend every running section at no extra cost */
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                path[swb+1][cb].prev_idx = cb;
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
            }
        } else {
            /* cheapest state of the previous non-zero stage: the source of
             * any newly opened section */
            float minrd = next_minrd;
            int mincb = next_mincb;
            next_minrd = INFINITY;
            next_mincb = 0;
            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
                /* candidate codebook not allowed for this band's current
                 * band_type: mark the state unreachable
                 * (&& binds tighter than ||, so this is (A && B) || (C && D)) */
                if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
                    cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
                    path[swb+1][cb].prev_idx = -1;
                    path[swb+1][cb].cost     = INFINITY;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                    continue;
                }
                /* accumulate the band cost over all windows of the group */
                for (w = 0; w < group_len; w++) {
                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
                                             &s->scoefs[start + w*128], size,
                                             sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
                                             lambda / band->threshold, INFINITY, NULL, NULL, 0);
                }
                /* continuing the current section vs. opening a new one
                 * (a new section costs one run piece plus the 4-bit codebook) */
                cost_stay_here = path[swb][cb].cost + rd;
                cost_get_here  = minrd              + rd + run_bits + 4;
                /* growing the run by one band needs one more run piece */
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
    idx       = 0;
    /* pick the cheapest final state */
    for (cb = 1; cb < CB_TOT_ALL; cb++)
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
            idx = cb;
    ppos = max_sfb;
    /* walk back one whole section at a time; sections are pushed last-first */
    while (ppos > 0) {
        av_assert1(idx >= 0);
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        /* the predecessor is stored at the first band of the section */
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0; /* from here on 'start' counts bands, not coefficients */
    for (i = stack_len - 1; i >= 0; i--) {
        cb = aac_cb_out_map[stackcb[i]];
        put_bits(&s->pb, 4, cb);
        count = stackrun[i];
        /* codebook 0 marks the bands of the section as zeroed */
        memset(sce->zeroes + win*16 + start, !cb, count);
        //XXX: memset when band_type is also uint8_t
        for (j = 0; j < count; j++) {
            sce->band_type[win*16 + start] = cb;
            start++;
        }
        /* emit escape pieces while the remaining run does not fit one piece */
        while (count >= run_esc) {
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}


/**
 * One state of the scalefactor trellis used by search_for_quantizers_anmr().
 */
typedef struct TrellisPath {
    float cost; ///< accumulated cost of the best path reaching this state
    int prev;   ///< state index in the previous stage (negative: none)
} TrellisPath;

/* number of trellis stages: stage 0 is the start, stages 1.. are bands */
#define TRELLIS_STAGES 121
/* number of scalefactor offsets (relative to q0) tracked per stage */
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)

/**
 * Derive the scalefactor indices of intensity and noise bands from their
 * stored energies, then limit the step between consecutive bands of the
 * same kind to SCALE_MAX_DIFF.
 *
 * Zeroed bands and bands of any other type are left untouched.
 *
 * @param s   encoder context (not used by this function)
 * @param sce channel element; sf_idx[] is updated from is_ener[] for
 *            INTENSITY_BT/INTENSITY_BT2 bands and from pns_ener[] for
 *            NOISE_BT bands
 */
static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
{
    int w, g;
    /* -255 is the "no noise band seen yet" sentinel for prevscaler_n */
    int prevscaler_n = -255, prevscaler_i = 0;
    int bands = 0;

    /* first pass: compute the raw indices and count the affected bands */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g])
                continue;
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
                sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
                bands++;
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
                sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
                /* the first noise band seeds the differential chain */
                if (prevscaler_n == -255)
                    prevscaler_n = sce->sf_idx[w*16+g];
                bands++;
            }
        }
    }

    if (!bands)
        return;

    /* Clip the scalefactor indices */
    /* each kind keeps its own running "previous" value; intensity starts at 0 */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g])
                continue;
            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
                sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
            } else if (sce->band_type[w*16+g] == NOISE_BT) {
                sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
            }
        }
    }
}

/**
 * Scalefactor search for the AAC_CODER_ANMR entry of ff_aac_coders[].
 *
 * Builds a trellis with one stage per (window group, band) and one state
 * per scalefactor offset from a global base q0, where each transition costs
 * the band's quantization cost plus the bits of the scalefactor difference,
 * and then back-tracks the cheapest path into sce->sf_idx[]. sce->zeroes[]
 * is set from the psychoacoustic energy/threshold comparison on the way.
 *
 * @param avctx  codec context (not used by this function)
 * @param s      encoder context; s->scoefs is overwritten with the
 *               abs_pow34() image of sce->coeffs
 * @param sce    channel element whose sf_idx[] and zeroes[] are written
 * @param lambda rate/distortion weight; each band is evaluated with
 *               lambda / band->threshold
 */
static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                       SingleChannelElement *sce,
                                       const float lambda)
{
    int q, w, w2, g, start = 0;
    int i, j;
    int idx;
    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
    int bandaddr[TRELLIS_STAGES]; /* stage -> w*16+g of the band it represents */
    int minq;
    float mincost;
    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
    int q0, q1, qcnt = 0;

    /* smallest/largest non-zero magnitude and total energy of the frame */
    for (i = 0; i < 1024; i++) {
        float t = fabsf(sce->coeffs[i]);
        if (t > 0.0f) {
            q0f = FFMIN(q0f, t);
            q1f = FFMAX(q1f, t);
            qnrgf += t*t;
            qcnt++;
        }
    }

    /* all coefficients are zero: mark everything zeroed and stop */
    if (!qcnt) {
        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
        memset(sce->zeroes, 1, sizeof(sce->zeroes));
        return;
    }

    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
    q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
    q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
    /* the range is too wide: use a 60-wide window centred on an index
     * derived from the RMS magnitude, shifted back inside [q0low, q1high] */
    if (q1 - q0 > 60) {
        int q0low  = q0;
        int q1high = q1;
        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
        int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
        q1 = qnrg + 30;
        q0 = qnrg - 30;
        if (q0 < q0low) {
            q1 += q0low - q0;
            q0  = q0low;
        } else if (q1 > q1high) {
            q0 -= q1 - q1high;
            q1  = q1high;
        }
    }
    // q0 == q1 isn't really a legal situation
    if (q0 == q1) {
        // the following is indirect but guarantees q1 != q0 && q1 near q0
        q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
        q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
    }

    /* stage 0 is free for every state; all later stages start unreachable */
    for (i = 0; i < TRELLIS_STATES; i++) {
        paths[0][i].cost    = 0.0f;
        paths[0][i].prev    = -1;
    }
    for (j = 1; j < TRELLIS_STAGES; j++) {
        for (i = 0; i < TRELLIS_STATES; i++) {
            paths[j][i].cost    = INFINITY;
            paths[j][i].prev    = -2;
        }
    }
    idx = 1;
    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
        for (g = 0; g < sce->ics.num_swb; g++) {
            const float *coefs = &sce->coeffs[start];
            float qmin, qmax;
            int nz = 0;

            bandaddr[idx] = w * 16 + g;
            qmin = INT_MAX;
            qmax = 0.0f;
            /* a window is zeroed when its energy does not exceed the
             * threshold; otherwise it contributes to the band's min/max */
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                sce->zeroes[(w+w2)*16+g] = 0;
                nz = 1;
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
                    float t = fabsf(coefs[w2*128+i]);
                    if (t > 0.0f)
                        qmin = FFMIN(qmin, t);
                    qmax = FFMAX(qmax, t);
                }
            }
            if (nz) {
                int minscale, maxscale;
                float minrd = INFINITY;
                float maxval;
                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
                minscale = coef2minsf(qmin);
                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
                maxscale = coef2maxsf(qmax);
                /* convert to trellis state indices (offsets from q0) */
                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
                /* make sure at least one state is evaluated */
                if (minscale == maxscale) {
                    maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
                    minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
                }
                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
                for (q = minscale; q < maxscale; q++) {
                    float dist = 0;
                    /* NOTE(review): the codebook is derived from the stored
                     * sce->sf_idx value, not from the candidate q + q0, so it
                     * is the same for every q - confirm this is intended. */
                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
                    }
                    /* minrd is tracked but not read afterwards in this function */
                    minrd = FFMIN(minrd, dist);

                    /* relax state q of this stage from every previous state i */
                    for (i = 0; i < q1 - q0; i++) {
                        float cost;
                        cost = paths[idx - 1][i].cost + dist
                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
                        if (cost < paths[idx][q].cost) {
                            paths[idx][q].cost    = cost;
                            paths[idx][q].prev    = i;
                        }
                    }
                }
            } else {
                /* fully zeroed band: keep each state, with a unit penalty */
                for (q = 0; q < q1 - q0; q++) {
                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
                    paths[idx][q].prev = q;
                }
            }
            sce->zeroes[w*16+g] = !nz;
            start += sce->ics.swb_sizes[g];
            idx++;
        }
    }
    /* idx now points at the last filled stage: pick its cheapest state */
    idx--;
    mincost = paths[idx][0].cost;
    minq    = 0;
    for (i = 1; i < TRELLIS_STATES; i++) {
        if (paths[idx][i].cost < mincost) {
            mincost = paths[idx][i].cost;
            minq = i;
        }
    }
    /* back-track; negative 'prev' markers are mapped to state 0 */
    while (idx) {
        sce->sf_idx[bandaddr[idx]] = minq + q0;
        minq = FFMAX(paths[idx][minq].prev, 0);
        idx--;
    }
    //set the same quantizers inside window groups
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}

/**
 * Scalefactor search for the AAC_CODER_FAST entry of ff_aac_coders[].
 *
 * Seeds every non-zero band's scalefactor from its threshold, then runs a
 * two-loop search: the inner loop shifts all scalefactors together until the
 * estimated bit count meets destbits, the outer loop lowers the scalefactor
 * of bands whose distortion exceeds their limit and repeats (at most 10
 * iterations).
 *
 * @param avctx  codec context; bit_rate, sample_rate and the channel count
 *               determine the bit budget
 * @param s      encoder context; s->scoefs is overwritten with the
 *               abs_pow34() image of sce->coeffs
 * @param sce    channel element whose sf_idx[], zeroes[] and band_type[]
 *               are written
 * @param lambda scales the bit budget as lambda / 120
 */
static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
                                       SingleChannelElement *sce,
                                       const float lambda)
{
    int start = 0, i, w, w2, g;
    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->ch_layout.nb_channels * (lambda / 120.f);
    float dists[128] = { 0 }, uplims[128] = { 0 };
    float maxvals[128];
    int fflag, minscaler;
    int its  = 0;
    int allz = 0; /* despite the name: becomes non-zero once any band is non-zero */
    float minthr = INFINITY;

    // for values above this the decoder might end up in an endless loop
    // due to always having more bits than what can be encoded.
    /* NOTE(review): this file is the encoder; "decoder" above presumably
     * refers to the bit-fitting loop further down - confirm. */
    destbits = FFMIN(destbits, 5800);
    //some heuristic to determine initial quantizers will reduce search time
    //determine zero bands and upper limits
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        /* NOTE(review): 'start' is maintained in this pass but never read */
        start = 0;
        for (g = 0; g < sce->ics.num_swb; g++) {
            int nz = 0;
            float uplim = 0.0f;
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                uplim += band->threshold;
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                nz = 1;
            }
            /* distortion limit of the band: summed thresholds times 512 */
            uplims[w*16+g] = uplim *512;
            sce->band_type[w*16+g] = 0;
            sce->zeroes[w*16+g] = !nz;
            if (nz)
                minthr = FFMIN(minthr, uplim);
            allz |= nz;
            start += sce->ics.swb_sizes[g];
        }
    }
    /* initial scalefactors: offset from SCALE_ONE_POS by the band limit
     * relative to the smallest threshold, capped at +59 */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g]) {
                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
                continue;
            }
            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
        }
    }

    /* nothing to quantize when every band is zeroed */
    if (!allz)
        return;
    s->abs_pow34(s->scoefs, sce->coeffs, 1024);
    ff_quantize_band_cost_cache_init(s);

    /* per-band maximum of the pow34 values, used for codebook selection */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
        for (g = 0; g < sce->ics.num_swb; g++) {
            const float *scaled = s->scoefs + start;
            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
            start += sce->ics.swb_sizes[g];
        }
    }

    //perform two-loop search
    //outer loop - improve quality
    do {
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        /* coarse halving steps on the first outer iteration, unit steps later */
        qstep = its ? 1 : 32;
        do {
            int prev = -1;
            tbits = 0;
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                start = w*128;
                for (g = 0; g < sce->ics.num_swb; g++) {
                    const float *coefs = sce->coeffs + start;
                    const float *scaled = s->scoefs + start;
                    int bits = 0;
                    int cb;
                    float dist = 0.0f;

                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
                        start += sce->ics.swb_sizes[g];
                        continue;
                    }
                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        int b;
                        dist += quantize_band_cost_cached(s, w + w2, g,
                                                          coefs + w2*128,
                                                          scaled + w2*128,
                                                          sce->ics.swb_sizes[g],
                                                          sce->sf_idx[w*16+g],
                                                          cb, 1.0f, INFINITY,
                                                          &b, NULL, 0);
                        bits += b;
                    }
                    /* store the returned cost with the bit count subtracted */
                    dists[w*16+g] = dist - bits;
                    /* add the bits of the scalefactor difference to the
                     * previously coded band */
                    if (prev != -1) {
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[w*16+g];
                }
            }
            /* too many bits: raise all scalefactors; otherwise lower them */
            if (tbits > destbits) {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
                        sce->sf_idx[i] += qstep;
            } else {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
            /* keep stepping by one while still more than 2% over budget */
            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
                qstep = 1;
        } while (qstep);

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);

        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            for (g = 0; g < sce->ics.num_swb; g++) {
                int prevsc = sce->sf_idx[w*16+g];
                /* distortion above the limit: use a smaller scalefactor */
                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
                        sce->sf_idx[w*16+g]--;
                    else //Try to make sure there is some energy in every band
                        sce->sf_idx[w*16+g]-=2;
                }
                /* keep all scalefactors within SCALE_MAX_DIFF above the minimum */
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
                /* any change requests another outer iteration */
                if (sce->sf_idx[w*16+g] != prevsc)
                    fflag = 1;
                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
            }
        }
        its++;
    } while (fflag && its < 10);
}

/**
 * Decide, band by band, which bands are switched to NOISE_BT.
 *
 * Bands below NOISE_LOW_LIMIT or at/above the cutoff are skipped. For the
 * remaining bands a set of energy/spread conditions is checked first; bands
 * that pass get a random noise vector generated at the target energy, and
 * the band is switched to NOISE_BT if it was zeroed or had codebook 0, or if
 * the estimated PNS cost (dist2) is lower than the regular coding cost
 * (dist1) and the energy ratio stays within (0.85, 1.25).
 *
 * Writes sce->band_alt[] (copy of band_type[] on entry), sce->pns_ener[],
 * sce->band_type[] and sce->zeroes[], advances s->random_state and uses
 * s->scoefs as scratch space.
 *
 * @param s     encoder context; s->lambda is the rate/distortion weight
 * @param avctx codec context; bit_rate, sample_rate, flags, cutoff and the
 *              channel count determine the cutoff band
 * @param sce   channel element to analyse and update
 */
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
    FFPsyBand *band;
    int w, g, w2, i;
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
    /* scratch buffers carved out of s->scoefs */
    float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
    float *NOR34 = &s->scoefs[3*128];
    uint8_t nextband[128];
    const float lambda = s->lambda;
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
    const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
    /* prev: scalefactor of the last accepted noise band (-1000 = none yet);
     * prev_sf: scalefactor of the last non-zeroed band that stayed as it was */
    int prev = -1000, prev_sf = -1;
    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->ch_layout.nb_channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    /* cutoff expressed as a coefficient offset inside one window */
    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;

    /* band_alt keeps the codebooks as they were before this search */
    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
    ff_init_nextband_map(sce, nextband);
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        int wstart = w*128;
        for (g = 0; g < sce->ics.num_swb; g++) {
            int noise_sfi;
            float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
            float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
            const int start = wstart+sce->ics.swb_offset[g];
            const float freq = (start-wstart)*freq_mult;
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
            /* outside the frequency range considered for PNS */
            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
                continue;
            }
            /* gather energy, threshold and spread over the group's windows */
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                sfb_energy += band->energy;
                spread     = FFMIN(spread, band->spread);
                threshold  += band->threshold;
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
            }

            /* Ramps down at ~8000Hz and loosens the dist threshold */
            dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             *
             * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
             */
            /* the condition below lists the rejection cases: rejected bands
             * only get their energy recorded */
            if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
                ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
                (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
                min_energy < pns_transient_energy_r * max_energy ) {
                sce->pns_ener[w*16+g] = sfb_energy;
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
                continue;
            }

            pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
            noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
            /* the step from the previous noise band must stay within
             * [-SCALE_DIFF_ZERO, 2*SCALE_MAX_DIFF - SCALE_DIFF_ZERO] */
            if (prev != -1000) {
                int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
                if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
                    if (!sce->zeroes[w*16+g])
                        prev_sf = sce->sf_idx[w*16+g];
                    continue;
                }
            }
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                float band_energy, scale, pns_senergy;
                const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                /* fill PNS[] with values from the encoder's random state */
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
                    s->random_state  = lcg_random(s->random_state);
                    PNS[i] = s->random_state;
                }
                /* rescale the vector so that its norm is |noise_amp| */
                band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                scale = noise_amp/sqrtf(band_energy);
                s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
                pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                pns_energy += pns_senergy;
                s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
                /* PNS34 is filled here but not read again in this function */
                s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]);
                /* dist1: cost of coding the band with its original codebook */
                dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
                                            NOR34,
                                            sce->ics.swb_sizes[g],
                                            sce->sf_idx[(w+w2)*16+g],
                                            sce->band_alt[(w+w2)*16+g],
                                            lambda/band->threshold, INFINITY, NULL, NULL, 0);
                /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
                dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
            }
            /* 5 when the previous band is already noise, 9 otherwise */
            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
                dist2 += 5;
            } else {
                dist2 += 9;
            }
            energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
            sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
            if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
                sce->band_type[w*16+g] = NOISE_BT;
                sce->zeroes[w*16+g] = 0;
                prev = noise_sfi;
            } else {
                if (!sce->zeroes[w*16+g])
                    prev_sf = sce->sf_idx[w*16+g];
            }
        }
    }
}

/**
 * Flag the bands that are candidates for PNS.
 *
 * Uses the same cutoff computation and the same energy/spread statistics as
 * search_for_pns(), but only records the outcome: sce->can_pns[] is set to
 * 1 or 0 per band and sce->pns_ener[] receives the summed band energy for
 * bands inside the considered frequency range. sce->band_alt[] is set to a
 * copy of sce->band_type[].
 *
 * @param s     encoder context; s->lambda is the rate/distortion weight
 * @param avctx codec context; bit_rate, sample_rate, flags, cutoff and the
 *              channel count determine the cutoff band
 * @param sce   channel element to analyse and update
 */
static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
    FFPsyBand *band;
    int w, g, w2;
    int wlen = 1024 / sce->ics.num_windows;
    int bandwidth, cutoff;
    const float lambda = s->lambda;
    const float freq_mult = avctx->sample_rate*0.5f/wlen;
    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);

    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
        / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)
        * (lambda / 120.f);

    /** Keep this in sync with twoloop's cutoff selection */
    float rate_bandwidth_multiplier = 1.5f;
    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
        : (avctx->bit_rate / avctx->ch_layout.nb_channels);

    frame_bit_rate *= 1.15f;

    if (avctx->cutoff > 0) {
        bandwidth = avctx->cutoff;
    } else {
        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
    }

    /* cutoff expressed as a coefficient offset inside one window */
    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;

    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
            float min_energy = -1.0f, max_energy = 0.0f;
            const int start = sce->ics.swb_offset[g];
            const float freq = start*freq_mult;
            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
            /* outside the frequency range considered for PNS */
            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
                sce->can_pns[w*16+g] = 0;
                continue;
            }
            /* gather energy, threshold and spread over the group's windows */
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                sfb_energy += band->energy;
                spread     = FFMIN(spread, band->spread);
                threshold  += band->threshold;
                if (!w2) {
                    min_energy = max_energy = band->energy;
                } else {
                    min_energy = FFMIN(min_energy, band->energy);
                    max_energy = FFMAX(max_energy, band->energy);
                }
            }

            /* PNS is acceptable when all of these are true:
             * 1. high spread energy (noise-like band)
             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
             */
            sce->pns_ener[w*16+g] = sfb_energy;
            if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
                sce->can_pns[w*16+g] = 0;
            } else {
                sce->can_pns[w*16+g] = 1;
            }
        }
    }
}

/**
 * Decide per band whether a channel pair is coded as mid/side.
 *
 * Does nothing unless cpe->common_window is set. For every band where both
 * channels are non-zero and cpe->is_mask[] is not set, the costs of coding
 * left/right (dist1, B0 bits) and mid/side (dist2, B1 bits) are compared for
 * up to four side scalefactor offsets; cpe->ms_mask[] is set when
 * dist2 <= dist1 and B1 < B0. When the mask is set and neither channel is a
 * noise band, sf_idx[] and band_type[] of both channels are replaced by the
 * mid/side values. The coefficients themselves are not modified here;
 * M and S only live in the s->scoefs scratch area.
 *
 * @param s   encoder context; s->lambda is the rate/distortion weight and
 *            s->scoefs is used as scratch space
 * @param cpe channel pair element to analyse and update
 */
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
{
    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
    uint8_t nextband0[128], nextband1[128];
    /* six 128-float scratch buffers carved out of s->scoefs */
    float *M   = s->scoefs + 128*0, *S   = s->scoefs + 128*1;
    float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3;
    float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5;
    const float lambda = s->lambda;
    const float mslambda = FFMIN(1.0f, lambda / 120.f);
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    if (!cpe->common_window)
        return;

    /** Scout out next nonzero bands */
    ff_init_nextband_map(sce0, nextband0);
    ff_init_nextband_map(sce1, nextband1);

    prev_mid = sce0->sf_idx[0];
    prev_side = sce1->sf_idx[0];
    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        start = 0;
        for (g = 0; g < sce0->ics.num_swb; g++) {
            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
            /* bands without the is_mask flag start with the M/S flag cleared */
            if (!cpe->is_mask[w*16+g])
                cpe->ms_mask[w*16+g] = 0;
            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
                float Mmax = 0.0f, Smax = 0.0f;

                /* Must compute mid/side SF and book for the whole window group */
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                        S[i] =  M[i]
                              - sce1->coeffs[start+(w+w2)*128+i];
                    }
                    s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
                    s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
                        Mmax = FFMAX(Mmax, M34[i]);
                        Smax = FFMAX(Smax, S34[i]);
                    }
                }

                /* try lowering the side scalefactor in steps of 3 */
                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
                    float dist1 = 0.0f, dist2 = 0.0f;
                    int B0 = 0, B1 = 0;
                    int minidx;
                    int mididx, sididx;
                    int midcb, sidcb;

                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
                        continue;
                    }

                    midcb = find_min_book(Mmax, mididx);
                    sidcb = find_min_book(Smax, sididx);

                    /* No CB can be zero */
                    midcb = FFMAX(1,midcb);
                    sidcb = FFMAX(1,sidcb);

                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                        float minthr = FFMIN(band0->threshold, band1->threshold);
                        int b1,b2,b3,b4;
                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                            S[i] =  M[i]
                                  - sce1->coeffs[start+(w+w2)*128+i];
                        }

                        s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        s->abs_pow34(M34, M,                         sce0->ics.swb_sizes[g]);
                        s->abs_pow34(S34, S,                         sce0->ics.swb_sizes[g]);
                        /* FLT_MIN keeps the divisions finite for zero thresholds */
                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
                                                    L34,
                                                    sce0->ics.swb_sizes[g],
                                                    sce0->sf_idx[w*16+g],
                                                    sce0->band_type[w*16+g],
                                                    lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0);
                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
                                                    R34,
                                                    sce1->ics.swb_sizes[g],
                                                    sce1->sf_idx[w*16+g],
                                                    sce1->band_type[w*16+g],
                                                    lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0);
                        dist2 += quantize_band_cost(s, M,
                                                    M34,
                                                    sce0->ics.swb_sizes[g],
                                                    mididx,
                                                    midcb,
                                                    lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0);
                        /* the side channel uses mslambda and a threshold scaled by bmax */
                        dist2 += quantize_band_cost(s, S,
                                                    S34,
                                                    sce1->ics.swb_sizes[g],
                                                    sididx,
                                                    sidcb,
                                                    mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0);
                        B0 += b1+b2;
                        B1 += b3+b4;
                        /* compare the costs with the bit counts subtracted */
                        dist1 -= b1+b2;
                        dist2 -= b3+b4;
                    }
                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
                    if (cpe->ms_mask[w*16+g]) {
                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
                            sce0->sf_idx[w*16+g] = mididx;
                            sce1->sf_idx[w*16+g] = sididx;
                            sce0->band_type[w*16+g] = midcb;
                            sce1->band_type[w*16+g] = sidcb;
                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
                            /* ms_mask unneeded, and it confuses some decoders */
                            cpe->ms_mask[w*16+g] = 0;
                        }
                        break;
                    } else if (B1 > B0) {
                        /* More boost won't fix this */
                        break;
                    }
                }
            }
            /* remember the last scalefactor actually coded for each channel */
            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
                prev_mid = sce0->sf_idx[w*16+g];
            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
                prev_side = sce1->sf_idx[w*16+g];
            start += sce0->ics.swb_sizes[g];
        }
    }
}

/**
 * Callback tables of the available coders, indexed by AAC_CODER_*.
 *
 * The initializers are positional, so their order has to match the member
 * order of AACCoefficientsEncoder (declared outside this file). The three
 * entries differ only in their first two members: the scalefactor search
 * and the band info encoder.
 */
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
    [AAC_CODER_ANMR] = {
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
        ff_aac_encode_tns_info,
        ff_aac_encode_ltp_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_pred,
        ff_aac_adjust_common_ltp,
        ff_aac_apply_main_pred,
        ff_aac_apply_tns,
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
        set_special_band_scalefactors,
        search_for_pns,
        mark_pns,
        ff_aac_search_for_tns,
        ff_aac_search_for_ltp,
        search_for_ms,
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
    },
    [AAC_CODER_TWOLOOP] = {
        search_for_quantizers_twoloop,
        codebook_trellis_rate,
        quantize_and_encode_band,
        ff_aac_encode_tns_info,
        ff_aac_encode_ltp_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_pred,
        ff_aac_adjust_common_ltp,
        ff_aac_apply_main_pred,
        ff_aac_apply_tns,
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
        set_special_band_scalefactors,
        search_for_pns,
        mark_pns,
        ff_aac_search_for_tns,
        ff_aac_search_for_ltp,
        search_for_ms,
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
    },
    [AAC_CODER_FAST] = {
        search_for_quantizers_fast,
        codebook_trellis_rate,
        quantize_and_encode_band,
        ff_aac_encode_tns_info,
        ff_aac_encode_ltp_info,
        ff_aac_encode_main_pred,
        ff_aac_adjust_common_pred,
        ff_aac_adjust_common_ltp,
        ff_aac_apply_main_pred,
        ff_aac_apply_tns,
        ff_aac_update_ltp,
        ff_aac_ltp_insert_new_frame,
        set_special_band_scalefactors,
        search_for_pns,
        mark_pns,
        ff_aac_search_for_tns,
        ff_aac_search_for_ltp,
        search_for_ms,
        ff_aac_search_for_is,
        ff_aac_search_for_pred,
    },
};