1/* 2 * Copyright (c) 2007-2008 CSIRO 3 * Copyright (c) 2007-2009 Xiph.Org Foundation 4 * Copyright (c) 2008-2009 Gregory Maxwell 5 * Copyright (c) 2012 Andrew D'Addesio 6 * Copyright (c) 2013-2014 Mozilla Corporation 7 * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> 8 * 9 * This file is part of FFmpeg. 10 * 11 * FFmpeg is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * FFmpeg is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with FFmpeg; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 */ 25 26#include "config_components.h" 27 28#include "opustab.h" 29#include "opus_pvq.h" 30 31#define CELT_PVQ_U(n, k) (ff_celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)]) 32#define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, (k) + 1)) 33 34static inline int16_t celt_cos(int16_t x) 35{ 36 x = (MUL16(x, x) + 4096) >> 13; 37 x = (32767-x) + ROUND_MUL16(x, (-7651 + ROUND_MUL16(x, (8277 + ROUND_MUL16(-626, x))))); 38 return x + 1; 39} 40 41static inline int celt_log2tan(int isin, int icos) 42{ 43 int lc, ls; 44 lc = opus_ilog(icos); 45 ls = opus_ilog(isin); 46 icos <<= 15 - lc; 47 isin <<= 15 - ls; 48 return (ls << 11) - (lc << 11) + 49 ROUND_MUL16(isin, ROUND_MUL16(isin, -2597) + 7932) - 50 ROUND_MUL16(icos, ROUND_MUL16(icos, -2597) + 7932); 51} 52 53static inline int celt_bits2pulses(const uint8_t *cache, int bits) 54{ 55 // TODO: Find the size of cache and make it into an array in the parameters list 56 int i, low = 0, high; 57 58 high = cache[0]; 59 bits--; 60 61 for (i = 0; i < 6; i++) { 62 int center = (low + high + 1) >> 1; 63 if (cache[center] >= bits) 64 high = center; 65 else 66 low = center; 67 } 68 69 return (bits - (low == 0 ? -1 : cache[low]) <= cache[high] - bits) ? low : high; 70} 71 72static inline int celt_pulses2bits(const uint8_t *cache, int pulses) 73{ 74 // TODO: Find the size of cache and make it into an array in the parameters list 75 return (pulses == 0) ? 0 : cache[pulses] + 1; 76} 77 78static inline void celt_normalize_residual(const int * av_restrict iy, float * av_restrict X, 79 int N, float g) 80{ 81 int i; 82 for (i = 0; i < N; i++) 83 X[i] = g * iy[i]; 84} 85 86static void celt_exp_rotation_impl(float *X, uint32_t len, uint32_t stride, 87 float c, float s) 88{ 89 float *Xptr; 90 int i; 91 92 Xptr = X; 93 for (i = 0; i < len - stride; i++) { 94 float x1 = Xptr[0]; 95 float x2 = Xptr[stride]; 96 Xptr[stride] = c * x2 + s * x1; 97 *Xptr++ = c * x1 - s * x2; 98 } 99 100 Xptr = &X[len - 2 * stride - 1]; 101 for (i = len - 2 * stride - 1; i >= 0; i--) { 102 float x1 = Xptr[0]; 103 float x2 = Xptr[stride]; 104 Xptr[stride] = c * x2 + s * x1; 105 *Xptr-- = c * x1 - s * x2; 106 } 107} 108 109static inline void celt_exp_rotation(float *X, uint32_t len, 110 uint32_t stride, uint32_t K, 111 enum CeltSpread spread, const int encode) 112{ 113 uint32_t stride2 = 0; 114 float c, s; 115 float gain, theta; 116 int i; 117 118 if (2*K >= len || spread == CELT_SPREAD_NONE) 119 return; 120 121 gain = (float)len / (len + (20 - 5*spread) * K); 122 theta = M_PI * gain * gain / 4; 123 124 c = cosf(theta); 125 s = sinf(theta); 126 127 if (len >= stride << 3) { 128 stride2 = 1; 129 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. 130 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ 131 while ((stride2 * stride2 + stride2) * stride + (stride >> 2) < len) 132 stride2++; 133 } 134 135 len /= stride; 136 for (i = 0; i < stride; i++) { 137 if (encode) { 138 celt_exp_rotation_impl(X + i * len, len, 1, c, -s); 139 if (stride2) 140 celt_exp_rotation_impl(X + i * len, len, stride2, s, -c); 141 } else { 142 if (stride2) 143 celt_exp_rotation_impl(X + i * len, len, stride2, s, c); 144 celt_exp_rotation_impl(X + i * len, len, 1, c, s); 145 } 146 } 147} 148 149static inline uint32_t celt_extract_collapse_mask(const int *iy, uint32_t N, uint32_t B) 150{ 151 int i, j, N0 = N / B; 152 uint32_t collapse_mask = 0; 153 154 if (B <= 1) 155 return 1; 156 157 for (i = 0; i < B; i++) 158 for (j = 0; j < N0; j++) 159 collapse_mask |= (!!iy[i*N0+j]) << i; 160 return collapse_mask; 161} 162 163static inline void celt_stereo_merge(float *X, float *Y, float mid, int N) 164{ 165 int i; 166 float xp = 0, side = 0; 167 float E[2]; 168 float mid2; 169 float gain[2]; 170 171 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ 172 for (i = 0; i < N; i++) { 173 xp += X[i] * Y[i]; 174 side += Y[i] * Y[i]; 175 } 176 177 /* Compensating for the mid normalization */ 178 xp *= mid; 179 mid2 = mid; 180 E[0] = mid2 * mid2 + side - 2 * xp; 181 E[1] = mid2 * mid2 + side + 2 * xp; 182 if (E[0] < 6e-4f || E[1] < 6e-4f) { 183 for (i = 0; i < N; i++) 184 Y[i] = X[i]; 185 return; 186 } 187 188 gain[0] = 1.0f / sqrtf(E[0]); 189 gain[1] = 1.0f / sqrtf(E[1]); 190 191 for (i = 0; i < N; i++) { 192 float value[2]; 193 /* Apply mid scaling (side is already scaled) */ 194 value[0] = mid * X[i]; 195 value[1] = Y[i]; 196 X[i] = gain[0] * (value[0] - value[1]); 197 Y[i] = gain[1] * (value[0] + value[1]); 198 } 199} 200 201static void celt_interleave_hadamard(float *tmp, float *X, int N0, 202 int stride, int hadamard) 203{ 204 int i, j, N = N0*stride; 205 const uint8_t *order = &ff_celt_hadamard_order[hadamard ? stride - 2 : 30]; 206 207 for (i = 0; i < stride; i++) 208 for (j = 0; j < N0; j++) 209 tmp[j*stride+i] = X[order[i]*N0+j]; 210 211 memcpy(X, tmp, N*sizeof(float)); 212} 213 214static void celt_deinterleave_hadamard(float *tmp, float *X, int N0, 215 int stride, int hadamard) 216{ 217 int i, j, N = N0*stride; 218 const uint8_t *order = &ff_celt_hadamard_order[hadamard ? stride - 2 : 30]; 219 220 for (i = 0; i < stride; i++) 221 for (j = 0; j < N0; j++) 222 tmp[order[i]*N0+j] = X[j*stride+i]; 223 224 memcpy(X, tmp, N*sizeof(float)); 225} 226 227static void celt_haar1(float *X, int N0, int stride) 228{ 229 int i, j; 230 N0 >>= 1; 231 for (i = 0; i < stride; i++) { 232 for (j = 0; j < N0; j++) { 233 float x0 = X[stride * (2 * j + 0) + i]; 234 float x1 = X[stride * (2 * j + 1) + i]; 235 X[stride * (2 * j + 0) + i] = (x0 + x1) * M_SQRT1_2; 236 X[stride * (2 * j + 1) + i] = (x0 - x1) * M_SQRT1_2; 237 } 238 } 239} 240 241static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap, 242 int stereo) 243{ 244 int qn, qb; 245 int N2 = 2 * N - 1; 246 if (stereo && N == 2) 247 N2--; 248 249 /* The upper limit ensures that in a stereo split with itheta==16384, we'll 250 * always have enough bits left over to code at least one pulse in the 251 * side; otherwise it would collapse, since it doesn't get folded. */ 252 qb = FFMIN3(b - pulse_cap - (4 << 3), (b + N2 * offset) / N2, 8 << 3); 253 qn = (qb < (1 << 3 >> 1)) ? 1 : ((ff_celt_qn_exp2[qb & 0x7] >> (14 - (qb >> 3))) + 1) >> 1 << 1; 254 return qn; 255} 256 257/* Convert the quantized vector to an index */ 258static inline uint32_t celt_icwrsi(uint32_t N, uint32_t K, const int *y) 259{ 260 int i, idx = 0, sum = 0; 261 for (i = N - 1; i >= 0; i--) { 262 const uint32_t i_s = CELT_PVQ_U(N - i, sum + FFABS(y[i]) + 1); 263 idx += CELT_PVQ_U(N - i, sum) + (y[i] < 0)*i_s; 264 sum += FFABS(y[i]); 265 } 266 return idx; 267} 268 269// this code was adapted from libopus 270static inline uint64_t celt_cwrsi(uint32_t N, uint32_t K, uint32_t i, int *y) 271{ 272 uint64_t norm = 0; 273 uint32_t q, p; 274 int s, val; 275 int k0; 276 277 while (N > 2) { 278 /*Lots of pulses case:*/ 279 if (K >= N) { 280 const uint32_t *row = ff_celt_pvq_u_row[N]; 281 282 /* Are the pulses in this dimension negative? */ 283 p = row[K + 1]; 284 s = -(i >= p); 285 i -= p & s; 286 287 /*Count how many pulses were placed in this dimension.*/ 288 k0 = K; 289 q = row[N]; 290 if (q > i) { 291 K = N; 292 do { 293 p = ff_celt_pvq_u_row[--K][N]; 294 } while (p > i); 295 } else 296 for (p = row[K]; p > i; p = row[K]) 297 K--; 298 299 i -= p; 300 val = (k0 - K + s) ^ s; 301 norm += val * val; 302 *y++ = val; 303 } else { /*Lots of dimensions case:*/ 304 /*Are there any pulses in this dimension at all?*/ 305 p = ff_celt_pvq_u_row[K ][N]; 306 q = ff_celt_pvq_u_row[K + 1][N]; 307 308 if (p <= i && i < q) { 309 i -= p; 310 *y++ = 0; 311 } else { 312 /*Are the pulses in this dimension negative?*/ 313 s = -(i >= q); 314 i -= q & s; 315 316 /*Count how many pulses were placed in this dimension.*/ 317 k0 = K; 318 do p = ff_celt_pvq_u_row[--K][N]; 319 while (p > i); 320 321 i -= p; 322 val = (k0 - K + s) ^ s; 323 norm += val * val; 324 *y++ = val; 325 } 326 } 327 N--; 328 } 329 330 /* N == 2 */ 331 p = 2 * K + 1; 332 s = -(i >= p); 333 i -= p & s; 334 k0 = K; 335 K = (i + 1) / 2; 336 337 if (K) 338 i -= 2 * K - 1; 339 340 val = (k0 - K + s) ^ s; 341 norm += val * val; 342 *y++ = val; 343 344 /* N==1 */ 345 s = -i; 346 val = (K + s) ^ s; 347 norm += val * val; 348 *y = val; 349 350 return norm; 351} 352 353static inline void celt_encode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K) 354{ 355 ff_opus_rc_enc_uint(rc, celt_icwrsi(N, K, y), CELT_PVQ_V(N, K)); 356} 357 358static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K) 359{ 360 const uint32_t idx = ff_opus_rc_dec_uint(rc, CELT_PVQ_V(N, K)); 361 return celt_cwrsi(N, K, idx, y); 362} 363 364/* 365 * Faster than libopus's search, operates entirely in the signed domain. 366 * Slightly worse/better depending on N, K and the input vector. 367 */ 368static float ppp_pvq_search_c(float *X, int *y, int K, int N) 369{ 370 int i, y_norm = 0; 371 float res = 0.0f, xy_norm = 0.0f; 372 373 for (i = 0; i < N; i++) 374 res += FFABS(X[i]); 375 376 res = K/(res + FLT_EPSILON); 377 378 for (i = 0; i < N; i++) { 379 y[i] = lrintf(res*X[i]); 380 y_norm += y[i]*y[i]; 381 xy_norm += y[i]*X[i]; 382 K -= FFABS(y[i]); 383 } 384 385 while (K) { 386 int max_idx = 0, phase = FFSIGN(K); 387 float max_num = 0.0f; 388 float max_den = 1.0f; 389 y_norm += 1.0f; 390 391 for (i = 0; i < N; i++) { 392 /* If the sum has been overshot and the best place has 0 pulses allocated 393 * to it, attempting to decrease it further will actually increase the 394 * sum. Prevent this by disregarding any 0 positions when decrementing. */ 395 const int ca = 1 ^ ((y[i] == 0) & (phase < 0)); 396 const int y_new = y_norm + 2*phase*FFABS(y[i]); 397 float xy_new = xy_norm + 1*phase*FFABS(X[i]); 398 xy_new = xy_new * xy_new; 399 if (ca && (max_den*xy_new) > (y_new*max_num)) { 400 max_den = y_new; 401 max_num = xy_new; 402 max_idx = i; 403 } 404 } 405 406 K -= phase; 407 408 phase *= FFSIGN(X[max_idx]); 409 xy_norm += 1*phase*X[max_idx]; 410 y_norm += 2*phase*y[max_idx]; 411 y[max_idx] += phase; 412 } 413 414 return (float)y_norm; 415} 416 417static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K, 418 enum CeltSpread spread, uint32_t blocks, float gain, 419 CeltPVQ *pvq) 420{ 421 int *y = pvq->qcoeff; 422 423 celt_exp_rotation(X, N, blocks, K, spread, 1); 424 gain /= sqrtf(pvq->pvq_search(X, y, K, N)); 425 celt_encode_pulses(rc, y, N, K); 426 celt_normalize_residual(y, X, N, gain); 427 celt_exp_rotation(X, N, blocks, K, spread, 0); 428 return celt_extract_collapse_mask(y, N, blocks); 429} 430 431/** Decode pulse vector and combine the result with the pitch vector to produce 432 the final normalised signal in the current band. */ 433static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K, 434 enum CeltSpread spread, uint32_t blocks, float gain, 435 CeltPVQ *pvq) 436{ 437 int *y = pvq->qcoeff; 438 439 gain /= sqrtf(celt_decode_pulses(rc, y, N, K)); 440 celt_normalize_residual(y, X, N, gain); 441 celt_exp_rotation(X, N, blocks, K, spread, 0); 442 return celt_extract_collapse_mask(y, N, blocks); 443} 444 445static int celt_calc_theta(const float *X, const float *Y, int coupling, int N) 446{ 447 int i; 448 float e[2] = { 0.0f, 0.0f }; 449 if (coupling) { /* Coupling case */ 450 for (i = 0; i < N; i++) { 451 e[0] += (X[i] + Y[i])*(X[i] + Y[i]); 452 e[1] += (X[i] - Y[i])*(X[i] - Y[i]); 453 } 454 } else { 455 for (i = 0; i < N; i++) { 456 e[0] += X[i]*X[i]; 457 e[1] += Y[i]*Y[i]; 458 } 459 } 460 return lrintf(32768.0f*atan2f(sqrtf(e[1]), sqrtf(e[0]))/M_PI); 461} 462 463static void celt_stereo_is_decouple(float *X, float *Y, float e_l, float e_r, int N) 464{ 465 int i; 466 const float energy_n = 1.0f/(sqrtf(e_l*e_l + e_r*e_r) + FLT_EPSILON); 467 e_l *= energy_n; 468 e_r *= energy_n; 469 for (i = 0; i < N; i++) 470 X[i] = e_l*X[i] + e_r*Y[i]; 471} 472 473static void celt_stereo_ms_decouple(float *X, float *Y, int N) 474{ 475 int i; 476 for (i = 0; i < N; i++) { 477 const float Xret = X[i]; 478 X[i] = (X[i] + Y[i])*M_SQRT1_2; 479 Y[i] = (Y[i] - Xret)*M_SQRT1_2; 480 } 481} 482 483static av_always_inline uint32_t quant_band_template(CeltPVQ *pvq, CeltFrame *f, 484 OpusRangeCoder *rc, 485 const int band, float *X, 486 float *Y, int N, int b, 487 uint32_t blocks, float *lowband, 488 int duration, float *lowband_out, 489 int level, float gain, 490 float *lowband_scratch, 491 int fill, int quant) 492{ 493 int i; 494 const uint8_t *cache; 495 int stereo = !!Y, split = stereo; 496 int imid = 0, iside = 0; 497 uint32_t N0 = N; 498 int N_B = N / blocks; 499 int N_B0 = N_B; 500 int B0 = blocks; 501 int time_divide = 0; 502 int recombine = 0; 503 int inv = 0; 504 float mid = 0, side = 0; 505 int longblocks = (B0 == 1); 506 uint32_t cm = 0; 507 508 if (N == 1) { 509 float *x = X; 510 for (i = 0; i <= stereo; i++) { 511 int sign = 0; 512 if (f->remaining2 >= 1 << 3) { 513 if (quant) { 514 sign = x[0] < 0; 515 ff_opus_rc_put_raw(rc, sign, 1); 516 } else { 517 sign = ff_opus_rc_get_raw(rc, 1); 518 } 519 f->remaining2 -= 1 << 3; 520 } 521 x[0] = 1.0f - 2.0f*sign; 522 x = Y; 523 } 524 if (lowband_out) 525 lowband_out[0] = X[0]; 526 return 1; 527 } 528 529 if (!stereo && level == 0) { 530 int tf_change = f->tf_change[band]; 531 int k; 532 if (tf_change > 0) 533 recombine = tf_change; 534 /* Band recombining to increase frequency resolution */ 535 536 if (lowband && 537 (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) { 538 for (i = 0; i < N; i++) 539 lowband_scratch[i] = lowband[i]; 540 lowband = lowband_scratch; 541 } 542 543 for (k = 0; k < recombine; k++) { 544 if (quant || lowband) 545 celt_haar1(quant ? X : lowband, N >> k, 1 << k); 546 fill = ff_celt_bit_interleave[fill & 0xF] | ff_celt_bit_interleave[fill >> 4] << 2; 547 } 548 blocks >>= recombine; 549 N_B <<= recombine; 550 551 /* Increasing the time resolution */ 552 while ((N_B & 1) == 0 && tf_change < 0) { 553 if (quant || lowband) 554 celt_haar1(quant ? X : lowband, N_B, blocks); 555 fill |= fill << blocks; 556 blocks <<= 1; 557 N_B >>= 1; 558 time_divide++; 559 tf_change++; 560 } 561 B0 = blocks; 562 N_B0 = N_B; 563 564 /* Reorganize the samples in time order instead of frequency order */ 565 if (B0 > 1 && (quant || lowband)) 566 celt_deinterleave_hadamard(pvq->hadamard_tmp, quant ? X : lowband, 567 N_B >> recombine, B0 << recombine, 568 longblocks); 569 } 570 571 /* If we need 1.5 more bit than we can produce, split the band in two. */ 572 cache = ff_celt_cache_bits + 573 ff_celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band]; 574 if (!stereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) { 575 N >>= 1; 576 Y = X + N; 577 split = 1; 578 duration -= 1; 579 if (blocks == 1) 580 fill = (fill & 1) | (fill << 1); 581 blocks = (blocks + 1) >> 1; 582 } 583 584 if (split) { 585 int qn; 586 int itheta = quant ? celt_calc_theta(X, Y, stereo, N) : 0; 587 int mbits, sbits, delta; 588 int qalloc; 589 int pulse_cap; 590 int offset; 591 int orig_fill; 592 int tell; 593 594 /* Decide on the resolution to give to the split parameter theta */ 595 pulse_cap = ff_celt_log_freq_range[band] + duration * 8; 596 offset = (pulse_cap >> 1) - (stereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE : 597 CELT_QTHETA_OFFSET); 598 qn = (stereo && band >= f->intensity_stereo) ? 1 : 599 celt_compute_qn(N, b, offset, pulse_cap, stereo); 600 tell = opus_rc_tell_frac(rc); 601 if (qn != 1) { 602 if (quant) 603 itheta = (itheta*qn + 8192) >> 14; 604 /* Entropy coding of the angle. We use a uniform pdf for the 605 * time split, a step for stereo, and a triangular one for the rest. */ 606 if (quant) { 607 if (stereo && N > 2) 608 ff_opus_rc_enc_uint_step(rc, itheta, qn / 2); 609 else if (stereo || B0 > 1) 610 ff_opus_rc_enc_uint(rc, itheta, qn + 1); 611 else 612 ff_opus_rc_enc_uint_tri(rc, itheta, qn); 613 itheta = itheta * 16384 / qn; 614 if (stereo) { 615 if (itheta == 0) 616 celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], 617 f->block[1].lin_energy[band], N); 618 else 619 celt_stereo_ms_decouple(X, Y, N); 620 } 621 } else { 622 if (stereo && N > 2) 623 itheta = ff_opus_rc_dec_uint_step(rc, qn / 2); 624 else if (stereo || B0 > 1) 625 itheta = ff_opus_rc_dec_uint(rc, qn+1); 626 else 627 itheta = ff_opus_rc_dec_uint_tri(rc, qn); 628 itheta = itheta * 16384 / qn; 629 } 630 } else if (stereo) { 631 if (quant) { 632 inv = f->apply_phase_inv ? itheta > 8192 : 0; 633 if (inv) { 634 for (i = 0; i < N; i++) 635 Y[i] *= -1; 636 } 637 celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], 638 f->block[1].lin_energy[band], N); 639 640 if (b > 2 << 3 && f->remaining2 > 2 << 3) { 641 ff_opus_rc_enc_log(rc, inv, 2); 642 } else { 643 inv = 0; 644 } 645 } else { 646 inv = (b > 2 << 3 && f->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0; 647 inv = f->apply_phase_inv ? inv : 0; 648 } 649 itheta = 0; 650 } 651 qalloc = opus_rc_tell_frac(rc) - tell; 652 b -= qalloc; 653 654 orig_fill = fill; 655 if (itheta == 0) { 656 imid = 32767; 657 iside = 0; 658 fill = av_mod_uintp2(fill, blocks); 659 delta = -16384; 660 } else if (itheta == 16384) { 661 imid = 0; 662 iside = 32767; 663 fill &= ((1 << blocks) - 1) << blocks; 664 delta = 16384; 665 } else { 666 imid = celt_cos(itheta); 667 iside = celt_cos(16384-itheta); 668 /* This is the mid vs side allocation that minimizes squared error 669 in that band. */ 670 delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid)); 671 } 672 673 mid = imid / 32768.0f; 674 side = iside / 32768.0f; 675 676 /* This is a special case for N=2 that only works for stereo and takes 677 advantage of the fact that mid and side are orthogonal to encode 678 the side with just one bit. */ 679 if (N == 2 && stereo) { 680 int c; 681 int sign = 0; 682 float tmp; 683 float *x2, *y2; 684 mbits = b; 685 /* Only need one bit for the side */ 686 sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0; 687 mbits -= sbits; 688 c = (itheta > 8192); 689 f->remaining2 -= qalloc+sbits; 690 691 x2 = c ? Y : X; 692 y2 = c ? X : Y; 693 if (sbits) { 694 if (quant) { 695 sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; 696 ff_opus_rc_put_raw(rc, sign, 1); 697 } else { 698 sign = ff_opus_rc_get_raw(rc, 1); 699 } 700 } 701 sign = 1 - 2 * sign; 702 /* We use orig_fill here because we want to fold the side, but if 703 itheta==16384, we'll have cleared the low bits of fill. */ 704 cm = pvq->quant_band(pvq, f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration, 705 lowband_out, level, gain, lowband_scratch, orig_fill); 706 /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), 707 and there's no need to worry about mixing with the other channel. */ 708 y2[0] = -sign * x2[1]; 709 y2[1] = sign * x2[0]; 710 X[0] *= mid; 711 X[1] *= mid; 712 Y[0] *= side; 713 Y[1] *= side; 714 tmp = X[0]; 715 X[0] = tmp - Y[0]; 716 Y[0] = tmp + Y[0]; 717 tmp = X[1]; 718 X[1] = tmp - Y[1]; 719 Y[1] = tmp + Y[1]; 720 } else { 721 /* "Normal" split code */ 722 float *next_lowband2 = NULL; 723 float *next_lowband_out1 = NULL; 724 int next_level = 0; 725 int rebalance; 726 uint32_t cmt; 727 728 /* Give more bits to low-energy MDCTs than they would 729 * otherwise deserve */ 730 if (B0 > 1 && !stereo && (itheta & 0x3fff)) { 731 if (itheta > 8192) 732 /* Rough approximation for pre-echo masking */ 733 delta -= delta >> (4 - duration); 734 else 735 /* Corresponds to a forward-masking slope of 736 * 1.5 dB per 10 ms */ 737 delta = FFMIN(0, delta + (N << 3 >> (5 - duration))); 738 } 739 mbits = av_clip((b - delta) / 2, 0, b); 740 sbits = b - mbits; 741 f->remaining2 -= qalloc; 742 743 if (lowband && !stereo) 744 next_lowband2 = lowband + N; /* >32-bit split case */ 745 746 /* Only stereo needs to pass on lowband_out. 747 * Otherwise, it's handled at the end */ 748 if (stereo) 749 next_lowband_out1 = lowband_out; 750 else 751 next_level = level + 1; 752 753 rebalance = f->remaining2; 754 if (mbits >= sbits) { 755 /* In stereo mode, we do not apply a scaling to the mid 756 * because we need the normalized mid for folding later */ 757 cm = pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks, 758 lowband, duration, next_lowband_out1, next_level, 759 stereo ? 1.0f : (gain * mid), lowband_scratch, fill); 760 rebalance = mbits - (rebalance - f->remaining2); 761 if (rebalance > 3 << 3 && itheta != 0) 762 sbits += rebalance - (3 << 3); 763 764 /* For a stereo split, the high bits of fill are always zero, 765 * so no folding will be done to the side. */ 766 cmt = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks, 767 next_lowband2, duration, NULL, next_level, 768 gain * side, NULL, fill >> blocks); 769 cm |= cmt << ((B0 >> 1) & (stereo - 1)); 770 } else { 771 /* For a stereo split, the high bits of fill are always zero, 772 * so no folding will be done to the side. */ 773 cm = pvq->quant_band(pvq, f, rc, band, Y, NULL, N, sbits, blocks, 774 next_lowband2, duration, NULL, next_level, 775 gain * side, NULL, fill >> blocks); 776 cm <<= ((B0 >> 1) & (stereo - 1)); 777 rebalance = sbits - (rebalance - f->remaining2); 778 if (rebalance > 3 << 3 && itheta != 16384) 779 mbits += rebalance - (3 << 3); 780 781 /* In stereo mode, we do not apply a scaling to the mid because 782 * we need the normalized mid for folding later */ 783 cm |= pvq->quant_band(pvq, f, rc, band, X, NULL, N, mbits, blocks, 784 lowband, duration, next_lowband_out1, next_level, 785 stereo ? 1.0f : (gain * mid), lowband_scratch, fill); 786 } 787 } 788 } else { 789 /* This is the basic no-split case */ 790 uint32_t q = celt_bits2pulses(cache, b); 791 uint32_t curr_bits = celt_pulses2bits(cache, q); 792 f->remaining2 -= curr_bits; 793 794 /* Ensures we can never bust the budget */ 795 while (f->remaining2 < 0 && q > 0) { 796 f->remaining2 += curr_bits; 797 curr_bits = celt_pulses2bits(cache, --q); 798 f->remaining2 -= curr_bits; 799 } 800 801 if (q != 0) { 802 /* Finally do the actual (de)quantization */ 803 if (quant) { 804 cm = celt_alg_quant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1), 805 f->spread, blocks, gain, pvq); 806 } else { 807 cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1), 808 f->spread, blocks, gain, pvq); 809 } 810 } else { 811 /* If there's no pulse, fill the band anyway */ 812 uint32_t cm_mask = (1 << blocks) - 1; 813 fill &= cm_mask; 814 if (fill) { 815 if (!lowband) { 816 /* Noise */ 817 for (i = 0; i < N; i++) 818 X[i] = (((int32_t)celt_rng(f)) >> 20); 819 cm = cm_mask; 820 } else { 821 /* Folded spectrum */ 822 for (i = 0; i < N; i++) { 823 /* About 48 dB below the "normal" folding level */ 824 X[i] = lowband[i] + (((celt_rng(f)) & 0x8000) ? 1.0f / 256 : -1.0f / 256); 825 } 826 cm = fill; 827 } 828 celt_renormalize_vector(X, N, gain); 829 } else { 830 memset(X, 0, N*sizeof(float)); 831 } 832 } 833 } 834 835 /* This code is used by the decoder and by the resynthesis-enabled encoder */ 836 if (stereo) { 837 if (N > 2) 838 celt_stereo_merge(X, Y, mid, N); 839 if (inv) { 840 for (i = 0; i < N; i++) 841 Y[i] *= -1; 842 } 843 } else if (level == 0) { 844 int k; 845 846 /* Undo the sample reorganization going from time order to frequency order */ 847 if (B0 > 1) 848 celt_interleave_hadamard(pvq->hadamard_tmp, X, N_B >> recombine, 849 B0 << recombine, longblocks); 850 851 /* Undo time-freq changes that we did earlier */ 852 N_B = N_B0; 853 blocks = B0; 854 for (k = 0; k < time_divide; k++) { 855 blocks >>= 1; 856 N_B <<= 1; 857 cm |= cm >> blocks; 858 celt_haar1(X, N_B, blocks); 859 } 860 861 for (k = 0; k < recombine; k++) { 862 cm = ff_celt_bit_deinterleave[cm]; 863 celt_haar1(X, N0>>k, 1<<k); 864 } 865 blocks <<= recombine; 866 867 /* Scale output for later folding */ 868 if (lowband_out) { 869 float n = sqrtf(N0); 870 for (i = 0; i < N0; i++) 871 lowband_out[i] = n * X[i]; 872 } 873 cm = av_mod_uintp2(cm, blocks); 874 } 875 876 return cm; 877} 878 879static QUANT_FN(pvq_decode_band) 880{ 881#if CONFIG_OPUS_DECODER 882 return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration, 883 lowband_out, level, gain, lowband_scratch, fill, 0); 884#else 885 return 0; 886#endif 887} 888 889static QUANT_FN(pvq_encode_band) 890{ 891#if CONFIG_OPUS_ENCODER 892 return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration, 893 lowband_out, level, gain, lowband_scratch, fill, 1); 894#else 895 return 0; 896#endif 897} 898 899int av_cold ff_celt_pvq_init(CeltPVQ **pvq, int encode) 900{ 901 CeltPVQ *s = av_malloc(sizeof(CeltPVQ)); 902 if (!s) 903 return AVERROR(ENOMEM); 904 905 s->pvq_search = ppp_pvq_search_c; 906 s->quant_band = encode ? pvq_encode_band : pvq_decode_band; 907 908#if CONFIG_OPUS_ENCODER && ARCH_X86 909 ff_celt_pvq_init_x86(s); 910#endif 911 912 *pvq = s; 913 914 return 0; 915} 916 917void av_cold ff_celt_pvq_uninit(CeltPVQ **pvq) 918{ 919 av_freep(pvq); 920} 921