1/* 2 * Copyright (c) 2012 Andrew D'Addesio 3 * Copyright (c) 2013-2014 Mozilla Corporation 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * Opus SILK decoder 25 */ 26 27#include <stdint.h> 28 29#include "opus.h" 30#include "opustab.h" 31 32typedef struct SilkFrame { 33 int coded; 34 int log_gain; 35 int16_t nlsf[16]; 36 float lpc[16]; 37 38 float output [2 * SILK_HISTORY]; 39 float lpc_history[2 * SILK_HISTORY]; 40 int primarylag; 41 42 int prev_voiced; 43} SilkFrame; 44 45struct SilkContext { 46 AVCodecContext *avctx; 47 int output_channels; 48 49 int midonly; 50 int subframes; 51 int sflength; 52 int flength; 53 int nlsf_interp_factor; 54 55 enum OpusBandwidth bandwidth; 56 int wb; 57 58 SilkFrame frame[2]; 59 float prev_stereo_weights[2]; 60 float stereo_weights[2]; 61 62 int prev_coded_channels; 63}; 64 65static inline void silk_stabilize_lsf(int16_t nlsf[16], int order, const uint16_t min_delta[17]) 66{ 67 int pass, i; 68 for (pass = 0; pass < 20; pass++) { 69 int k, min_diff = 0; 70 for (i = 0; i < order+1; i++) { 71 int low = i != 0 ? nlsf[i-1] : 0; 72 int high = i != order ? nlsf[i] : 32768; 73 int diff = (high - low) - (min_delta[i]); 74 75 if (diff < min_diff) { 76 min_diff = diff; 77 k = i; 78 79 if (pass == 20) 80 break; 81 } 82 } 83 if (min_diff == 0) /* no issues; stabilized */ 84 return; 85 86 /* wiggle one or two LSFs */ 87 if (k == 0) { 88 /* repel away from lower bound */ 89 nlsf[0] = min_delta[0]; 90 } else if (k == order) { 91 /* repel away from higher bound */ 92 nlsf[order-1] = 32768 - min_delta[order]; 93 } else { 94 /* repel away from current position */ 95 int min_center = 0, max_center = 32768, center_val; 96 97 /* lower extent */ 98 for (i = 0; i < k; i++) 99 min_center += min_delta[i]; 100 min_center += min_delta[k] >> 1; 101 102 /* upper extent */ 103 for (i = order; i > k; i--) 104 max_center -= min_delta[i]; 105 max_center -= min_delta[k] >> 1; 106 107 /* move apart */ 108 center_val = nlsf[k - 1] + nlsf[k]; 109 center_val = (center_val >> 1) + (center_val & 1); // rounded divide by 2 110 center_val = FFMIN(max_center, FFMAX(min_center, center_val)); 111 112 nlsf[k - 1] = center_val - (min_delta[k] >> 1); 113 nlsf[k] = nlsf[k - 1] + min_delta[k]; 114 } 115 } 116 117 /* resort to the fall-back method, the standard method for LSF stabilization */ 118 119 /* sort; as the LSFs should be nearly sorted, use insertion sort */ 120 for (i = 1; i < order; i++) { 121 int j, value = nlsf[i]; 122 for (j = i - 1; j >= 0 && nlsf[j] > value; j--) 123 nlsf[j + 1] = nlsf[j]; 124 nlsf[j + 1] = value; 125 } 126 127 /* push forwards to increase distance */ 128 if (nlsf[0] < min_delta[0]) 129 nlsf[0] = min_delta[0]; 130 for (i = 1; i < order; i++) 131 nlsf[i] = FFMAX(nlsf[i], FFMIN(nlsf[i - 1] + min_delta[i], 32767)); 132 133 /* push backwards to increase distance */ 134 if (nlsf[order-1] > 32768 - min_delta[order]) 135 nlsf[order-1] = 32768 - min_delta[order]; 136 for (i = order-2; i >= 0; i--) 137 if (nlsf[i] > nlsf[i + 1] - min_delta[i+1]) 138 nlsf[i] = nlsf[i + 1] - min_delta[i+1]; 139 140 return; 141} 142 143static inline int silk_is_lpc_stable(const int16_t lpc[16], int order) 144{ 145 int k, j, DC_resp = 0; 146 int32_t lpc32[2][16]; // Q24 147 int totalinvgain = 1 << 30; // 1.0 in Q30 148 int32_t *row = lpc32[0], *prevrow; 149 150 /* initialize the first row for the Levinson recursion */ 151 for (k = 0; k < order; k++) { 152 DC_resp += lpc[k]; 153 row[k] = lpc[k] * 4096; 154 } 155 156 if (DC_resp >= 4096) 157 return 0; 158 159 /* check if prediction gain pushes any coefficients too far */ 160 for (k = order - 1; 1; k--) { 161 int rc; // Q31; reflection coefficient 162 int gaindiv; // Q30; inverse of the gain (the divisor) 163 int gain; // gain for this reflection coefficient 164 int fbits; // fractional bits used for the gain 165 int error; // Q29; estimate of the error of our partial estimate of 1/gaindiv 166 167 if (FFABS(row[k]) > 16773022) 168 return 0; 169 170 rc = -(row[k] * 128); 171 gaindiv = (1 << 30) - MULH(rc, rc); 172 173 totalinvgain = MULH(totalinvgain, gaindiv) << 2; 174 if (k == 0) 175 return (totalinvgain >= 107374); 176 177 /* approximate 1.0/gaindiv */ 178 fbits = opus_ilog(gaindiv); 179 gain = ((1 << 29) - 1) / (gaindiv >> (fbits + 1 - 16)); // Q<fbits-16> 180 error = (1 << 29) - MULL(gaindiv << (15 + 16 - fbits), gain, 16); 181 gain = ((gain << 16) + (error * gain >> 13)); 182 183 /* switch to the next row of the LPC coefficients */ 184 prevrow = row; 185 row = lpc32[k & 1]; 186 187 for (j = 0; j < k; j++) { 188 int x = av_sat_sub32(prevrow[j], ROUND_MULL(prevrow[k - j - 1], rc, 31)); 189 int64_t tmp = ROUND_MULL(x, gain, fbits); 190 191 /* per RFC 8251 section 6, if this calculation overflows, the filter 192 is considered unstable. */ 193 if (tmp < INT32_MIN || tmp > INT32_MAX) 194 return 0; 195 196 row[j] = (int32_t)tmp; 197 } 198 } 199} 200 201static void silk_lsp2poly(const int32_t lsp[/* 2 * half_order - 1 */], 202 int32_t pol[/* half_order + 1 */], int half_order) 203{ 204 int i, j; 205 206 pol[0] = 65536; // 1.0 in Q16 207 pol[1] = -lsp[0]; 208 209 for (i = 1; i < half_order; i++) { 210 pol[i + 1] = pol[i - 1] * 2 - ROUND_MULL(lsp[2 * i], pol[i], 16); 211 for (j = i; j > 1; j--) 212 pol[j] += pol[j - 2] - ROUND_MULL(lsp[2 * i], pol[j - 1], 16); 213 214 pol[1] -= lsp[2 * i]; 215 } 216} 217 218static void silk_lsf2lpc(const int16_t nlsf[16], float lpcf[16], int order) 219{ 220 int i, k; 221 int32_t lsp[16]; // Q17; 2*cos(LSF) 222 int32_t p[9], q[9]; // Q16 223 int32_t lpc32[16]; // Q17 224 int16_t lpc[16]; // Q12 225 226 /* convert the LSFs to LSPs, i.e. 2*cos(LSF) */ 227 for (k = 0; k < order; k++) { 228 int index = nlsf[k] >> 8; 229 int offset = nlsf[k] & 255; 230 int k2 = (order == 10) ? ff_silk_lsf_ordering_nbmb[k] : ff_silk_lsf_ordering_wb[k]; 231 232 /* interpolate and round */ 233 lsp[k2] = ff_silk_cosine[index] * 256; 234 lsp[k2] += (ff_silk_cosine[index + 1] - ff_silk_cosine[index]) * offset; 235 lsp[k2] = (lsp[k2] + 4) >> 3; 236 } 237 238 silk_lsp2poly(lsp , p, order >> 1); 239 silk_lsp2poly(lsp + 1, q, order >> 1); 240 241 /* reconstruct A(z) */ 242 for (k = 0; k < order>>1; k++) { 243 int32_t p_tmp = p[k + 1] + p[k]; 244 int32_t q_tmp = q[k + 1] - q[k]; 245 lpc32[k] = -q_tmp - p_tmp; 246 lpc32[order-k-1] = q_tmp - p_tmp; 247 } 248 249 /* limit the range of the LPC coefficients to each fit within an int16_t */ 250 for (i = 0; i < 10; i++) { 251 int j; 252 unsigned int maxabs = 0; 253 for (j = 0, k = 0; j < order; j++) { 254 unsigned int x = FFABS(lpc32[k]); 255 if (x > maxabs) { 256 maxabs = x; // Q17 257 k = j; 258 } 259 } 260 261 maxabs = (maxabs + 16) >> 5; // convert to Q12 262 263 if (maxabs > 32767) { 264 /* perform bandwidth expansion */ 265 unsigned int chirp, chirp_base; // Q16 266 maxabs = FFMIN(maxabs, 163838); // anything above this overflows chirp's numerator 267 chirp_base = chirp = 65470 - ((maxabs - 32767) << 14) / ((maxabs * (k+1)) >> 2); 268 269 for (k = 0; k < order; k++) { 270 lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16); 271 chirp = (chirp_base * chirp + 32768) >> 16; 272 } 273 } else break; 274 } 275 276 if (i == 10) { 277 /* time's up: just clamp */ 278 for (k = 0; k < order; k++) { 279 int x = (lpc32[k] + 16) >> 5; 280 lpc[k] = av_clip_int16(x); 281 lpc32[k] = lpc[k] << 5; // shortcut mandated by the spec; drops lower 5 bits 282 } 283 } else { 284 for (k = 0; k < order; k++) 285 lpc[k] = (lpc32[k] + 16) >> 5; 286 } 287 288 /* if the prediction gain causes the LPC filter to become unstable, 289 apply further bandwidth expansion on the Q17 coefficients */ 290 for (i = 1; i <= 16 && !silk_is_lpc_stable(lpc, order); i++) { 291 unsigned int chirp, chirp_base; 292 chirp_base = chirp = 65536 - (1 << i); 293 294 for (k = 0; k < order; k++) { 295 lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16); 296 lpc[k] = (lpc32[k] + 16) >> 5; 297 chirp = (chirp_base * chirp + 32768) >> 16; 298 } 299 } 300 301 for (i = 0; i < order; i++) 302 lpcf[i] = lpc[i] / 4096.0f; 303} 304 305static inline void silk_decode_lpc(SilkContext *s, SilkFrame *frame, 306 OpusRangeCoder *rc, 307 float lpc_leadin[16], float lpc[16], 308 int *lpc_order, int *has_lpc_leadin, int voiced) 309{ 310 int i; 311 int order; // order of the LP polynomial; 10 for NB/MB and 16 for WB 312 int8_t lsf_i1, lsf_i2[16]; // stage-1 and stage-2 codebook indices 313 int16_t lsf_res[16]; // residual as a Q10 value 314 int16_t nlsf[16]; // Q15 315 316 *lpc_order = order = s->wb ? 16 : 10; 317 318 /* obtain LSF stage-1 and stage-2 indices */ 319 lsf_i1 = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s1[s->wb][voiced]); 320 for (i = 0; i < order; i++) { 321 int index = s->wb ? ff_silk_lsf_s2_model_sel_wb [lsf_i1][i] : 322 ff_silk_lsf_s2_model_sel_nbmb[lsf_i1][i]; 323 lsf_i2[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2[index]) - 4; 324 if (lsf_i2[i] == -4) 325 lsf_i2[i] -= ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2_ext); 326 else if (lsf_i2[i] == 4) 327 lsf_i2[i] += ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_s2_ext); 328 } 329 330 /* reverse the backwards-prediction step */ 331 for (i = order - 1; i >= 0; i--) { 332 int qstep = s->wb ? 9830 : 11796; 333 334 lsf_res[i] = lsf_i2[i] * 1024; 335 if (lsf_i2[i] < 0) lsf_res[i] += 102; 336 else if (lsf_i2[i] > 0) lsf_res[i] -= 102; 337 lsf_res[i] = (lsf_res[i] * qstep) >> 16; 338 339 if (i + 1 < order) { 340 int weight = s->wb ? ff_silk_lsf_pred_weights_wb [ff_silk_lsf_weight_sel_wb [lsf_i1][i]][i] : 341 ff_silk_lsf_pred_weights_nbmb[ff_silk_lsf_weight_sel_nbmb[lsf_i1][i]][i]; 342 lsf_res[i] += (lsf_res[i+1] * weight) >> 8; 343 } 344 } 345 346 /* reconstruct the NLSF coefficients from the supplied indices */ 347 for (i = 0; i < order; i++) { 348 const uint8_t * codebook = s->wb ? ff_silk_lsf_codebook_wb [lsf_i1] : 349 ff_silk_lsf_codebook_nbmb[lsf_i1]; 350 int cur, prev, next, weight_sq, weight, ipart, fpart, y, value; 351 352 /* find the weight of the residual */ 353 /* TODO: precompute */ 354 cur = codebook[i]; 355 prev = i ? codebook[i - 1] : 0; 356 next = i + 1 < order ? codebook[i + 1] : 256; 357 weight_sq = (1024 / (cur - prev) + 1024 / (next - cur)) << 16; 358 359 /* approximate square-root with mandated fixed-point arithmetic */ 360 ipart = opus_ilog(weight_sq); 361 fpart = (weight_sq >> (ipart-8)) & 127; 362 y = ((ipart & 1) ? 32768 : 46214) >> ((32 - ipart)>>1); 363 weight = y + ((213 * fpart * y) >> 16); 364 365 value = cur * 128 + (lsf_res[i] * 16384) / weight; 366 nlsf[i] = av_clip_uintp2(value, 15); 367 } 368 369 /* stabilize the NLSF coefficients */ 370 silk_stabilize_lsf(nlsf, order, s->wb ? ff_silk_lsf_min_spacing_wb : 371 ff_silk_lsf_min_spacing_nbmb); 372 373 /* produce an interpolation for the first 2 subframes, */ 374 /* and then convert both sets of NLSFs to LPC coefficients */ 375 *has_lpc_leadin = 0; 376 if (s->subframes == 4) { 377 int offset = ff_opus_rc_dec_cdf(rc, ff_silk_model_lsf_interpolation_offset); 378 if (offset != 4 && frame->coded) { 379 *has_lpc_leadin = 1; 380 if (offset != 0) { 381 int16_t nlsf_leadin[16]; 382 for (i = 0; i < order; i++) 383 nlsf_leadin[i] = frame->nlsf[i] + 384 ((nlsf[i] - frame->nlsf[i]) * offset >> 2); 385 silk_lsf2lpc(nlsf_leadin, lpc_leadin, order); 386 } else /* avoid re-computation for a (roughly) 1-in-4 occurrence */ 387 memcpy(lpc_leadin, frame->lpc, 16 * sizeof(float)); 388 } else 389 offset = 4; 390 s->nlsf_interp_factor = offset; 391 392 silk_lsf2lpc(nlsf, lpc, order); 393 } else { 394 s->nlsf_interp_factor = 4; 395 silk_lsf2lpc(nlsf, lpc, order); 396 } 397 398 memcpy(frame->nlsf, nlsf, order * sizeof(nlsf[0])); 399 memcpy(frame->lpc, lpc, order * sizeof(lpc[0])); 400} 401 402static inline void silk_count_children(OpusRangeCoder *rc, int model, int32_t total, 403 int32_t child[2]) 404{ 405 if (total != 0) { 406 child[0] = ff_opus_rc_dec_cdf(rc, 407 ff_silk_model_pulse_location[model] + (((total - 1 + 5) * (total - 1)) >> 1)); 408 child[1] = total - child[0]; 409 } else { 410 child[0] = 0; 411 child[1] = 0; 412 } 413} 414 415static inline void silk_decode_excitation(SilkContext *s, OpusRangeCoder *rc, 416 float* excitationf, 417 int qoffset_high, int active, int voiced) 418{ 419 int i; 420 uint32_t seed; 421 int shellblocks; 422 int ratelevel; 423 uint8_t pulsecount[20]; // total pulses in each shell block 424 uint8_t lsbcount[20] = {0}; // raw lsbits defined for each pulse in each shell block 425 int32_t excitation[320]; // Q23 426 427 /* excitation parameters */ 428 seed = ff_opus_rc_dec_cdf(rc, ff_silk_model_lcg_seed); 429 shellblocks = ff_silk_shell_blocks[s->bandwidth][s->subframes >> 2]; 430 ratelevel = ff_opus_rc_dec_cdf(rc, ff_silk_model_exc_rate[voiced]); 431 432 for (i = 0; i < shellblocks; i++) { 433 pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[ratelevel]); 434 if (pulsecount[i] == 17) { 435 while (pulsecount[i] == 17 && ++lsbcount[i] != 10) 436 pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[9]); 437 if (lsbcount[i] == 10) 438 pulsecount[i] = ff_opus_rc_dec_cdf(rc, ff_silk_model_pulse_count[10]); 439 } 440 } 441 442 /* decode pulse locations using PVQ */ 443 for (i = 0; i < shellblocks; i++) { 444 if (pulsecount[i] != 0) { 445 int a, b, c, d; 446 int32_t * location = excitation + 16*i; 447 int32_t branch[4][2]; 448 branch[0][0] = pulsecount[i]; 449 450 /* unrolled tail recursion */ 451 for (a = 0; a < 1; a++) { 452 silk_count_children(rc, 0, branch[0][a], branch[1]); 453 for (b = 0; b < 2; b++) { 454 silk_count_children(rc, 1, branch[1][b], branch[2]); 455 for (c = 0; c < 2; c++) { 456 silk_count_children(rc, 2, branch[2][c], branch[3]); 457 for (d = 0; d < 2; d++) { 458 silk_count_children(rc, 3, branch[3][d], location); 459 location += 2; 460 } 461 } 462 } 463 } 464 } else 465 memset(excitation + 16*i, 0, 16*sizeof(int32_t)); 466 } 467 468 /* decode least significant bits */ 469 for (i = 0; i < shellblocks << 4; i++) { 470 int bit; 471 for (bit = 0; bit < lsbcount[i >> 4]; bit++) 472 excitation[i] = (excitation[i] << 1) | 473 ff_opus_rc_dec_cdf(rc, ff_silk_model_excitation_lsb); 474 } 475 476 /* decode signs */ 477 for (i = 0; i < shellblocks << 4; i++) { 478 if (excitation[i] != 0) { 479 int sign = ff_opus_rc_dec_cdf(rc, ff_silk_model_excitation_sign[active + 480 voiced][qoffset_high][FFMIN(pulsecount[i >> 4], 6)]); 481 if (sign == 0) 482 excitation[i] *= -1; 483 } 484 } 485 486 /* assemble the excitation */ 487 for (i = 0; i < shellblocks << 4; i++) { 488 int value = excitation[i]; 489 excitation[i] = value * 256 | ff_silk_quant_offset[voiced][qoffset_high]; 490 if (value < 0) excitation[i] += 20; 491 else if (value > 0) excitation[i] -= 20; 492 493 /* invert samples pseudorandomly */ 494 seed = 196314165 * seed + 907633515; 495 if (seed & 0x80000000) 496 excitation[i] *= -1; 497 seed += value; 498 499 excitationf[i] = excitation[i] / 8388608.0f; 500 } 501} 502 503/** Maximum residual history according to 4.2.7.6.1 */ 504#define SILK_MAX_LAG (288 + LTP_ORDER / 2) 505 506/** Order of the LTP filter */ 507#define LTP_ORDER 5 508 509static void silk_decode_frame(SilkContext *s, OpusRangeCoder *rc, 510 int frame_num, int channel, int coded_channels, 511 int active, int active1, int redundant) 512{ 513 /* per frame */ 514 int voiced; // combines with active to indicate inactive, active, or active+voiced 515 int qoffset_high; 516 int order; // order of the LPC coefficients 517 float lpc_leadin[16], lpc_body[16], residual[SILK_MAX_LAG + SILK_HISTORY]; 518 int has_lpc_leadin; 519 float ltpscale; 520 521 /* per subframe */ 522 struct { 523 float gain; 524 int pitchlag; 525 float ltptaps[5]; 526 } sf[4]; 527 528 SilkFrame * const frame = s->frame + channel; 529 530 int i; 531 532 /* obtain stereo weights */ 533 if (coded_channels == 2 && channel == 0) { 534 int n, wi[2], ws[2], w[2]; 535 n = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s1); 536 wi[0] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s2) + 3 * (n / 5); 537 ws[0] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s3); 538 wi[1] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s2) + 3 * (n % 5); 539 ws[1] = ff_opus_rc_dec_cdf(rc, ff_silk_model_stereo_s3); 540 541 for (i = 0; i < 2; i++) 542 w[i] = ff_silk_stereo_weights[wi[i]] + 543 (((ff_silk_stereo_weights[wi[i] + 1] - ff_silk_stereo_weights[wi[i]]) * 6554) >> 16) 544 * (ws[i]*2 + 1); 545 546 s->stereo_weights[0] = (w[0] - w[1]) / 8192.0; 547 s->stereo_weights[1] = w[1] / 8192.0; 548 549 /* and read the mid-only flag */ 550 s->midonly = active1 ? 0 : ff_opus_rc_dec_cdf(rc, ff_silk_model_mid_only); 551 } 552 553 /* obtain frame type */ 554 if (!active) { 555 qoffset_high = ff_opus_rc_dec_cdf(rc, ff_silk_model_frame_type_inactive); 556 voiced = 0; 557 } else { 558 int type = ff_opus_rc_dec_cdf(rc, ff_silk_model_frame_type_active); 559 qoffset_high = type & 1; 560 voiced = type >> 1; 561 } 562 563 /* obtain subframe quantization gains */ 564 for (i = 0; i < s->subframes; i++) { 565 int log_gain; //Q7 566 int ipart, fpart, lingain; 567 568 if (i == 0 && (frame_num == 0 || !frame->coded)) { 569 /* gain is coded absolute */ 570 int x = ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_highbits[active + voiced]); 571 log_gain = (x<<3) | ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_lowbits); 572 573 if (frame->coded) 574 log_gain = FFMAX(log_gain, frame->log_gain - 16); 575 } else { 576 /* gain is coded relative */ 577 int delta_gain = ff_opus_rc_dec_cdf(rc, ff_silk_model_gain_delta); 578 log_gain = av_clip_uintp2(FFMAX((delta_gain<<1) - 16, 579 frame->log_gain + delta_gain - 4), 6); 580 } 581 582 frame->log_gain = log_gain; 583 584 /* approximate 2**(x/128) with a Q7 (i.e. non-integer) input */ 585 log_gain = (log_gain * 0x1D1C71 >> 16) + 2090; 586 ipart = log_gain >> 7; 587 fpart = log_gain & 127; 588 lingain = (1 << ipart) + ((-174 * fpart * (128-fpart) >>16) + fpart) * ((1<<ipart) >> 7); 589 sf[i].gain = lingain / 65536.0f; 590 } 591 592 /* obtain LPC filter coefficients */ 593 silk_decode_lpc(s, frame, rc, lpc_leadin, lpc_body, &order, &has_lpc_leadin, voiced); 594 595 /* obtain pitch lags, if this is a voiced frame */ 596 if (voiced) { 597 int lag_absolute = (!frame_num || !frame->prev_voiced); 598 int primarylag; // primary pitch lag for the entire SILK frame 599 int ltpfilter; 600 const int8_t * offsets; 601 602 if (!lag_absolute) { 603 int delta = ff_opus_rc_dec_cdf(rc, ff_silk_model_pitch_delta); 604 if (delta) 605 primarylag = frame->primarylag + delta - 9; 606 else 607 lag_absolute = 1; 608 } 609 610 if (lag_absolute) { 611 /* primary lag is coded absolute */ 612 int highbits, lowbits; 613 static const uint16_t * const model[] = { 614 ff_silk_model_pitch_lowbits_nb, ff_silk_model_pitch_lowbits_mb, 615 ff_silk_model_pitch_lowbits_wb 616 }; 617 highbits = ff_opus_rc_dec_cdf(rc, ff_silk_model_pitch_highbits); 618 lowbits = ff_opus_rc_dec_cdf(rc, model[s->bandwidth]); 619 620 primarylag = ff_silk_pitch_min_lag[s->bandwidth] + 621 highbits*ff_silk_pitch_scale[s->bandwidth] + lowbits; 622 } 623 frame->primarylag = primarylag; 624 625 if (s->subframes == 2) 626 offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) 627 ? ff_silk_pitch_offset_nb10ms[ff_opus_rc_dec_cdf(rc, 628 ff_silk_model_pitch_contour_nb10ms)] 629 : ff_silk_pitch_offset_mbwb10ms[ff_opus_rc_dec_cdf(rc, 630 ff_silk_model_pitch_contour_mbwb10ms)]; 631 else 632 offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) 633 ? ff_silk_pitch_offset_nb20ms[ff_opus_rc_dec_cdf(rc, 634 ff_silk_model_pitch_contour_nb20ms)] 635 : ff_silk_pitch_offset_mbwb20ms[ff_opus_rc_dec_cdf(rc, 636 ff_silk_model_pitch_contour_mbwb20ms)]; 637 638 for (i = 0; i < s->subframes; i++) 639 sf[i].pitchlag = av_clip(primarylag + offsets[i], 640 ff_silk_pitch_min_lag[s->bandwidth], 641 ff_silk_pitch_max_lag[s->bandwidth]); 642 643 /* obtain LTP filter coefficients */ 644 ltpfilter = ff_opus_rc_dec_cdf(rc, ff_silk_model_ltp_filter); 645 for (i = 0; i < s->subframes; i++) { 646 int index, j; 647 static const uint16_t * const filter_sel[] = { 648 ff_silk_model_ltp_filter0_sel, ff_silk_model_ltp_filter1_sel, 649 ff_silk_model_ltp_filter2_sel 650 }; 651 static const int8_t (* const filter_taps[])[5] = { 652 ff_silk_ltp_filter0_taps, ff_silk_ltp_filter1_taps, ff_silk_ltp_filter2_taps 653 }; 654 index = ff_opus_rc_dec_cdf(rc, filter_sel[ltpfilter]); 655 for (j = 0; j < 5; j++) 656 sf[i].ltptaps[j] = filter_taps[ltpfilter][index][j] / 128.0f; 657 } 658 } 659 660 /* obtain LTP scale factor */ 661 if (voiced && frame_num == 0) 662 ltpscale = ff_silk_ltp_scale_factor[ff_opus_rc_dec_cdf(rc, 663 ff_silk_model_ltp_scale_index)] / 16384.0f; 664 else ltpscale = 15565.0f/16384.0f; 665 666 /* generate the excitation signal for the entire frame */ 667 silk_decode_excitation(s, rc, residual + SILK_MAX_LAG, qoffset_high, 668 active, voiced); 669 670 /* skip synthesising the output if we do not need it */ 671 // TODO: implement error recovery 672 if (s->output_channels == channel || redundant) 673 return; 674 675 /* generate the output signal */ 676 for (i = 0; i < s->subframes; i++) { 677 const float * lpc_coeff = (i < 2 && has_lpc_leadin) ? lpc_leadin : lpc_body; 678 float *dst = frame->output + SILK_HISTORY + i * s->sflength; 679 float *resptr = residual + SILK_MAX_LAG + i * s->sflength; 680 float *lpc = frame->lpc_history + SILK_HISTORY + i * s->sflength; 681 float sum; 682 int j, k; 683 684 if (voiced) { 685 int out_end; 686 float scale; 687 688 if (i < 2 || s->nlsf_interp_factor == 4) { 689 out_end = -i * s->sflength; 690 scale = ltpscale; 691 } else { 692 out_end = -(i - 2) * s->sflength; 693 scale = 1.0f; 694 } 695 696 /* when the LPC coefficients change, a re-whitening filter is used */ 697 /* to produce a residual that accounts for the change */ 698 for (j = - sf[i].pitchlag - LTP_ORDER/2; j < out_end; j++) { 699 sum = dst[j]; 700 for (k = 0; k < order; k++) 701 sum -= lpc_coeff[k] * dst[j - k - 1]; 702 resptr[j] = av_clipf(sum, -1.0f, 1.0f) * scale / sf[i].gain; 703 } 704 705 if (out_end) { 706 float rescale = sf[i-1].gain / sf[i].gain; 707 for (j = out_end; j < 0; j++) 708 resptr[j] *= rescale; 709 } 710 711 /* LTP synthesis */ 712 for (j = 0; j < s->sflength; j++) { 713 sum = resptr[j]; 714 for (k = 0; k < LTP_ORDER; k++) 715 sum += sf[i].ltptaps[k] * resptr[j - sf[i].pitchlag + LTP_ORDER/2 - k]; 716 resptr[j] = sum; 717 } 718 } 719 720 /* LPC synthesis */ 721 for (j = 0; j < s->sflength; j++) { 722 sum = resptr[j] * sf[i].gain; 723 for (k = 1; k <= order; k++) 724 sum += lpc_coeff[k - 1] * lpc[j - k]; 725 726 lpc[j] = sum; 727 dst[j] = av_clipf(sum, -1.0f, 1.0f); 728 } 729 } 730 731 frame->prev_voiced = voiced; 732 memmove(frame->lpc_history, frame->lpc_history + s->flength, SILK_HISTORY * sizeof(float)); 733 memmove(frame->output, frame->output + s->flength, SILK_HISTORY * sizeof(float)); 734 735 frame->coded = 1; 736} 737 738static void silk_unmix_ms(SilkContext *s, float *l, float *r) 739{ 740 float *mid = s->frame[0].output + SILK_HISTORY - s->flength; 741 float *side = s->frame[1].output + SILK_HISTORY - s->flength; 742 float w0_prev = s->prev_stereo_weights[0]; 743 float w1_prev = s->prev_stereo_weights[1]; 744 float w0 = s->stereo_weights[0]; 745 float w1 = s->stereo_weights[1]; 746 int n1 = ff_silk_stereo_interp_len[s->bandwidth]; 747 int i; 748 749 for (i = 0; i < n1; i++) { 750 float interp0 = w0_prev + i * (w0 - w0_prev) / n1; 751 float interp1 = w1_prev + i * (w1 - w1_prev) / n1; 752 float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); 753 754 l[i] = av_clipf((1 + interp1) * mid[i - 1] + side[i - 1] + interp0 * p0, -1.0, 1.0); 755 r[i] = av_clipf((1 - interp1) * mid[i - 1] - side[i - 1] - interp0 * p0, -1.0, 1.0); 756 } 757 758 for (; i < s->flength; i++) { 759 float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); 760 761 l[i] = av_clipf((1 + w1) * mid[i - 1] + side[i - 1] + w0 * p0, -1.0, 1.0); 762 r[i] = av_clipf((1 - w1) * mid[i - 1] - side[i - 1] - w0 * p0, -1.0, 1.0); 763 } 764 765 memcpy(s->prev_stereo_weights, s->stereo_weights, sizeof(s->stereo_weights)); 766} 767 768static void silk_flush_frame(SilkFrame *frame) 769{ 770 if (!frame->coded) 771 return; 772 773 memset(frame->output, 0, sizeof(frame->output)); 774 memset(frame->lpc_history, 0, sizeof(frame->lpc_history)); 775 776 memset(frame->lpc, 0, sizeof(frame->lpc)); 777 memset(frame->nlsf, 0, sizeof(frame->nlsf)); 778 779 frame->log_gain = 0; 780 781 frame->primarylag = 0; 782 frame->prev_voiced = 0; 783 frame->coded = 0; 784} 785 786int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc, 787 float *output[2], 788 enum OpusBandwidth bandwidth, 789 int coded_channels, 790 int duration_ms) 791{ 792 int active[2][6], redundancy[2]; 793 int nb_frames, i, j; 794 795 if (bandwidth > OPUS_BANDWIDTH_WIDEBAND || 796 coded_channels > 2 || duration_ms > 60) { 797 av_log(s->avctx, AV_LOG_ERROR, "Invalid parameters passed " 798 "to the SILK decoder.\n"); 799 return AVERROR(EINVAL); 800 } 801 802 nb_frames = 1 + (duration_ms > 20) + (duration_ms > 40); 803 s->subframes = duration_ms / nb_frames / 5; // 5ms subframes 804 s->sflength = 20 * (bandwidth + 2); 805 s->flength = s->sflength * s->subframes; 806 s->bandwidth = bandwidth; 807 s->wb = bandwidth == OPUS_BANDWIDTH_WIDEBAND; 808 809 /* make sure to flush the side channel when switching from mono to stereo */ 810 if (coded_channels > s->prev_coded_channels) 811 silk_flush_frame(&s->frame[1]); 812 s->prev_coded_channels = coded_channels; 813 814 /* read the LP-layer header bits */ 815 for (i = 0; i < coded_channels; i++) { 816 for (j = 0; j < nb_frames; j++) 817 active[i][j] = ff_opus_rc_dec_log(rc, 1); 818 819 redundancy[i] = ff_opus_rc_dec_log(rc, 1); 820 } 821 822 /* read the per-frame LBRR flags */ 823 for (i = 0; i < coded_channels; i++) 824 if (redundancy[i] && duration_ms > 20) { 825 redundancy[i] = ff_opus_rc_dec_cdf(rc, duration_ms == 40 ? 826 ff_silk_model_lbrr_flags_40 : ff_silk_model_lbrr_flags_60); 827 } 828 829 /* decode the LBRR frames */ 830 for (i = 0; i < nb_frames; i++) { 831 for (j = 0; j < coded_channels; j++) 832 if (redundancy[j] & (1 << i)) { 833 int active1 = (j == 0 && !(redundancy[1] & (1 << i))) ? 0 : 1; 834 silk_decode_frame(s, rc, i, j, coded_channels, 1, active1, 1); 835 } 836 } 837 838 for (i = 0; i < nb_frames; i++) { 839 for (j = 0; j < coded_channels && !s->midonly; j++) 840 silk_decode_frame(s, rc, i, j, coded_channels, active[j][i], active[1][i], 0); 841 842 /* reset the side channel if it is not coded */ 843 if (s->midonly && s->frame[1].coded) 844 silk_flush_frame(&s->frame[1]); 845 846 if (coded_channels == 1 || s->output_channels == 1) { 847 for (j = 0; j < s->output_channels; j++) { 848 memcpy(output[j] + i * s->flength, 849 s->frame[0].output + SILK_HISTORY - s->flength - 2, 850 s->flength * sizeof(float)); 851 } 852 } else { 853 silk_unmix_ms(s, output[0] + i * s->flength, output[1] + i * s->flength); 854 } 855 856 s->midonly = 0; 857 } 858 859 return nb_frames * s->flength; 860} 861 862void ff_silk_free(SilkContext **ps) 863{ 864 av_freep(ps); 865} 866 867void ff_silk_flush(SilkContext *s) 868{ 869 silk_flush_frame(&s->frame[0]); 870 silk_flush_frame(&s->frame[1]); 871 872 memset(s->prev_stereo_weights, 0, sizeof(s->prev_stereo_weights)); 873} 874 875int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int output_channels) 876{ 877 SilkContext *s; 878 879 if (output_channels != 1 && output_channels != 2) { 880 av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n", 881 output_channels); 882 return AVERROR(EINVAL); 883 } 884 885 s = av_mallocz(sizeof(*s)); 886 if (!s) 887 return AVERROR(ENOMEM); 888 889 s->avctx = avctx; 890 s->output_channels = output_channels; 891 892 ff_silk_flush(s); 893 894 *ps = s; 895 896 return 0; 897} 898