1/* 2 * G.723.1 compatible decoder 3 * Copyright (c) 2006 Benjamin Larsson 4 * Copyright (c) 2010 Mohamed Naufal Basheer 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * G.723.1 compatible decoder 26 */ 27 28#include "libavutil/channel_layout.h" 29#include "libavutil/mem.h" 30#include "libavutil/opt.h" 31 32#define BITSTREAM_READER_LE 33#include "acelp_vectors.h" 34#include "avcodec.h" 35#include "celp_filters.h" 36#include "celp_math.h" 37#include "codec_internal.h" 38#include "get_bits.h" 39#include "internal.h" 40#include "g723_1.h" 41 42#define CNG_RANDOM_SEED 12345 43 44/** 45 * Postfilter gain weighting factors scaled by 2^15 46 */ 47static const int16_t ppf_gain_weight[2] = {0x1800, 0x2000}; 48 49static const int16_t pitch_contrib[340] = { 50 60, 0, 0, 2489, 60, 0, 0, 5217, 51 1, 6171, 0, 3953, 0, 10364, 1, 9357, 52 -1, 8843, 1, 9396, 0, 5794, -1, 10816, 53 2, 11606, -2, 12072, 0, 8616, 1, 12170, 54 0, 14440, 0, 7787, -1, 13721, 0, 18205, 55 0, 14471, 0, 15807, 1, 15275, 0, 13480, 56 -1, 18375, -1, 0, 1, 11194, -1, 13010, 57 1, 18836, -2, 20354, 1, 16233, -1, 0, 58 60, 0, 0, 12130, 0, 13385, 1, 17834, 59 1, 20875, 0, 21996, 1, 0, 1, 18277, 60 -1, 21321, 1, 13738, -1, 19094, -1, 20387, 61 -1, 0, 0, 21008, 60, 0, -2, 22807, 62 0, 15900, 1, 0, 0, 17989, -1, 22259, 63 1, 24395, 1, 23138, 0, 23948, 1, 22997, 64 2, 22604, -1, 25942, 0, 26246, 1, 25321, 65 0, 26423, 0, 24061, 0, 27247, 60, 0, 66 -1, 25572, 1, 23918, 1, 25930, 2, 26408, 67 -1, 19049, 1, 27357, -1, 24538, 60, 0, 68 -1, 25093, 0, 28549, 1, 0, 0, 22793, 69 -1, 25659, 0, 29377, 0, 30276, 0, 26198, 70 1, 22521, -1, 28919, 0, 27384, 1, 30162, 71 -1, 0, 0, 24237, -1, 30062, 0, 21763, 72 1, 30917, 60, 0, 0, 31284, 0, 29433, 73 1, 26821, 1, 28655, 0, 31327, 2, 30799, 74 1, 31389, 0, 32322, 1, 31760, -2, 31830, 75 0, 26936, -1, 31180, 1, 30875, 0, 27873, 76 -1, 30429, 1, 31050, 0, 0, 0, 31912, 77 1, 31611, 0, 31565, 0, 25557, 0, 31357, 78 60, 0, 1, 29536, 1, 28985, -1, 26984, 79 -1, 31587, 2, 30836, -2, 31133, 0, 30243, 80 -1, 30742, -1, 32090, 60, 0, 2, 30902, 81 60, 0, 0, 30027, 0, 29042, 60, 0, 82 0, 31756, 0, 24553, 0, 25636, -2, 30501, 83 60, 0, -1, 29617, 0, 30649, 60, 0, 84 0, 29274, 2, 30415, 0, 27480, 0, 31213, 85 -1, 28147, 0, 30600, 1, 31652, 2, 29068, 86 60, 0, 1, 28571, 1, 28730, 1, 31422, 87 0, 28257, 0, 24797, 60, 0, 0, 0, 88 60, 0, 0, 22105, 0, 27852, 60, 0, 89 60, 0, -1, 24214, 0, 24642, 0, 23305, 90 60, 0, 60, 0, 1, 22883, 0, 21601, 91 60, 0, 2, 25650, 60, 0, -2, 31253, 92 -2, 25144, 0, 17998 93}; 94 95/** 96 * Size of the MP-MLQ fixed excitation codebooks 97 */ 98static const int32_t max_pos[4] = {593775, 142506, 593775, 142506}; 99 100/** 101 * 0.65^i (Zero part) and 0.75^i (Pole part) scaled by 2^15 102 */ 103static const int16_t postfilter_tbl[2][LPC_ORDER] = { 104 /* Zero */ 105 {21299, 13844, 8999, 5849, 3802, 2471, 1606, 1044, 679, 441}, 106 /* Pole */ 107 {24576, 18432, 13824, 10368, 7776, 5832, 4374, 3281, 2460, 1845} 108}; 109 110static const int cng_adaptive_cb_lag[4] = { 1, 0, 1, 3 }; 111 112static const int cng_filt[4] = { 273, 998, 499, 333 }; 113 114static const int cng_bseg[3] = { 2048, 18432, 231233 }; 115 116static av_cold int g723_1_decode_init(AVCodecContext *avctx) 117{ 118 G723_1_Context *s = avctx->priv_data; 119 120 avctx->sample_fmt = AV_SAMPLE_FMT_S16P; 121 if (avctx->ch_layout.nb_channels < 1 || avctx->ch_layout.nb_channels > 2) { 122 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo are supported (requested channels: %d).\n", 123 avctx->ch_layout.nb_channels); 124 return AVERROR(EINVAL); 125 } 126 for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { 127 G723_1_ChannelContext *p = &s->ch[ch]; 128 129 p->pf_gain = 1 << 12; 130 131 memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(*p->prev_lsp)); 132 memcpy(p->sid_lsp, dc_lsp, LPC_ORDER * sizeof(*p->sid_lsp)); 133 134 p->cng_random_seed = CNG_RANDOM_SEED; 135 p->past_frame_type = SID_FRAME; 136 } 137 138 return 0; 139} 140 141/** 142 * Unpack the frame into parameters. 143 * 144 * @param p the context 145 * @param buf pointer to the input buffer 146 * @param buf_size size of the input buffer 147 */ 148static int unpack_bitstream(G723_1_ChannelContext *p, const uint8_t *buf, 149 int buf_size) 150{ 151 GetBitContext gb; 152 int ad_cb_len; 153 int temp, info_bits, i; 154 int ret; 155 156 ret = init_get_bits8(&gb, buf, buf_size); 157 if (ret < 0) 158 return ret; 159 160 /* Extract frame type and rate info */ 161 info_bits = get_bits(&gb, 2); 162 163 if (info_bits == 3) { 164 p->cur_frame_type = UNTRANSMITTED_FRAME; 165 return 0; 166 } 167 168 /* Extract 24 bit lsp indices, 8 bit for each band */ 169 p->lsp_index[2] = get_bits(&gb, 8); 170 p->lsp_index[1] = get_bits(&gb, 8); 171 p->lsp_index[0] = get_bits(&gb, 8); 172 173 if (info_bits == 2) { 174 p->cur_frame_type = SID_FRAME; 175 p->subframe[0].amp_index = get_bits(&gb, 6); 176 return 0; 177 } 178 179 /* Extract the info common to both rates */ 180 p->cur_rate = info_bits ? RATE_5300 : RATE_6300; 181 p->cur_frame_type = ACTIVE_FRAME; 182 183 p->pitch_lag[0] = get_bits(&gb, 7); 184 if (p->pitch_lag[0] > 123) /* test if forbidden code */ 185 return -1; 186 p->pitch_lag[0] += PITCH_MIN; 187 p->subframe[1].ad_cb_lag = get_bits(&gb, 2); 188 189 p->pitch_lag[1] = get_bits(&gb, 7); 190 if (p->pitch_lag[1] > 123) 191 return -1; 192 p->pitch_lag[1] += PITCH_MIN; 193 p->subframe[3].ad_cb_lag = get_bits(&gb, 2); 194 p->subframe[0].ad_cb_lag = 1; 195 p->subframe[2].ad_cb_lag = 1; 196 197 for (i = 0; i < SUBFRAMES; i++) { 198 /* Extract combined gain */ 199 temp = get_bits(&gb, 12); 200 ad_cb_len = 170; 201 p->subframe[i].dirac_train = 0; 202 if (p->cur_rate == RATE_6300 && p->pitch_lag[i >> 1] < SUBFRAME_LEN - 2) { 203 p->subframe[i].dirac_train = temp >> 11; 204 temp &= 0x7FF; 205 ad_cb_len = 85; 206 } 207 p->subframe[i].ad_cb_gain = FASTDIV(temp, GAIN_LEVELS); 208 if (p->subframe[i].ad_cb_gain < ad_cb_len) { 209 p->subframe[i].amp_index = temp - p->subframe[i].ad_cb_gain * 210 GAIN_LEVELS; 211 } else { 212 return -1; 213 } 214 } 215 216 p->subframe[0].grid_index = get_bits1(&gb); 217 p->subframe[1].grid_index = get_bits1(&gb); 218 p->subframe[2].grid_index = get_bits1(&gb); 219 p->subframe[3].grid_index = get_bits1(&gb); 220 221 if (p->cur_rate == RATE_6300) { 222 skip_bits1(&gb); /* skip reserved bit */ 223 224 /* Compute pulse_pos index using the 13-bit combined position index */ 225 temp = get_bits(&gb, 13); 226 p->subframe[0].pulse_pos = temp / 810; 227 228 temp -= p->subframe[0].pulse_pos * 810; 229 p->subframe[1].pulse_pos = FASTDIV(temp, 90); 230 231 temp -= p->subframe[1].pulse_pos * 90; 232 p->subframe[2].pulse_pos = FASTDIV(temp, 9); 233 p->subframe[3].pulse_pos = temp - p->subframe[2].pulse_pos * 9; 234 235 p->subframe[0].pulse_pos = (p->subframe[0].pulse_pos << 16) + 236 get_bits(&gb, 16); 237 p->subframe[1].pulse_pos = (p->subframe[1].pulse_pos << 14) + 238 get_bits(&gb, 14); 239 p->subframe[2].pulse_pos = (p->subframe[2].pulse_pos << 16) + 240 get_bits(&gb, 16); 241 p->subframe[3].pulse_pos = (p->subframe[3].pulse_pos << 14) + 242 get_bits(&gb, 14); 243 244 p->subframe[0].pulse_sign = get_bits(&gb, 6); 245 p->subframe[1].pulse_sign = get_bits(&gb, 5); 246 p->subframe[2].pulse_sign = get_bits(&gb, 6); 247 p->subframe[3].pulse_sign = get_bits(&gb, 5); 248 } else { /* 5300 bps */ 249 p->subframe[0].pulse_pos = get_bits(&gb, 12); 250 p->subframe[1].pulse_pos = get_bits(&gb, 12); 251 p->subframe[2].pulse_pos = get_bits(&gb, 12); 252 p->subframe[3].pulse_pos = get_bits(&gb, 12); 253 254 p->subframe[0].pulse_sign = get_bits(&gb, 4); 255 p->subframe[1].pulse_sign = get_bits(&gb, 4); 256 p->subframe[2].pulse_sign = get_bits(&gb, 4); 257 p->subframe[3].pulse_sign = get_bits(&gb, 4); 258 } 259 260 return 0; 261} 262 263/** 264 * Bitexact implementation of sqrt(val/2). 265 */ 266static int16_t square_root(unsigned val) 267{ 268 av_assert2(!(val & 0x80000000)); 269 270 return (ff_sqrt(val << 1) >> 1) & (~1); 271} 272 273/** 274 * Generate fixed codebook excitation vector. 275 * 276 * @param vector decoded excitation vector 277 * @param subfrm current subframe 278 * @param cur_rate current bitrate 279 * @param pitch_lag closed loop pitch lag 280 * @param index current subframe index 281 */ 282static void gen_fcb_excitation(int16_t *vector, G723_1_Subframe *subfrm, 283 enum Rate cur_rate, int pitch_lag, int index) 284{ 285 int temp, i, j; 286 287 memset(vector, 0, SUBFRAME_LEN * sizeof(*vector)); 288 289 if (cur_rate == RATE_6300) { 290 if (subfrm->pulse_pos >= max_pos[index]) 291 return; 292 293 /* Decode amplitudes and positions */ 294 j = PULSE_MAX - pulses[index]; 295 temp = subfrm->pulse_pos; 296 for (i = 0; i < SUBFRAME_LEN / GRID_SIZE; i++) { 297 temp -= ff_g723_1_combinatorial_table[j][i]; 298 if (temp >= 0) 299 continue; 300 temp += ff_g723_1_combinatorial_table[j++][i]; 301 if (subfrm->pulse_sign & (1 << (PULSE_MAX - j))) { 302 vector[subfrm->grid_index + GRID_SIZE * i] = 303 -ff_g723_1_fixed_cb_gain[subfrm->amp_index]; 304 } else { 305 vector[subfrm->grid_index + GRID_SIZE * i] = 306 ff_g723_1_fixed_cb_gain[subfrm->amp_index]; 307 } 308 if (j == PULSE_MAX) 309 break; 310 } 311 if (subfrm->dirac_train == 1) 312 ff_g723_1_gen_dirac_train(vector, pitch_lag); 313 } else { /* 5300 bps */ 314 int cb_gain = ff_g723_1_fixed_cb_gain[subfrm->amp_index]; 315 int cb_shift = subfrm->grid_index; 316 int cb_sign = subfrm->pulse_sign; 317 int cb_pos = subfrm->pulse_pos; 318 int offset, beta, lag; 319 320 for (i = 0; i < 8; i += 2) { 321 offset = ((cb_pos & 7) << 3) + cb_shift + i; 322 vector[offset] = (cb_sign & 1) ? cb_gain : -cb_gain; 323 cb_pos >>= 3; 324 cb_sign >>= 1; 325 } 326 327 /* Enhance harmonic components */ 328 lag = pitch_contrib[subfrm->ad_cb_gain << 1] + pitch_lag + 329 subfrm->ad_cb_lag - 1; 330 beta = pitch_contrib[(subfrm->ad_cb_gain << 1) + 1]; 331 332 if (lag < SUBFRAME_LEN - 2) { 333 for (i = lag; i < SUBFRAME_LEN; i++) 334 vector[i] += beta * vector[i - lag] >> 15; 335 } 336 } 337} 338 339/** 340 * Estimate maximum auto-correlation around pitch lag. 341 * 342 * @param buf buffer with offset applied 343 * @param offset offset of the excitation vector 344 * @param ccr_max pointer to the maximum auto-correlation 345 * @param pitch_lag decoded pitch lag 346 * @param length length of autocorrelation 347 * @param dir forward lag(1) / backward lag(-1) 348 */ 349static int autocorr_max(const int16_t *buf, int offset, int *ccr_max, 350 int pitch_lag, int length, int dir) 351{ 352 int limit, ccr, lag = 0; 353 int i; 354 355 pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag); 356 if (dir > 0) 357 limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3); 358 else 359 limit = pitch_lag + 3; 360 361 for (i = pitch_lag - 3; i <= limit; i++) { 362 ccr = ff_g723_1_dot_product(buf, buf + dir * i, length); 363 364 if (ccr > *ccr_max) { 365 *ccr_max = ccr; 366 lag = i; 367 } 368 } 369 return lag; 370} 371 372/** 373 * Calculate pitch postfilter optimal and scaling gains. 374 * 375 * @param lag pitch postfilter forward/backward lag 376 * @param ppf pitch postfilter parameters 377 * @param cur_rate current bitrate 378 * @param tgt_eng target energy 379 * @param ccr cross-correlation 380 * @param res_eng residual energy 381 */ 382static void comp_ppf_gains(int lag, PPFParam *ppf, enum Rate cur_rate, 383 int tgt_eng, int ccr, int res_eng) 384{ 385 int pf_residual; /* square of postfiltered residual */ 386 int temp1, temp2; 387 388 ppf->index = lag; 389 390 temp1 = tgt_eng * res_eng >> 1; 391 temp2 = ccr * ccr << 1; 392 393 if (temp2 > temp1) { 394 if (ccr >= res_eng) { 395 ppf->opt_gain = ppf_gain_weight[cur_rate]; 396 } else { 397 ppf->opt_gain = (ccr << 15) / res_eng * 398 ppf_gain_weight[cur_rate] >> 15; 399 } 400 /* pf_res^2 = tgt_eng + 2*ccr*gain + res_eng*gain^2 */ 401 temp1 = (tgt_eng << 15) + (ccr * ppf->opt_gain << 1); 402 temp2 = (ppf->opt_gain * ppf->opt_gain >> 15) * res_eng; 403 pf_residual = av_sat_add32(temp1, temp2 + (1 << 15)) >> 16; 404 405 if (tgt_eng >= pf_residual << 1) { 406 temp1 = 0x7fff; 407 } else { 408 temp1 = (tgt_eng << 14) / pf_residual; 409 } 410 411 /* scaling_gain = sqrt(tgt_eng/pf_res^2) */ 412 ppf->sc_gain = square_root(temp1 << 16); 413 } else { 414 ppf->opt_gain = 0; 415 ppf->sc_gain = 0x7fff; 416 } 417 418 ppf->opt_gain = av_clip_int16(ppf->opt_gain * ppf->sc_gain >> 15); 419} 420 421/** 422 * Calculate pitch postfilter parameters. 423 * 424 * @param p the context 425 * @param offset offset of the excitation vector 426 * @param pitch_lag decoded pitch lag 427 * @param ppf pitch postfilter parameters 428 * @param cur_rate current bitrate 429 */ 430static void comp_ppf_coeff(G723_1_ChannelContext *p, int offset, int pitch_lag, 431 PPFParam *ppf, enum Rate cur_rate) 432{ 433 434 int16_t scale; 435 int i; 436 int temp1, temp2; 437 438 /* 439 * 0 - target energy 440 * 1 - forward cross-correlation 441 * 2 - forward residual energy 442 * 3 - backward cross-correlation 443 * 4 - backward residual energy 444 */ 445 int energy[5] = {0, 0, 0, 0, 0}; 446 int16_t *buf = p->audio + LPC_ORDER + offset; 447 int fwd_lag = autocorr_max(buf, offset, &energy[1], pitch_lag, 448 SUBFRAME_LEN, 1); 449 int back_lag = autocorr_max(buf, offset, &energy[3], pitch_lag, 450 SUBFRAME_LEN, -1); 451 452 ppf->index = 0; 453 ppf->opt_gain = 0; 454 ppf->sc_gain = 0x7fff; 455 456 /* Case 0, Section 3.6 */ 457 if (!back_lag && !fwd_lag) 458 return; 459 460 /* Compute target energy */ 461 energy[0] = ff_g723_1_dot_product(buf, buf, SUBFRAME_LEN); 462 463 /* Compute forward residual energy */ 464 if (fwd_lag) 465 energy[2] = ff_g723_1_dot_product(buf + fwd_lag, buf + fwd_lag, 466 SUBFRAME_LEN); 467 468 /* Compute backward residual energy */ 469 if (back_lag) 470 energy[4] = ff_g723_1_dot_product(buf - back_lag, buf - back_lag, 471 SUBFRAME_LEN); 472 473 /* Normalize and shorten */ 474 temp1 = 0; 475 for (i = 0; i < 5; i++) 476 temp1 = FFMAX(energy[i], temp1); 477 478 scale = ff_g723_1_normalize_bits(temp1, 31); 479 for (i = 0; i < 5; i++) 480 energy[i] = (energy[i] << scale) >> 16; 481 482 if (fwd_lag && !back_lag) { /* Case 1 */ 483 comp_ppf_gains(fwd_lag, ppf, cur_rate, energy[0], energy[1], 484 energy[2]); 485 } else if (!fwd_lag) { /* Case 2 */ 486 comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3], 487 energy[4]); 488 } else { /* Case 3 */ 489 490 /* 491 * Select the largest of energy[1]^2/energy[2] 492 * and energy[3]^2/energy[4] 493 */ 494 temp1 = energy[4] * ((energy[1] * energy[1] + (1 << 14)) >> 15); 495 temp2 = energy[2] * ((energy[3] * energy[3] + (1 << 14)) >> 15); 496 if (temp1 >= temp2) { 497 comp_ppf_gains(fwd_lag, ppf, cur_rate, energy[0], energy[1], 498 energy[2]); 499 } else { 500 comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3], 501 energy[4]); 502 } 503 } 504} 505 506/** 507 * Classify frames as voiced/unvoiced. 508 * 509 * @param p the context 510 * @param pitch_lag decoded pitch_lag 511 * @param exc_eng excitation energy estimation 512 * @param scale scaling factor of exc_eng 513 * 514 * @return residual interpolation index if voiced, 0 otherwise 515 */ 516static int comp_interp_index(G723_1_ChannelContext *p, int pitch_lag, 517 int *exc_eng, int *scale) 518{ 519 int offset = PITCH_MAX + 2 * SUBFRAME_LEN; 520 int16_t *buf = p->audio + LPC_ORDER; 521 522 int index, ccr, tgt_eng, best_eng, temp; 523 524 *scale = ff_g723_1_scale_vector(buf, p->excitation, FRAME_LEN + PITCH_MAX); 525 buf += offset; 526 527 /* Compute maximum backward cross-correlation */ 528 ccr = 0; 529 index = autocorr_max(buf, offset, &ccr, pitch_lag, SUBFRAME_LEN * 2, -1); 530 ccr = av_sat_add32(ccr, 1 << 15) >> 16; 531 532 /* Compute target energy */ 533 tgt_eng = ff_g723_1_dot_product(buf, buf, SUBFRAME_LEN * 2); 534 *exc_eng = av_sat_add32(tgt_eng, 1 << 15) >> 16; 535 536 if (ccr <= 0) 537 return 0; 538 539 /* Compute best energy */ 540 best_eng = ff_g723_1_dot_product(buf - index, buf - index, 541 SUBFRAME_LEN * 2); 542 best_eng = av_sat_add32(best_eng, 1 << 15) >> 16; 543 544 temp = best_eng * *exc_eng >> 3; 545 546 if (temp < ccr * ccr) { 547 return index; 548 } else 549 return 0; 550} 551 552/** 553 * Perform residual interpolation based on frame classification. 554 * 555 * @param buf decoded excitation vector 556 * @param out output vector 557 * @param lag decoded pitch lag 558 * @param gain interpolated gain 559 * @param rseed seed for random number generator 560 */ 561static void residual_interp(int16_t *buf, int16_t *out, int lag, 562 int gain, int *rseed) 563{ 564 int i; 565 if (lag) { /* Voiced */ 566 int16_t *vector_ptr = buf + PITCH_MAX; 567 /* Attenuate */ 568 for (i = 0; i < lag; i++) 569 out[i] = vector_ptr[i - lag] * 3 >> 2; 570 av_memcpy_backptr((uint8_t*)(out + lag), lag * sizeof(*out), 571 (FRAME_LEN - lag) * sizeof(*out)); 572 } else { /* Unvoiced */ 573 for (i = 0; i < FRAME_LEN; i++) { 574 *rseed = (int16_t)(*rseed * 521 + 259); 575 out[i] = gain * *rseed >> 15; 576 } 577 memset(buf, 0, (FRAME_LEN + PITCH_MAX) * sizeof(*buf)); 578 } 579} 580 581/** 582 * Perform IIR filtering. 583 * 584 * @param fir_coef FIR coefficients 585 * @param iir_coef IIR coefficients 586 * @param src source vector 587 * @param dest destination vector 588 * @param width width of the output, 16 bits(0) / 32 bits(1) 589 */ 590#define iir_filter(fir_coef, iir_coef, src, dest, width)\ 591{\ 592 int m, n;\ 593 int res_shift = 16 & ~-(width);\ 594 int in_shift = 16 - res_shift;\ 595\ 596 for (m = 0; m < SUBFRAME_LEN; m++) {\ 597 int64_t filter = 0;\ 598 for (n = 1; n <= LPC_ORDER; n++) {\ 599 filter -= (fir_coef)[n - 1] * (src)[m - n] -\ 600 (iir_coef)[n - 1] * ((dest)[m - n] >> in_shift);\ 601 }\ 602\ 603 (dest)[m] = av_clipl_int32(((src)[m] * 65536) + (filter * 8) +\ 604 (1 << 15)) >> res_shift;\ 605 }\ 606} 607 608/** 609 * Adjust gain of postfiltered signal. 610 * 611 * @param p the context 612 * @param buf postfiltered output vector 613 * @param energy input energy coefficient 614 */ 615static void gain_scale(G723_1_ChannelContext *p, int16_t * buf, int energy) 616{ 617 int num, denom, gain, bits1, bits2; 618 int i; 619 620 num = energy; 621 denom = 0; 622 for (i = 0; i < SUBFRAME_LEN; i++) { 623 int temp = buf[i] >> 2; 624 temp *= temp; 625 denom = av_sat_dadd32(denom, temp); 626 } 627 628 if (num && denom) { 629 bits1 = ff_g723_1_normalize_bits(num, 31); 630 bits2 = ff_g723_1_normalize_bits(denom, 31); 631 num = num << bits1 >> 1; 632 denom <<= bits2; 633 634 bits2 = 5 + bits1 - bits2; 635 bits2 = av_clip_uintp2(bits2, 5); 636 637 gain = (num >> 1) / (denom >> 16); 638 gain = square_root(gain << 16 >> bits2); 639 } else { 640 gain = 1 << 12; 641 } 642 643 for (i = 0; i < SUBFRAME_LEN; i++) { 644 p->pf_gain = (15 * p->pf_gain + gain + (1 << 3)) >> 4; 645 buf[i] = av_clip_int16((buf[i] * (p->pf_gain + (p->pf_gain >> 4)) + 646 (1 << 10)) >> 11); 647 } 648} 649 650/** 651 * Perform formant filtering. 652 * 653 * @param p the context 654 * @param lpc quantized lpc coefficients 655 * @param buf input buffer 656 * @param dst output buffer 657 */ 658static void formant_postfilter(G723_1_ChannelContext *p, int16_t *lpc, 659 int16_t *buf, int16_t *dst) 660{ 661 int16_t filter_coef[2][LPC_ORDER]; 662 int filter_signal[LPC_ORDER + FRAME_LEN], *signal_ptr; 663 int i, j, k; 664 665 memcpy(buf, p->fir_mem, LPC_ORDER * sizeof(*buf)); 666 memcpy(filter_signal, p->iir_mem, LPC_ORDER * sizeof(*filter_signal)); 667 668 for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) { 669 for (k = 0; k < LPC_ORDER; k++) { 670 filter_coef[0][k] = (-lpc[k] * postfilter_tbl[0][k] + 671 (1 << 14)) >> 15; 672 filter_coef[1][k] = (-lpc[k] * postfilter_tbl[1][k] + 673 (1 << 14)) >> 15; 674 } 675 iir_filter(filter_coef[0], filter_coef[1], buf + i, filter_signal + i, 1); 676 lpc += LPC_ORDER; 677 } 678 679 memcpy(p->fir_mem, buf + FRAME_LEN, LPC_ORDER * sizeof(int16_t)); 680 memcpy(p->iir_mem, filter_signal + FRAME_LEN, LPC_ORDER * sizeof(int)); 681 682 buf += LPC_ORDER; 683 signal_ptr = filter_signal + LPC_ORDER; 684 for (i = 0; i < SUBFRAMES; i++) { 685 int temp; 686 int auto_corr[2]; 687 int scale, energy; 688 689 /* Normalize */ 690 scale = ff_g723_1_scale_vector(dst, buf, SUBFRAME_LEN); 691 692 /* Compute auto correlation coefficients */ 693 auto_corr[0] = ff_g723_1_dot_product(dst, dst + 1, SUBFRAME_LEN - 1); 694 auto_corr[1] = ff_g723_1_dot_product(dst, dst, SUBFRAME_LEN); 695 696 /* Compute reflection coefficient */ 697 temp = auto_corr[1] >> 16; 698 if (temp) { 699 temp = (auto_corr[0] >> 2) / temp; 700 } 701 p->reflection_coef = (3 * p->reflection_coef + temp + 2) >> 2; 702 temp = -p->reflection_coef >> 1 & ~3; 703 704 /* Compensation filter */ 705 for (j = 0; j < SUBFRAME_LEN; j++) { 706 dst[j] = av_sat_dadd32(signal_ptr[j], 707 (signal_ptr[j - 1] >> 16) * temp) >> 16; 708 } 709 710 /* Compute normalized signal energy */ 711 temp = 2 * scale + 4; 712 if (temp < 0) { 713 energy = av_clipl_int32((int64_t)auto_corr[1] << -temp); 714 } else 715 energy = auto_corr[1] >> temp; 716 717 gain_scale(p, dst, energy); 718 719 buf += SUBFRAME_LEN; 720 signal_ptr += SUBFRAME_LEN; 721 dst += SUBFRAME_LEN; 722 } 723} 724 725static int sid_gain_to_lsp_index(int gain) 726{ 727 if (gain < 0x10) 728 return gain << 6; 729 else if (gain < 0x20) 730 return gain - 8 << 7; 731 else 732 return gain - 20 << 8; 733} 734 735static inline int cng_rand(int *state, int base) 736{ 737 *state = (*state * 521 + 259) & 0xFFFF; 738 return (*state & 0x7FFF) * base >> 15; 739} 740 741static int estimate_sid_gain(G723_1_ChannelContext *p) 742{ 743 int i, shift, seg, seg2, t, val, val_add, x, y; 744 745 shift = 16 - p->cur_gain * 2; 746 if (shift > 0) { 747 if (p->sid_gain == 0) { 748 t = 0; 749 } else if (shift >= 31 || (int32_t)((uint32_t)p->sid_gain << shift) >> shift != p->sid_gain) { 750 if (p->sid_gain < 0) t = INT32_MIN; 751 else t = INT32_MAX; 752 } else 753 t = p->sid_gain * (1 << shift); 754 } else if(shift < -31) { 755 t = (p->sid_gain < 0) ? -1 : 0; 756 }else 757 t = p->sid_gain >> -shift; 758 x = av_clipl_int32(t * (int64_t)cng_filt[0] >> 16); 759 760 if (x >= cng_bseg[2]) 761 return 0x3F; 762 763 if (x >= cng_bseg[1]) { 764 shift = 4; 765 seg = 3; 766 } else { 767 shift = 3; 768 seg = (x >= cng_bseg[0]); 769 } 770 seg2 = FFMIN(seg, 3); 771 772 val = 1 << shift; 773 val_add = val >> 1; 774 for (i = 0; i < shift; i++) { 775 t = seg * 32 + (val << seg2); 776 t *= t; 777 if (x >= t) 778 val += val_add; 779 else 780 val -= val_add; 781 val_add >>= 1; 782 } 783 784 t = seg * 32 + (val << seg2); 785 y = t * t - x; 786 if (y <= 0) { 787 t = seg * 32 + (val + 1 << seg2); 788 t = t * t - x; 789 val = (seg2 - 1) * 16 + val; 790 if (t >= y) 791 val++; 792 } else { 793 t = seg * 32 + (val - 1 << seg2); 794 t = t * t - x; 795 val = (seg2 - 1) * 16 + val; 796 if (t >= y) 797 val--; 798 } 799 800 return val; 801} 802 803static void generate_noise(G723_1_ChannelContext *p) 804{ 805 int i, j, idx, t; 806 int off[SUBFRAMES]; 807 int signs[SUBFRAMES / 2 * 11], pos[SUBFRAMES / 2 * 11]; 808 int tmp[SUBFRAME_LEN * 2]; 809 int16_t *vector_ptr; 810 int64_t sum; 811 int b0, c, delta, x, shift; 812 813 p->pitch_lag[0] = cng_rand(&p->cng_random_seed, 21) + 123; 814 p->pitch_lag[1] = cng_rand(&p->cng_random_seed, 19) + 123; 815 816 for (i = 0; i < SUBFRAMES; i++) { 817 p->subframe[i].ad_cb_gain = cng_rand(&p->cng_random_seed, 50) + 1; 818 p->subframe[i].ad_cb_lag = cng_adaptive_cb_lag[i]; 819 } 820 821 for (i = 0; i < SUBFRAMES / 2; i++) { 822 t = cng_rand(&p->cng_random_seed, 1 << 13); 823 off[i * 2] = t & 1; 824 off[i * 2 + 1] = ((t >> 1) & 1) + SUBFRAME_LEN; 825 t >>= 2; 826 for (j = 0; j < 11; j++) { 827 signs[i * 11 + j] = ((t & 1) * 2 - 1) * (1 << 14); 828 t >>= 1; 829 } 830 } 831 832 idx = 0; 833 for (i = 0; i < SUBFRAMES; i++) { 834 for (j = 0; j < SUBFRAME_LEN / 2; j++) 835 tmp[j] = j; 836 t = SUBFRAME_LEN / 2; 837 for (j = 0; j < pulses[i]; j++, idx++) { 838 int idx2 = cng_rand(&p->cng_random_seed, t); 839 840 pos[idx] = tmp[idx2] * 2 + off[i]; 841 tmp[idx2] = tmp[--t]; 842 } 843 } 844 845 vector_ptr = p->audio + LPC_ORDER; 846 memcpy(vector_ptr, p->prev_excitation, 847 PITCH_MAX * sizeof(*p->excitation)); 848 for (i = 0; i < SUBFRAMES; i += 2) { 849 ff_g723_1_gen_acb_excitation(vector_ptr, vector_ptr, 850 p->pitch_lag[i >> 1], &p->subframe[i], 851 p->cur_rate); 852 ff_g723_1_gen_acb_excitation(vector_ptr + SUBFRAME_LEN, 853 vector_ptr + SUBFRAME_LEN, 854 p->pitch_lag[i >> 1], &p->subframe[i + 1], 855 p->cur_rate); 856 857 t = 0; 858 for (j = 0; j < SUBFRAME_LEN * 2; j++) 859 t |= FFABS(vector_ptr[j]); 860 t = FFMIN(t, 0x7FFF); 861 if (!t) { 862 shift = 0; 863 } else { 864 shift = -10 + av_log2(t); 865 if (shift < -2) 866 shift = -2; 867 } 868 sum = 0; 869 if (shift < 0) { 870 for (j = 0; j < SUBFRAME_LEN * 2; j++) { 871 t = vector_ptr[j] * (1 << -shift); 872 sum += t * t; 873 tmp[j] = t; 874 } 875 } else { 876 for (j = 0; j < SUBFRAME_LEN * 2; j++) { 877 t = vector_ptr[j] >> shift; 878 sum += t * t; 879 tmp[j] = t; 880 } 881 } 882 883 b0 = 0; 884 for (j = 0; j < 11; j++) 885 b0 += tmp[pos[(i / 2) * 11 + j]] * signs[(i / 2) * 11 + j]; 886 b0 = b0 * 2 * 2979LL + (1 << 29) >> 30; // approximated division by 11 887 888 c = p->cur_gain * (p->cur_gain * SUBFRAME_LEN >> 5); 889 if (shift * 2 + 3 >= 0) 890 c >>= shift * 2 + 3; 891 else 892 c <<= -(shift * 2 + 3); 893 c = (av_clipl_int32(sum << 1) - c) * 2979LL >> 15; 894 895 delta = b0 * b0 * 2 - c; 896 if (delta <= 0) { 897 x = -b0; 898 } else { 899 delta = square_root(delta); 900 x = delta - b0; 901 t = delta + b0; 902 if (FFABS(t) < FFABS(x)) 903 x = -t; 904 } 905 shift++; 906 if (shift < 0) 907 x >>= -shift; 908 else 909 x *= 1 << shift; 910 x = av_clip(x, -10000, 10000); 911 912 for (j = 0; j < 11; j++) { 913 idx = (i / 2) * 11 + j; 914 vector_ptr[pos[idx]] = av_clip_int16(vector_ptr[pos[idx]] + 915 (x * signs[idx] >> 15)); 916 } 917 918 /* copy decoded data to serve as a history for the next decoded subframes */ 919 memcpy(vector_ptr + PITCH_MAX, vector_ptr, 920 sizeof(*vector_ptr) * SUBFRAME_LEN * 2); 921 vector_ptr += SUBFRAME_LEN * 2; 922 } 923 /* Save the excitation for the next frame */ 924 memcpy(p->prev_excitation, p->audio + LPC_ORDER + FRAME_LEN, 925 PITCH_MAX * sizeof(*p->excitation)); 926} 927 928static int g723_1_decode_frame(AVCodecContext *avctx, AVFrame *frame, 929 int *got_frame_ptr, AVPacket *avpkt) 930{ 931 G723_1_Context *s = avctx->priv_data; 932 const uint8_t *buf = avpkt->data; 933 int buf_size = avpkt->size; 934 int dec_mode = buf[0] & 3; 935 int channels = avctx->ch_layout.nb_channels; 936 937 PPFParam ppf[SUBFRAMES]; 938 int16_t cur_lsp[LPC_ORDER]; 939 int16_t lpc[SUBFRAMES * LPC_ORDER]; 940 int16_t acb_vector[SUBFRAME_LEN]; 941 int16_t *out; 942 int bad_frame = 0, i, j, ret; 943 944 if (buf_size < frame_size[dec_mode] * channels) { 945 if (buf_size) 946 av_log(avctx, AV_LOG_WARNING, 947 "Expected %d bytes, got %d - skipping packet\n", 948 frame_size[dec_mode], buf_size); 949 *got_frame_ptr = 0; 950 return buf_size; 951 } 952 953 frame->nb_samples = FRAME_LEN; 954 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) 955 return ret; 956 957 for (int ch = 0; ch < channels; ch++) { 958 G723_1_ChannelContext *p = &s->ch[ch]; 959 int16_t *audio = p->audio; 960 961 if (unpack_bitstream(p, buf + ch * (buf_size / channels), 962 buf_size / channels) < 0) { 963 bad_frame = 1; 964 if (p->past_frame_type == ACTIVE_FRAME) 965 p->cur_frame_type = ACTIVE_FRAME; 966 else 967 p->cur_frame_type = UNTRANSMITTED_FRAME; 968 } 969 970 out = (int16_t *)frame->extended_data[ch]; 971 972 if (p->cur_frame_type == ACTIVE_FRAME) { 973 if (!bad_frame) 974 p->erased_frames = 0; 975 else if (p->erased_frames != 3) 976 p->erased_frames++; 977 978 ff_g723_1_inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, bad_frame); 979 ff_g723_1_lsp_interpolate(lpc, cur_lsp, p->prev_lsp); 980 981 /* Save the lsp_vector for the next frame */ 982 memcpy(p->prev_lsp, cur_lsp, LPC_ORDER * sizeof(*p->prev_lsp)); 983 984 /* Generate the excitation for the frame */ 985 memcpy(p->excitation, p->prev_excitation, 986 PITCH_MAX * sizeof(*p->excitation)); 987 if (!p->erased_frames) { 988 int16_t *vector_ptr = p->excitation + PITCH_MAX; 989 990 /* Update interpolation gain memory */ 991 p->interp_gain = ff_g723_1_fixed_cb_gain[(p->subframe[2].amp_index + 992 p->subframe[3].amp_index) >> 1]; 993 for (i = 0; i < SUBFRAMES; i++) { 994 gen_fcb_excitation(vector_ptr, &p->subframe[i], p->cur_rate, 995 p->pitch_lag[i >> 1], i); 996 ff_g723_1_gen_acb_excitation(acb_vector, 997 &p->excitation[SUBFRAME_LEN * i], 998 p->pitch_lag[i >> 1], 999 &p->subframe[i], p->cur_rate); 1000 /* Get the total excitation */ 1001 for (j = 0; j < SUBFRAME_LEN; j++) { 1002 int v = av_clip_int16(vector_ptr[j] * 2); 1003 vector_ptr[j] = av_clip_int16(v + acb_vector[j]); 1004 } 1005 vector_ptr += SUBFRAME_LEN; 1006 } 1007 1008 vector_ptr = p->excitation + PITCH_MAX; 1009 1010 p->interp_index = comp_interp_index(p, p->pitch_lag[1], 1011 &p->sid_gain, &p->cur_gain); 1012 1013 /* Perform pitch postfiltering */ 1014 if (s->postfilter) { 1015 i = PITCH_MAX; 1016 for (j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) 1017 comp_ppf_coeff(p, i, p->pitch_lag[j >> 1], 1018 ppf + j, p->cur_rate); 1019 1020 for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) 1021 ff_acelp_weighted_vector_sum(p->audio + LPC_ORDER + i, 1022 vector_ptr + i, 1023 vector_ptr + i + ppf[j].index, 1024 ppf[j].sc_gain, 1025 ppf[j].opt_gain, 1026 1 << 14, 15, SUBFRAME_LEN); 1027 } else { 1028 audio = vector_ptr - LPC_ORDER; 1029 } 1030 1031 /* Save the excitation for the next frame */ 1032 memcpy(p->prev_excitation, p->excitation + FRAME_LEN, 1033 PITCH_MAX * sizeof(*p->excitation)); 1034 } else { 1035 p->interp_gain = (p->interp_gain * 3 + 2) >> 2; 1036 if (p->erased_frames == 3) { 1037 /* Mute output */ 1038 memset(p->excitation, 0, 1039 (FRAME_LEN + PITCH_MAX) * sizeof(*p->excitation)); 1040 memset(p->prev_excitation, 0, 1041 PITCH_MAX * sizeof(*p->excitation)); 1042 memset(frame->data[0], 0, 1043 (FRAME_LEN + LPC_ORDER) * sizeof(int16_t)); 1044 } else { 1045 int16_t *buf = p->audio + LPC_ORDER; 1046 1047 /* Regenerate frame */ 1048 residual_interp(p->excitation, buf, p->interp_index, 1049 p->interp_gain, &p->random_seed); 1050 1051 /* Save the excitation for the next frame */ 1052 memcpy(p->prev_excitation, buf + (FRAME_LEN - PITCH_MAX), 1053 PITCH_MAX * sizeof(*p->excitation)); 1054 } 1055 } 1056 p->cng_random_seed = CNG_RANDOM_SEED; 1057 } else { 1058 if (p->cur_frame_type == SID_FRAME) { 1059 p->sid_gain = sid_gain_to_lsp_index(p->subframe[0].amp_index); 1060 ff_g723_1_inverse_quant(p->sid_lsp, p->prev_lsp, p->lsp_index, 0); 1061 } else if (p->past_frame_type == ACTIVE_FRAME) { 1062 p->sid_gain = estimate_sid_gain(p); 1063 } 1064 1065 if (p->past_frame_type == ACTIVE_FRAME) 1066 p->cur_gain = p->sid_gain; 1067 else 1068 p->cur_gain = (p->cur_gain * 7 + p->sid_gain) >> 3; 1069 generate_noise(p); 1070 ff_g723_1_lsp_interpolate(lpc, p->sid_lsp, p->prev_lsp); 1071 /* Save the lsp_vector for the next frame */ 1072 memcpy(p->prev_lsp, p->sid_lsp, LPC_ORDER * sizeof(*p->prev_lsp)); 1073 } 1074 1075 p->past_frame_type = p->cur_frame_type; 1076 1077 memcpy(p->audio, p->synth_mem, LPC_ORDER * sizeof(*p->audio)); 1078 for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) 1079 ff_celp_lp_synthesis_filter(p->audio + i, &lpc[j * LPC_ORDER], 1080 audio + i, SUBFRAME_LEN, LPC_ORDER, 1081 0, 1, 1 << 12); 1082 memcpy(p->synth_mem, p->audio + FRAME_LEN, LPC_ORDER * sizeof(*p->audio)); 1083 1084 if (s->postfilter) { 1085 formant_postfilter(p, lpc, p->audio, out); 1086 } else { // if output is not postfiltered it should be scaled by 2 1087 for (i = 0; i < FRAME_LEN; i++) 1088 out[i] = av_clip_int16(2 * p->audio[LPC_ORDER + i]); 1089 } 1090 } 1091 1092 *got_frame_ptr = 1; 1093 1094 return frame_size[dec_mode] * channels; 1095} 1096 1097#define OFFSET(x) offsetof(G723_1_Context, x) 1098#define AD AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM 1099 1100static const AVOption options[] = { 1101 { "postfilter", "enable postfilter", OFFSET(postfilter), AV_OPT_TYPE_BOOL, 1102 { .i64 = 1 }, 0, 1, AD }, 1103 { NULL } 1104}; 1105 1106 1107static const AVClass g723_1dec_class = { 1108 .class_name = "G.723.1 decoder", 1109 .item_name = av_default_item_name, 1110 .option = options, 1111 .version = LIBAVUTIL_VERSION_INT, 1112}; 1113 1114const FFCodec ff_g723_1_decoder = { 1115 .p.name = "g723_1", 1116 .p.long_name = NULL_IF_CONFIG_SMALL("G.723.1"), 1117 .p.type = AVMEDIA_TYPE_AUDIO, 1118 .p.id = AV_CODEC_ID_G723_1, 1119 .priv_data_size = sizeof(G723_1_Context), 1120 .init = g723_1_decode_init, 1121 FF_CODEC_DECODE_CB(g723_1_decode_frame), 1122 .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1, 1123 .p.priv_class = &g723_1dec_class, 1124 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, 1125}; 1126