1/* Copyright (C) 2007 Hong Zhiqian */ 2/** 3 @file mdf_tm.h 4 @author Hong Zhiqian 5 @brief Various compatibility routines for Speex (TriMedia version) 6*/ 7/* 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions 10 are met: 11 12 - Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 15 - Redistributions in binary form must reproduce the above copyright 16 notice, this list of conditions and the following disclaimer in the 17 documentation and/or other materials provided with the distribution. 18 19 - Neither the name of the Xiph.org Foundation nor the names of its 20 contributors may be used to endorse or promote products derived from 21 this software without specific prior written permission. 22 23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34*/ 35#include <ops/custom_defs.h> 36#include "profile_tm.h" 37 38// shifted power spectrum to fftwrap.c so that optimisation can be shared between mdf.c and preprocess.c 39#define OVERRIDE_POWER_SPECTRUM 40 41#ifdef FIXED_POINT 42 43#else 44 45#define OVERRIDE_FILTER_DC_NOTCH16 46void filter_dc_notch16( 47 const spx_int16_t * restrict in, 48 float radius, 49 float * restrict out, 50 int len, 51 float * restrict mem 52) 53{ 54 register int i; 55 register float den2, r1; 56 register float mem0, mem1; 57 58 FILTERDCNOTCH16_START(); 59 60 r1 = 1 - radius; 61 den2 = (radius * radius) + (0.7 * r1 * r1); 62 mem0 = mem[0]; 63 mem1 = mem[1]; 64 65#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16) 66#pragma TCS_unroll=4 67#pragma TCS_unrollexact=1 68#endif 69 for ( i=0 ; i<len ; ++i ) 70 { 71 register float vin = in[i]; 72 register float vout = mem0 + vin; 73 register float rvout = radius * vout; 74 75 mem0 = mem1 + 2 * (-vin + rvout); 76 mem1 = vin - (den2 * vout); 77 78 out[i] = rvout; 79 } 80#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16) 81#pragma TCS_unrollexact=0 82#pragma TCS_unroll=0 83#endif 84 85 mem[0] = mem0; 86 mem[1] = mem1; 87 88 FILTERDCNOTCH16_STOP(); 89} 90 91#define OVERRIDE_MDF_INNER_PROD 92float mdf_inner_prod( 93 const float * restrict x, 94 const float * restrict y, 95 int len 96) 97{ 98 register float sum = 0; 99 100 MDFINNERPROD_START(); 101 102 len >>= 1; 103 104#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT) 105#pragma TCS_unroll=4 106#pragma TCS_unrollexact=1 107#endif 108 while(len--) 109 { 110 register float acc0, acc1; 111 112 acc0 = (*x++) * (*y++); 113 acc1 = (*x++) * (*y++); 114 115 sum += acc0 + acc1; 116 } 117#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT) 118#pragma TCS_unrollexact=0 119#pragma TCS_unroll=0 120#endif 121 122 MDFINNERPROD_STOP(); 123 124 return sum; 125} 126 127#define OVERRIDE_SPECTRAL_MUL_ACCUM 128void spectral_mul_accum( 129 const float * restrict X, 130 const float * restrict Y, 131 float * restrict acc, 132 int N, int M 133) 134{ 135 register int i, j; 136 register float Xi, Yi, Xii, Yii; 137 register int _N; 138 139 SPECTRALMULACCUM_START(); 140 141 acc[0] = X[0] * Y[0]; 142 _N = N-1; 143 144 for ( i=1 ; i<_N ; i+=2 ) 145 { 146 Xi = X[i]; 147 Yi = Y[i]; 148 Xii = X[i+1]; 149 Yii = Y[i+1]; 150 151 acc[i] = (Xi * Yi - Xii * Yii); 152 acc[i+1]= (Xii * Yi + Xi * Yii); 153 } 154 155 acc[_N] = X[_N] * Y[_N]; 156 157 for ( j=1,X+=N,Y+=N ; j<M ; j++ ) 158 { 159 acc[0] += X[0] * Y[0]; 160 161 for ( i=1 ; i<N-1 ; i+=2 ) 162 { 163 Xi = X[i]; 164 Yi = Y[i]; 165 Xii = X[i+1]; 166 Yii = Y[i+1]; 167 168 acc[i] += (Xi * Yi - Xii * Yii); 169 acc[i+1]+= (Xii * Yi + Xi * Yii); 170 } 171 172 acc[_N] += X[_N] * Y[_N]; 173 X += N; 174 Y += N; 175 } 176 177 SPECTRALMULACCUM_STOP(); 178} 179 180#define OVERRIDE_WEIGHTED_SPECTRAL_MUL_CONJ 181void weighted_spectral_mul_conj( 182 const float * restrict w, 183 const float p, 184 const float * restrict X, 185 const float * restrict Y, 186 float * restrict prod, 187 int N 188) 189{ 190 register int i, j; 191 register int _N; 192 193 WEIGHTEDSPECTRALMULCONJ_START(); 194 195 prod[0] = p * w[0] * X[0] * Y[0]; 196 _N = N-1; 197 198 for (i=1,j=1;i<_N;i+=2,j++) 199 { 200 register float W; 201 register float Xi, Yi, Xii, Yii; 202 203 Xi = X[i]; 204 Yi = Y[i]; 205 Xii = X[i+1]; 206 Yii = Y[i+1]; 207 W = p * w[j]; 208 209 210 prod[i] = W * (Xi * Yi + Xii * Yii); 211 prod[i+1]= W * (Xi * Yii - Xii * Yi); 212 } 213 214 prod[_N] = p * w[j] * X[_N] * Y[_N]; 215 216 WEIGHTEDSPECTRALMULCONJ_STOP(); 217} 218 219#define OVERRIDE_MDF_ADJUST_PROP 220void mdf_adjust_prop( 221 const float * restrict W, 222 int N, 223 int M, 224 float * restrict prop 225) 226{ 227 register int i, j; 228 register float max_sum = 1; 229 register float prop_sum = 1; 230 231 MDFADJUSTPROP_START(); 232 233 for ( i=0 ; i<M ; ++i ) 234 { 235 register float tmp = 1; 236 register int k = i * N; 237 register int l = k + N; 238 register float propi; 239 240#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP) 241#pragma TCS_unroll=4 242#pragma TCS_unrollexact=1 243#endif 244 for ( j=k ; j<l ; ++j ) 245 { 246 register float wi = W[j]; 247 248 tmp += wi * wi; 249 } 250#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP) 251#pragma TCS_unrollexact=0 252#pragma TCS_unroll=0 253#endif 254 255 propi = spx_sqrt(tmp); 256 prop[i]= propi; 257 max_sum= fmux(propi > max_sum, propi, max_sum); 258 } 259 260 for ( i=0 ; i<M ; ++i ) 261 { 262 register float propi = prop[i]; 263 264 propi += .1f * max_sum; 265 prop_sum += propi; 266 prop[i] = propi; 267 } 268 269 prop_sum = 0.99f / prop_sum; 270 for ( i=0 ; i<M ; ++i ) 271 { prop[i] = prop_sum * prop[i]; 272 } 273 274 MDFADJUSTPROP_STOP(); 275} 276 277 278#define OVERRIDE_SPEEX_ECHO_GET_RESIDUAL 279void speex_echo_get_residual( 280 SpeexEchoState * restrict st, 281 float * restrict residual_echo, 282 int len 283) 284{ 285 register int i; 286 register float leak2, leake; 287 register int N; 288 register float * restrict window; 289 register float * restrict last_y; 290 register float * restrict y; 291 292 SPEEXECHOGETRESIDUAL_START(); 293 294 window = st->window; 295 last_y = st->last_y; 296 y = st->y; 297 N = st->window_size; 298 299 300#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL) 301#pragma TCS_unroll=4 302#pragma TCS_unrollexact=1 303#endif 304 for (i=0;i<N;i++) 305 { y[i] = window[i] * last_y[i]; 306 } 307#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL) 308#pragma TCS_unrollexact=0 309#pragma TCS_unroll=0 310#endif 311 312 spx_fft(st->fft_table, st->y, st->Y); 313 power_spectrum(st->Y, residual_echo, N); 314 315 leake = st->leak_estimate; 316 leak2 = fmux(leake > .5, 1, 2 * leake); 317 N = st->frame_size; 318 319#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL) 320#pragma TCS_unroll=4 321#pragma TCS_unrollexact=1 322#endif 323 for ( i=0 ; i<N ; ++i ) 324 { residual_echo[i] *= leak2; 325 } 326#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL) 327#pragma TCS_unrollexact=0 328#pragma TCS_unroll=0 329#endif 330 331 residual_echo[N] *= leak2; 332 333#ifndef NO_REMARK 334 (void)len; 335#endif 336 337 SPEEXECHOGETRESIDUAL_STOP(); 338} 339#endif 340 341 342void mdf_preemph( 343 SpeexEchoState * restrict st, 344 spx_word16_t * restrict x, 345 const spx_int16_t * restrict far_end, 346 int framesize 347) 348{ 349 register spx_word16_t preemph = st->preemph; 350 register spx_word16_t memX = st->memX; 351 register spx_word16_t memD = st->memD; 352 register spx_word16_t * restrict input = st->input; 353 register int i; 354#ifdef FIXED_POINT 355 register int saturated = st->saturated; 356#endif 357 358#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 359#pragma TCS_unroll=4 360#pragma TCS_unrollexact=1 361#endif 362 for ( i=0 ; i<framesize ; ++i ) 363 { 364 register spx_int16_t far_endi = far_end[i]; 365 register spx_word32_t tmp32; 366 register spx_word16_t inputi = input[i]; 367 368 tmp32 = SUB32(EXTEND32(far_endi), EXTEND32(MULT16_16_P15(preemph,memX))); 369 370#ifdef FIXED_POINT 371 saturated = mux(iabs(tmp32) > 32767, M+1, saturated); 372 tmp32 = iclipi(tmp32,32767); 373#endif 374 375 x[i] = EXTRACT16(tmp32); 376 memX = far_endi; 377 tmp32 = SUB32(EXTEND32(inputi), EXTEND32(MULT16_16_P15(preemph, memD))); 378 379#ifdef FIXED_POINT 380 saturated = mux( ((tmp32 > 32767) && (saturated == 0)), 1, 381 mux( ((tmp32 <-32767) && (saturated == 0)), 1, saturated )); 382 tmp32 = iclipi(tmp32,32767); 383#endif 384 memD = inputi; 385 input[i] = tmp32; 386 } 387#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 388#pragma TCS_unrollexact=0 389#pragma TCS_unroll=0 390#endif 391 392 st->memD = memD; 393 st->memX = memX; 394 395#ifdef FIXED_POINT 396 st->saturated = saturated; 397#endif 398} 399 400void mdf_sub( 401 spx_word16_t * restrict dest, 402 const spx_word16_t * restrict src1, 403 const spx_word16_t * restrict src2, 404 int framesize 405) 406{ 407 register int i; 408 409#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 410#pragma TCS_unroll=4 411#pragma TCS_unrollexact=1 412#endif 413 414#ifdef FIXED_POINT 415 for ( i=0,framesize<<=1 ; i<framesize ; i+=4 ) 416 { register int src1i, src2i, desti; 417 418 src1i = ld32d(src1,i); 419 src2i = ld32d(src2,i); 420 desti = dspidualsub(src1i,src2i); 421 st32d(i, dest, desti); 422 } 423#else 424 for ( i=0 ; i<framesize ; ++i ) 425 { dest[i] = src1[i] - src2[i]; 426 } 427#endif 428 429#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 430#pragma TCS_unrollexact=0 431#pragma TCS_unroll=0 432#endif 433} 434 435void mdf_sub_int( 436 spx_word16_t * restrict dest, 437 const spx_int16_t * restrict src1, 438 const spx_int16_t * restrict src2, 439 int framesize 440) 441{ 442 register int i, j; 443 444#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 445#pragma TCS_unroll=4 446#pragma TCS_unrollexact=1 447#endif 448 449#ifdef FIXED_POINT 450 for ( i=0,framesize<<=1 ; i<framesize ; i+=4 ) 451 { register int src1i, src2i, desti; 452 453 src1i = ld32d(src1,i); 454 src2i = ld32d(src2,i); 455 desti = dspidualsub(src1i,src2i); 456 st32d(i, dest, desti); 457 } 458#else 459 for ( i=0,j=0 ; i<framesize ; i+=2,++j ) 460 { register int src1i, src2i, desti; 461 462 463 src1i = ld32d(src1,j); 464 src2i = ld32d(src2,j); 465 desti = dspidualsub(src1i,src2i); 466 467 dest[i] = sex16(desti); 468 dest[i+1] = asri(16,desti); 469 } 470#endif 471 472#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 473#pragma TCS_unrollexact=0 474#pragma TCS_unroll=0 475#endif 476} 477 478void mdf_compute_weight_gradient( 479 SpeexEchoState * restrict st, 480 spx_word16_t * restrict X, 481 int N, 482 int M 483) 484{ 485 register int i, j; 486 register spx_word32_t * restrict PHI = st->PHI; 487 488 for (j=M-1;j>=0;j--) 489 { 490 register spx_word32_t * restrict W = &(st->W[j*N]); 491 492 weighted_spectral_mul_conj( 493 st->power_1, 494 FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), 495 &X[(j+1)*N], 496 st->E, 497 st->PHI, 498 N); 499#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 500#pragma TCS_unroll=4 501#pragma TCS_unrollexact=1 502#endif 503 for (i=0;i<N;i++) 504 { W[i] = ADD32(W[i],PHI[i]); 505 } 506#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 507#pragma TCS_unrollexact=0 508#pragma TCS_unroll=0 509#endif 510 } 511} 512 513void mdf_update_weight( 514 SpeexEchoState * restrict st, 515 int N, 516 int M, 517 int framesize 518) 519{ 520 register int j; 521 register int cancel_count = st->cancel_count; 522 register spx_word16_t * restrict wtmp = st->wtmp; 523#ifdef FIXED_POINT 524 register spx_word16_t * restrict wtmp2 = st->wtmp2; 525 register int i; 526#endif 527 528 for ( j=0 ; j<M ; j++ ) 529 { 530 register spx_word32_t * restrict W = &(st->W[j*N]); 531 532 if (j==0 || cancel_count%(M-1) == j-1) 533 { 534#ifdef FIXED_POINT 535 536#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 537#pragma TCS_unroll=4 538#pragma TCS_unrollexact=1 539#endif 540 for ( i=0 ; i<N ; i++ ) 541 wtmp2[i] = EXTRACT16(PSHR32(W[i],NORMALIZE_SCALEDOWN+16)); 542#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 543#pragma TCS_unrollexact=0 544#pragma TCS_unroll=0 545#endif 546 spx_ifft(st->fft_table, wtmp2, wtmp); 547 memset(wtmp, 0, framesize * sizeof(spx_word16_t)); 548 549#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 550#pragma TCS_unroll=4 551#pragma TCS_unrollexact=1 552#endif 553 for (j=framesize; j<N ; ++j) 554 { wtmp[j]=SHL16(wtmp[j],NORMALIZE_SCALEUP); 555 } 556#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 557#pragma TCS_unrollexact=0 558#pragma TCS_unroll=0 559#endif 560 561 spx_fft(st->fft_table, wtmp, wtmp2); 562 563#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 564#pragma TCS_unroll=4 565#pragma TCS_unrollexact=1 566#endif 567 for (i=0;i<N;i++) 568 { W[i] -= SHL32(EXTEND32(wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 569 } 570#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 571#pragma TCS_unrollexact=0 572#pragma TCS_unroll=0 573#endif 574 575#else 576 spx_ifft(st->fft_table, W, wtmp); 577 memset(&wtmp[framesize], 0, (N-framesize) * sizeof(spx_word16_t)); 578 spx_fft(st->fft_table, wtmp, W); 579#endif 580 } 581 } 582} 583 584#ifdef TWO_PATH 585// first four parameters is passed by registers 586// generate faster performance with 4 parameters functions 587spx_word32_t mdf_update_foreground( 588 SpeexEchoState * restrict st, 589 spx_word32_t Dbf, 590 spx_word32_t Sff, 591 spx_word32_t See 592) 593{ 594 register spx_word32_t Davg1 = st->Davg1; 595 register spx_word32_t Davg2 = st->Davg2; 596 register spx_word32_t Dvar1 = st->Dvar1; 597 register spx_word32_t Dvar2 = st->Dvar2; 598 register spx_word16_t * restrict input = st->input; 599 register int framesize = st->frame_size; 600 register spx_word16_t * restrict xx = st->x + framesize; 601 register spx_word16_t * restrict y = st->y + framesize; 602 register spx_word16_t * restrict ee = st->e + framesize; 603 register int update_foreground; 604 register int i; 605 register int N = st->window_size; 606 register int M = st->M; 607 608#ifdef FIXED_POINT 609 register spx_word32_t sc0 = SUB32(Sff,See); 610 register spx_float_t sc1 = FLOAT_MUL32U(Sff,Dbf); 611 612 Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),Davg1), MULT16_32_Q15(QCONST16(.4f,15),sc0)); 613 Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),Davg2), MULT16_32_Q15(QCONST16(.15f,15),sc0)); 614 Dvar1 = FLOAT_ADD( 615 FLOAT_MULT(VAR1_SMOOTH,Dvar1), 616 FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), 617 MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 618 Dvar2 = FLOAT_ADD( 619 FLOAT_MULT(VAR2_SMOOTH,Dvar2), 620 FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), 621 MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 622#else 623 register spx_word32_t sc0 = Sff - See; 624 register spx_word32_t sc1 = Sff * Dbf; 625 626 Davg1 = .6*Davg1 + .4*sc0; 627 Davg2 = .85*Davg2 + .15*sc0; 628 Dvar1 = VAR1_SMOOTH*Dvar1 + .16*sc1; 629 Dvar2 = VAR2_SMOOTH*Dvar2 + .0225*sc1; 630#endif 631 632 update_foreground = 633 mux( FLOAT_GT(FLOAT_MUL32U(sc0, VABS(sc0)), sc1), 1, 634 mux( FLOAT_GT(FLOAT_MUL32U(Davg1, VABS(Davg1)), FLOAT_MULT(VAR1_UPDATE,(Dvar1))), 1, 635 mux( FLOAT_GT(FLOAT_MUL32U(Davg2, VABS(Davg2)), FLOAT_MULT(VAR2_UPDATE,(Dvar2))), 1, 0))); 636 637 if ( update_foreground ) 638 { 639 register spx_word16_t * restrict windowf = st->window + framesize; 640 register spx_word16_t * restrict window = st->window; 641 642 st->Davg1 = st->Davg2 = 0; 643 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 644 645 memcpy(st->foreground, st->W, N*M*sizeof(spx_word32_t)); 646 647#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 648#pragma TCS_unroll=4 649#pragma TCS_unrollexact=1 650#endif 651 for ( i=0 ; i<framesize ; ++i) 652 { register spx_word16_t wi = window[i]; 653 register spx_word16_t wfi = windowf[i]; 654 register spx_word16_t ei = ee[i]; 655 register spx_word16_t yi = y[i]; 656 657 ee[i] = MULT16_16_Q15(wfi,ei) + MULT16_16_Q15(wi,yi); 658 } 659#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 660#pragma TCS_unrollexact=0 661#pragma TCS_unroll=0 662#endif 663 664 } else 665 { 666 register int reset_background; 667 668 reset_background = 669 mux( FLOAT_GT(FLOAT_MUL32U(-(sc0),VABS(sc0)), FLOAT_MULT(VAR_BACKTRACK,sc1)), 1, 670 mux( FLOAT_GT(FLOAT_MUL32U(-(Davg1), VABS(Davg1)), FLOAT_MULT(VAR_BACKTRACK,Dvar1)), 1, 671 mux( FLOAT_GT(FLOAT_MUL32U(-(Davg2), VABS(Davg2)), FLOAT_MULT(VAR_BACKTRACK,Dvar2)), 1, 0))); 672 673 if ( reset_background ) 674 { 675 memcpy(st->W, st->foreground, N*M*sizeof(spx_word32_t)); 676 memcpy(y, ee, framesize * sizeof(spx_word16_t)); 677 mdf_sub(xx,input,y,framesize); 678 See = Sff; 679 st->Davg1 = st->Davg2 = 0; 680 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 681 } else 682 { 683 st->Davg1 = Davg1; 684 st->Davg2 = Davg2; 685 st->Dvar1 = Dvar1; 686 st->Dvar2 = Dvar2; 687 } 688 } 689 690 return See; 691} 692#endif 693 694void mdf_compute_error_signal( 695 SpeexEchoState * restrict st, 696 const spx_int16_t * restrict in, 697 spx_int16_t * restrict out, 698 int framesize 699) 700{ 701 register spx_word16_t preemph = st->preemph; 702 register spx_word16_t memE = st->memE; 703 register int saturated = st->saturated; 704 register spx_word16_t * restrict e = st->e; 705 register spx_word16_t * restrict ee = st->e + framesize; 706 register spx_word16_t * restrict input = st->input; 707 register spx_word16_t * restrict xx = st->x + framesize; 708 register int i; 709 710#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 711#pragma TCS_unroll=4 712#pragma TCS_unrollexact=1 713#endif 714 for ( i=0 ; i<framesize ; ++i ) 715 { 716 register spx_word32_t tmp_out; 717 register spx_int16_t ini = in[i]; 718 register int flg; 719 720#ifdef FIXED_POINT 721 722#ifdef TWO_PATH 723 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i])); 724 tmp_out = iclipi(tmp_out,32767); 725#else 726 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i])); 727 tmp_out = iclipi(tmp_out,32767); 728#endif 729 730#else 731#ifdef TWO_PATH 732 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i])); 733#else 734 tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i])); 735#endif 736 tmp_out = 737 fmux( tmp_out > 32767, 32767, 738 fmux( tmp_out < -32768, -32768, tmp_out)); 739#endif 740 741 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(preemph,memE))); 742 flg = iabs(ini) >= 32000; 743 tmp_out = VMUX( flg, 0, tmp_out); 744 saturated = mux( flg && (saturated == 0), 1, saturated); 745 746 out[i] = (spx_int16_t)tmp_out; 747 memE = tmp_out; 748 } 749#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 750#pragma TCS_unrollexact=0 751#pragma TCS_unroll=0 752#endif 753 754 st->memE = memE; 755 st->saturated = saturated; 756 memset(e, 0, framesize * sizeof(spx_word16_t)); 757 memcpy(ee, xx, framesize * sizeof(spx_word16_t)); 758} 759 760inline int mdf_check( 761 SpeexEchoState * restrict st, 762 spx_int16_t * out, 763 spx_word32_t Syy, 764 spx_word32_t Sxx, 765 spx_word32_t See, 766 spx_word32_t Sff, 767 spx_word32_t Sdd 768) 769{ 770 register int N = st->window_size; 771 register spx_word32_t N1e9 = N * 1e9; 772 register int screwed_up = st->screwed_up; 773 register int framesize = st->frame_size; 774 775 if (!(Syy>=0 && Sxx>=0 && See >= 0) 776#ifndef FIXED_POINT 777 || !(Sff < N1e9 && Syy < N1e9 && Sxx < N1e9 ) 778#endif 779 ) 780 { 781 screwed_up += 50; 782 memset(out, 0, framesize * sizeof(spx_int16_t)); 783 784 } else 785 { screwed_up = mux( SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)), screwed_up+1, 0); 786 } 787 788 st->screwed_up = screwed_up; 789 790 return screwed_up; 791} 792 793void mdf_smooth( 794 spx_word32_t * restrict power, 795 spx_word32_t * restrict Xf, 796 int framesize, 797 int M 798) 799{ 800 register spx_word16_t ss, ss_1, pf, xff; 801 register int j; 802 803#ifdef FIXED_POINT 804 ss=DIV32_16(11469,M); 805 ss_1 = SUB16(32767,ss); 806#else 807 ss=.35/M; 808 ss_1 = 1-ss; 809#endif 810 811#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 812#pragma TCS_unroll=4 813#pragma TCS_unrollexact=1 814#endif 815 for ( j=0 ; j<framesize ; ++j ) 816 { register spx_word32_t pi = power[j]; 817 register spx_word32_t xfi = Xf[j]; 818 819 power[j] = MULT16_32_Q15(ss_1,pi) + 1 + MULT16_32_Q15(ss,xfi); 820 } 821#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 822#pragma TCS_unrollexact=0 823#pragma TCS_unroll=0 824#endif 825 826 pf = power[framesize]; 827 xff = Xf[framesize]; 828 power[framesize] = MULT16_32_Q15(ss_1,pf) + 1 + MULT16_32_Q15(ss,xff); 829} 830 831void mdf_compute_filtered_spectra_crosscorrelations( 832 SpeexEchoState * restrict st, 833 spx_word32_t Syy, 834 spx_word32_t See, 835 int framesize 836) 837{ 838 register spx_float_t Pey = FLOAT_ONE; 839 register spx_float_t Pyy = FLOAT_ONE; 840 register spx_word16_t spec_average = st->spec_average; 841 register spx_word32_t * restrict pRf = st->Rf; 842 register spx_word32_t * restrict pYf = st->Yf; 843 register spx_word32_t * restrict pEh = st->Eh; 844 register spx_word32_t * restrict pYh = st->Yh; 845 register spx_word16_t beta0 = st->beta0; 846 register spx_word16_t beta_max = st->beta_max; 847 register spx_float_t alpha, alpha_1; 848 register spx_word32_t tmp32, tmpx; 849 register spx_float_t sPey = st->Pey; 850 register spx_float_t sPyy = st->Pyy; 851 register spx_float_t tmp; 852 register spx_word16_t leak_estimate; 853 register int j; 854 register spx_float_t Eh, Yh; 855 register spx_word32_t _Ehj, _Rfj, _Yfj, _Yhj; 856 857#ifdef FIXED_POINT 858 register spx_word16_t spec_average1 = SUB16(32767,spec_average); 859#else 860 register spx_word16_t spec_average1 = 1 - spec_average; 861#endif 862 863#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 864#pragma TCS_unroll=4 865#pragma TCS_unrollexact=1 866#endif 867 for (j=framesize; j>0 ; --j) 868 { 869 _Ehj = pEh[j]; 870 _Rfj = pRf[j]; 871 _Yfj = pYf[j]; 872 _Yhj = pYh[j]; 873 874 Eh = PSEUDOFLOAT(_Rfj - _Ehj); 875 Yh = PSEUDOFLOAT(_Yfj - _Yhj); 876 877 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); 878 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); 879 880 pEh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj); 881 pYh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj); 882 } 883#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 884#pragma TCS_unrollexact=0 885#pragma TCS_unroll=0 886#endif 887 _Ehj = pEh[0]; 888 _Rfj = pRf[0]; 889 _Yfj = pYf[0]; 890 _Yhj = pYh[0]; 891 892 Eh = PSEUDOFLOAT(_Rfj - _Ehj); 893 Yh = PSEUDOFLOAT(_Yfj - _Yhj); 894 895 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); 896 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); 897 898 pEh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj); 899 pYh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj); 900 901 Pyy = FLOAT_SQRT(Pyy); 902 Pey = FLOAT_DIVU(Pey,Pyy); 903 904 tmp32 = MULT16_32_Q15(beta0,Syy); 905 tmpx = MULT16_32_Q15(beta_max,See); 906 tmp32 = VMUX(tmp32 > tmpx, tmpx, tmp32); 907 alpha = FLOAT_DIV32(tmp32, See); 908 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); 909 910 sPey = FLOAT_ADD(FLOAT_MULT(alpha_1,sPey) , FLOAT_MULT(alpha,Pey)); 911 sPyy = FLOAT_ADD(FLOAT_MULT(alpha_1,sPyy) , FLOAT_MULT(alpha,Pyy)); 912 tmp = FLOAT_MULT(MIN_LEAK,sPyy); 913 914#ifndef FIXED_POINT 915 sPyy = VMUX(FLOAT_LT(sPyy, FLOAT_ONE), FLOAT_ONE, sPyy); 916 sPey = VMUX(FLOAT_LT(sPey, tmp), tmp, sPey); 917 sPey = VMUX(FLOAT_LT(sPey, sPyy), sPey, sPyy); 918#else 919 sPyy = FLOAT_LT(sPyy, FLOAT_ONE) ? FLOAT_ONE : sPyy; 920 sPey = FLOAT_LT(sPey, tmp) ? tmp : sPey; 921 sPey = FLOAT_LT(sPey, sPyy) ? sPey : sPyy; 922#endif 923 924 leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(sPey, sPyy),14)); 925 926 leak_estimate = VMUX( leak_estimate > 16383, 32767, SHL16(leak_estimate,1)); 927 st->Pey = sPey; 928 st->Pyy = sPyy; 929 st->leak_estimate = leak_estimate; 930} 931 932inline spx_word16_t mdf_compute_RER( 933 spx_word32_t See, 934 spx_word32_t Syy, 935 spx_word32_t Sey, 936 spx_word32_t Sxx, 937 spx_word16_t leake 938) 939{ 940 register spx_word16_t RER; 941 942#ifdef FIXED_POINT 943 register spx_word32_t tmp32; 944 register spx_word32_t tmp; 945 spx_float_t bound = PSEUDOFLOAT(Sey); 946 947 tmp32 = MULT16_32_Q15(leake,Syy); 948 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); 949 950 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); 951 tmp = FLOAT_EXTRACT32(bound); 952 tmp32 = imux( FLOAT_GT(bound, PSEUDOFLOAT(See)), See, 953 imux( tmp32 < tmp, tmp, tmp32)); 954 955 tmp = SHR32(See,1); 956 tmp32 = imux(tmp32 > tmp, tmp, tmp32); 957 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); 958#else 959 register spx_word32_t r0; 960 961 r0 = (Sey * Sey)/(1 + See * Syy); 962 963 RER = (.0001*Sxx + 3.* MULT16_32_Q15(leake,Syy)) / See; 964 RER = fmux( RER < r0, r0, RER); 965 RER = fmux( RER > .5, .5, RER); 966#endif 967 968 return RER; 969} 970 971void mdf_adapt( 972 SpeexEchoState * restrict st, 973 spx_word16_t RER, 974 spx_word32_t Syy, 975 spx_word32_t See, 976 spx_word32_t Sxx 977) 978{ 979 register spx_float_t * restrict power_1 = st->power_1; 980 register spx_word32_t * restrict power = st->power; 981 register int adapted = st->adapted; 982 register spx_word32_t sum_adapt = st->sum_adapt; 983 register spx_word16_t leake = st->leak_estimate; 984 register int framesize = st->frame_size; 985 register int i; 986 register int M = st->M; 987 988 adapted = mux( !adapted && sum_adapt > QCONST32(M,15) && 989 MULT16_32_Q15(leake,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy), 1, adapted); 990 991 if ( adapted ) 992 { register spx_word32_t * restrict Yf = st->Yf; 993 register spx_word32_t * restrict Rf = st->Rf; 994 register spx_word32_t r, e, e2; 995 996#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 997#pragma TCS_unroll=4 998#pragma TCS_unrollexact=1 999#endif 1000 for ( i=0 ; i<framesize ; ++i ) 1001 { 1002 r = SHL32(Yf[i],3); 1003 r = MULT16_32_Q15(leake,r); 1004 e = SHL32(Rf[i],3)+1; 1005 1006#ifdef FIXED_POINT 1007 e2 = SHR32(e,1); 1008 r = mux( r > e2, e2, r); 1009#else 1010 e2 = e * .5; 1011 r = fmux( r > e2, e2, r); 1012#endif 1013 1014 r = MULT16_32_Q15(QCONST16(.7,15),r) + 1015 MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 1016 1017 power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[i]+10)),WEIGHT_SHIFT+16); 1018 } 1019#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 1020#pragma TCS_unrollexact=0 1021#pragma TCS_unroll=0 1022#endif 1023 1024 r = SHL32(Yf[framesize],3); 1025 r = MULT16_32_Q15(leake,r); 1026 e = SHL32(Rf[framesize],3)+1; 1027 1028#ifdef FIXED_POINT 1029 e2 = SHR32(e,1); 1030 r = mux( r > e2, e2, r); 1031#else 1032 e2 = e * .5; 1033 r = fmux( r > e2, e2, r); 1034#endif 1035 1036 r = MULT16_32_Q15(QCONST16(.7,15),r) + 1037 MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 1038 1039 power_1[framesize] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[framesize]+10)),WEIGHT_SHIFT+16); 1040 1041 } else 1042 { 1043 register spx_word16_t adapt_rate=0; 1044 register int N = st->window_size; 1045 1046 if ( Sxx > SHR32(MULT16_16(N, 1000),6) ) 1047 { register spx_word32_t tmp32, tmp32q; 1048 1049 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 1050#ifdef FIXED_POINT 1051 tmp32q = SHR32(See,2); 1052 tmp32 = mux(tmp32 > tmp32q, tmp32q, tmp32); 1053#else 1054 tmp32q = 0.25 * See; 1055 tmp32 = fmux(tmp32 > tmp32q, tmp32q, tmp32); 1056#endif 1057 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); 1058 } 1059 1060#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 1061#pragma TCS_unroll=4 1062#pragma TCS_unrollexact=1 1063#endif 1064 for (i=0;i<framesize;i++) 1065 power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[i],10)),WEIGHT_SHIFT+1); 1066#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION) 1067#pragma TCS_unrollexact=0 1068#pragma TCS_unroll=0 1069#endif 1070 power_1[framesize] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[framesize],10)),WEIGHT_SHIFT+1); 1071 sum_adapt = ADD32(sum_adapt,adapt_rate); 1072 } 1073 1074 st->sum_adapt = sum_adapt; 1075 st->adapted = adapted; 1076} 1077 1078#define OVERRIDE_ECHO_CANCELLATION 1079void speex_echo_cancellation( 1080 SpeexEchoState * restrict st, 1081 const spx_int16_t * restrict in, 1082 const spx_int16_t * restrict far_end, 1083 spx_int16_t * restrict out 1084) 1085{ 1086 register int framesize = st->frame_size; 1087 register spx_word16_t * restrict x = st->x; 1088 register spx_word16_t * restrict xx = st->x + framesize; 1089 register spx_word16_t * restrict yy = st->y + framesize; 1090 register spx_word16_t * restrict ee = st->e + framesize; 1091 register spx_word32_t Syy, See, Sxx, Sdd, Sff; 1092 register spx_word16_t RER; 1093 register spx_word32_t Sey; 1094 register int j; 1095 register int N,M; 1096#ifdef TWO_PATH 1097 register spx_word32_t Dbf; 1098#endif 1099 1100 N = st->window_size; 1101 M = st->M; 1102 st->cancel_count++; 1103 1104 filter_dc_notch16(in, st->notch_radius, st->input, framesize, st->notch_mem); 1105 mdf_preemph(st, xx, far_end, framesize); 1106 1107 { 1108 1109 register spx_word16_t * restrict X = st->X; 1110 1111 for ( j=M-1 ; j>=0 ; j-- ) 1112 { register spx_word16_t * restrict Xdes = &(X[(j+1)*N]); 1113 register spx_word16_t * restrict Xsrc = &(X[j*N]); 1114 1115 memcpy(Xdes, Xsrc, N * sizeof(spx_word16_t)); 1116 } 1117 1118 spx_fft(st->fft_table, x, X); 1119 memcpy(st->last_y, st->x, N * sizeof(spx_word16_t)); 1120 Sxx = mdf_inner_prod(xx, xx, framesize); 1121 memcpy(x, xx, framesize * sizeof(spx_word16_t)); 1122 1123#ifdef TWO_PATH 1124 spectral_mul_accum(st->X, st->foreground, st->Y, N, M); 1125 spx_ifft(st->fft_table, st->Y, st->e); 1126 mdf_sub(xx, st->input, ee, framesize); 1127 Sff = mdf_inner_prod(xx, xx, framesize); 1128#endif 1129 1130 mdf_adjust_prop (st->W, N, M, st->prop); 1131 1132 if (st->saturated == 0) 1133 { mdf_compute_weight_gradient(st, X, N, M); 1134 } else 1135 { st->saturated--; 1136 } 1137 } 1138 1139 mdf_update_weight(st, N, M, framesize); 1140 spectral_mul_accum(st->X, st->W, st->Y, N, M); 1141 spx_ifft(st->fft_table, st->Y, st->y); 1142 1143#ifdef TWO_PATH 1144 mdf_sub(xx, ee, yy, framesize); 1145 Dbf = 10+mdf_inner_prod(xx, xx, framesize); 1146#endif 1147 1148 mdf_sub(xx, st->input, yy, framesize); 1149 See = mdf_inner_prod(xx, xx, framesize); 1150 1151#ifndef TWO_PATH 1152 Sff = See; 1153#else 1154 See = mdf_update_foreground(st,Dbf,Sff,See); 1155#endif 1156 1157 1158 mdf_compute_error_signal(st, in, out, framesize); 1159 Sey = mdf_inner_prod(ee, yy, framesize); 1160 Syy = mdf_inner_prod(yy, yy, framesize); 1161 Sdd = mdf_inner_prod(st->input, st->input, framesize); 1162 1163 if ( mdf_check(st,out,Syy,Sxx,See,Sff,Sdd) >= 50 ) 1164 { speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); 1165 speex_echo_state_reset(st); 1166 return; 1167 } 1168 1169 See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 1170 spx_fft(st->fft_table, st->e, st->E); 1171 memset(st->y, 0, framesize * sizeof(spx_word16_t)); 1172 spx_fft(st->fft_table, st->y, st->Y); 1173 power_spectrum(st->E, st->Rf, N); 1174 power_spectrum(st->Y, st->Yf, N); 1175 power_spectrum(st->X, st->Xf, N); 1176 1177 mdf_smooth(st->power,st->Xf,framesize, M); 1178 mdf_compute_filtered_spectra_crosscorrelations(st,Syy,See,framesize); 1179 RER = mdf_compute_RER(See,Syy,Sey,Sxx,st->leak_estimate); 1180 mdf_adapt(st, RER, Syy, See, Sxx); 1181 1182 if ( st->adapted ) 1183 { register spx_word16_t * restrict last_yy = st->last_y + framesize; 1184 1185 memcpy(st->last_y, last_yy, framesize * sizeof(spx_word16_t)); 1186 mdf_sub_int(last_yy, in, out, framesize); 1187 1188 } 1189} 1190 1191 1192 1193