1/* Copyright (C) 2003-2008 Jean-Marc Valin 2 3 File: mdf.c 4 Echo canceller based on the MDF algorithm (see below) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 3. The name of the author may not be used to endorse or promote products 18 derived from this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31*/ 32 33/* 34 The echo canceller is based on the MDF algorithm described in: 35 36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 38 February 1990. 39 40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 41 double-talk is achieved using a variable learning rate as described in: 42 43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 44 Cancellation With Double-Talk. IEEE Transactions on Audio, 45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. 46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 47 48 There is no explicit double-talk detection, but a continuous variation 49 in the learning rate based on residual echo, double-talk and background 50 noise. 51 52 About the fixed-point version: 53 All the signals are represented with 16-bit words. The filter weights 54 are represented with 32-bit words, but only the top 16 bits are used 55 in most cases. The lower 16 bits are completely unreliable (due to the 56 fact that the update is done only on the top bits), but help in the 57 adaptation -- probably by removing a "threshold effect" due to 58 quantization (rounding going to zero) when the gradient is small. 59 60 Another kludge that seems to work good: when performing the weight 61 update, we only move half the way toward the "goal" this seems to 62 reduce the effect of quantization noise in the update phase. This 63 can be seen as applying a gradient descent on a "soft constraint" 64 instead of having a hard constraint. 65 66*/ 67 68#ifdef HAVE_CONFIG_H 69#include "config.h" 70#endif 71 72#include "arch.h" 73#include "speex/speex_echo.h" 74#include "fftwrap.h" 75#include "pseudofloat.h" 76#include "math_approx.h" 77#include "os_support.h" 78 79#ifndef M_PI 80#define M_PI 3.14159265358979323846 81#endif 82 83#ifdef FIXED_POINT 84#define WEIGHT_SHIFT 11 85#define NORMALIZE_SCALEDOWN 5 86#define NORMALIZE_SCALEUP 3 87#else 88#define WEIGHT_SHIFT 0 89#endif 90 91/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk 92 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ 93#define TWO_PATH 94 95#ifdef FIXED_POINT 96static const spx_float_t MIN_LEAK = {20972, -22}; 97 98/* Constants for the two-path filter */ 99static const spx_float_t VAR1_SMOOTH = {23593, -16}; 100static const spx_float_t VAR2_SMOOTH = {23675, -15}; 101static const spx_float_t VAR1_UPDATE = {16384, -15}; 102static const spx_float_t VAR2_UPDATE = {16384, -16}; 103static const spx_float_t VAR_BACKTRACK = {16384, -12}; 104#define TOP16(x) ((x)>>16) 105 106#else 107 108static const spx_float_t MIN_LEAK = .005f; 109 110/* Constants for the two-path filter */ 111static const spx_float_t VAR1_SMOOTH = .36f; 112static const spx_float_t VAR2_SMOOTH = .7225f; 113static const spx_float_t VAR1_UPDATE = .5f; 114static const spx_float_t VAR2_UPDATE = .25f; 115static const spx_float_t VAR_BACKTRACK = 4.f; 116#define TOP16(x) (x) 117#endif 118 119 120#define PLAYBACK_DELAY 2 121 122void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); 123 124 125/** Speex echo cancellation state. */ 126struct SpeexEchoState_ { 127 int frame_size; /**< Number of samples processed each time */ 128 int window_size; 129 int M; 130 int cancel_count; 131 int adapted; 132 int saturated; 133 int screwed_up; 134 int C; /** Number of input channels (microphones) */ 135 int K; /** Number of output channels (loudspeakers) */ 136 spx_int32_t sampling_rate; 137 spx_word16_t spec_average; 138 spx_word16_t beta0; 139 spx_word16_t beta_max; 140 spx_word32_t sum_adapt; 141 spx_word16_t leak_estimate; 142 143 spx_word16_t *e; /* scratch */ 144 spx_word16_t *x; /* Far-end input buffer (2N) */ 145 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ 146 spx_word16_t *input; /* scratch */ 147 spx_word16_t *y; /* scratch */ 148 spx_word16_t *last_y; 149 spx_word16_t *Y; /* scratch */ 150 spx_word16_t *E; 151 spx_word32_t *PHI; /* scratch */ 152 spx_word32_t *W; /* (Background) filter weights */ 153#ifdef TWO_PATH 154 spx_word16_t *foreground; /* Foreground filter weights */ 155 spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ 156 spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ 157 spx_float_t Dvar1; /* Estimated variance of 1st estimator */ 158 spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ 159#endif 160 spx_word32_t *power; /* Power of the far-end signal */ 161 spx_float_t *power_1;/* Inverse power of far-end */ 162 spx_word16_t *wtmp; /* scratch */ 163#ifdef FIXED_POINT 164 spx_word16_t *wtmp2; /* scratch */ 165#endif 166 spx_word32_t *Rf; /* scratch */ 167 spx_word32_t *Yf; /* scratch */ 168 spx_word32_t *Xf; /* scratch */ 169 spx_word32_t *Eh; 170 spx_word32_t *Yh; 171 spx_float_t Pey; 172 spx_float_t Pyy; 173 spx_word16_t *window; 174 spx_word16_t *prop; 175 void *fft_table; 176 spx_word16_t *memX, *memD, *memE; 177 spx_word16_t preemph; 178 spx_word16_t notch_radius; 179 spx_mem_t *notch_mem; 180 181 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ 182 spx_int16_t *play_buf; 183 int play_buf_pos; 184 int play_buf_started; 185}; 186 187static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) 188{ 189 int i; 190 spx_word16_t den2; 191#ifdef FIXED_POINT 192 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); 193#else 194 den2 = radius*radius + .7*(1-radius)*(1-radius); 195#endif 196 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ 197 for (i=0;i<len;i++) 198 { 199 spx_word16_t vin = in[i*stride]; 200 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 201#ifdef FIXED_POINT 202 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); 203#else 204 mem[0] = mem[1] + 2*(-vin + radius*vout); 205#endif 206 mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); 207 out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); 208 } 209} 210 211/* This inner product is slightly different from the codec version because of fixed-point */ 212static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) 213{ 214 spx_word32_t sum=0; 215 len >>= 1; 216 while(len--) 217 { 218 spx_word32_t part=0; 219 part = MAC16_16(part,*x++,*y++); 220 part = MAC16_16(part,*x++,*y++); 221 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */ 222 sum = ADD32(sum,SHR32(part,6)); 223 } 224 return sum; 225} 226 227/** Compute power spectrum of a half-complex (packed) vector */ 228static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) 229{ 230 int i, j; 231 ps[0]=MULT16_16(X[0],X[0]); 232 for (i=1,j=1;i<N-1;i+=2,j++) 233 { 234 ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 235 } 236 ps[j]=MULT16_16(X[i],X[i]); 237} 238 239/** Compute power spectrum of a half-complex (packed) vector and accumulate */ 240static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) 241{ 242 int i, j; 243 ps[0]+=MULT16_16(X[0],X[0]); 244 for (i=1,j=1;i<N-1;i+=2,j++) 245 { 246 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 247 } 248 ps[j]+=MULT16_16(X[i],X[i]); 249} 250 251/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 252#ifdef FIXED_POINT 253static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 254{ 255 int i,j; 256 spx_word32_t tmp1=0,tmp2=0; 257 for (j=0;j<M;j++) 258 { 259 tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N])); 260 } 261 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); 262 for (i=1;i<N-1;i+=2) 263 { 264 tmp1 = tmp2 = 0; 265 for (j=0;j<M;j++) 266 { 267 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1]))); 268 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1])); 269 } 270 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); 271 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); 272 } 273 tmp1 = tmp2 = 0; 274 for (j=0;j<M;j++) 275 { 276 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1])); 277 } 278 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); 279} 280static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M) 281{ 282 int i,j; 283 spx_word32_t tmp1=0,tmp2=0; 284 for (j=0;j<M;j++) 285 { 286 tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]); 287 } 288 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); 289 for (i=1;i<N-1;i+=2) 290 { 291 tmp1 = tmp2 = 0; 292 for (j=0;j<M;j++) 293 { 294 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1])); 295 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]); 296 } 297 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); 298 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); 299 } 300 tmp1 = tmp2 = 0; 301 for (j=0;j<M;j++) 302 { 303 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]); 304 } 305 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); 306} 307 308#else 309static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 310{ 311 int i,j; 312 for (i=0;i<N;i++) 313 acc[i] = 0; 314 for (j=0;j<M;j++) 315 { 316 acc[0] += X[0]*Y[0]; 317 for (i=1;i<N-1;i+=2) 318 { 319 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]); 320 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]); 321 } 322 acc[i] += X[i]*Y[i]; 323 X += N; 324 Y += N; 325 } 326} 327#define spectral_mul_accum16 spectral_mul_accum 328#endif 329 330/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ 331static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) 332{ 333 int i, j; 334 spx_float_t W; 335 W = FLOAT_AMULT(p, w[0]); 336 prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); 337 for (i=1,j=1;i<N-1;i+=2,j++) 338 { 339 W = FLOAT_AMULT(p, w[j]); 340 prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); 341 prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); 342 } 343 W = FLOAT_AMULT(p, w[j]); 344 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); 345} 346 347static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) 348{ 349 int i, j, p; 350 spx_word16_t max_sum = 1; 351 spx_word32_t prop_sum = 1; 352 for (i=0;i<M;i++) 353 { 354 spx_word32_t tmp = 1; 355 for (p=0;p<P;p++) 356 for (j=0;j<N;j++) 357 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); 358#ifdef FIXED_POINT 359 /* Just a security in case an overflow were to occur */ 360 tmp = MIN32(ABS32(tmp), 536870912); 361#endif 362 prop[i] = spx_sqrt(tmp); 363 if (prop[i] > max_sum) 364 max_sum = prop[i]; 365 } 366 for (i=0;i<M;i++) 367 { 368 prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); 369 prop_sum += EXTEND32(prop[i]); 370 } 371 for (i=0;i<M;i++) 372 { 373 prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); 374 /*printf ("%f ", prop[i]);*/ 375 } 376 /*printf ("\n");*/ 377} 378 379#ifdef DUMP_ECHO_CANCEL_DATA 380#include <stdio.h> 381static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL; 382 383static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len) 384{ 385 if (!(rFile && pFile && oFile)) 386 { 387 speex_fatal("Dump files not open"); 388 } 389 fwrite(rec, sizeof(spx_int16_t), len, rFile); 390 fwrite(play, sizeof(spx_int16_t), len, pFile); 391 fwrite(out, sizeof(spx_int16_t), len, oFile); 392} 393#endif 394 395/** Creates a new echo canceller state */ 396EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 397{ 398 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); 399} 400 401EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) 402{ 403 int i,N,M, C, K; 404 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 405 406 st->K = nb_speakers; 407 st->C = nb_mic; 408 C=st->C; 409 K=st->K; 410#ifdef DUMP_ECHO_CANCEL_DATA 411 if (rFile || pFile || oFile) 412 speex_fatal("Opening dump files twice"); 413 rFile = fopen("aec_rec.sw", "wb"); 414 pFile = fopen("aec_play.sw", "wb"); 415 oFile = fopen("aec_out.sw", "wb"); 416#endif 417 418 st->frame_size = frame_size; 419 st->window_size = 2*frame_size; 420 N = st->window_size; 421 M = st->M = (filter_length+st->frame_size-1)/frame_size; 422 st->cancel_count=0; 423 st->sum_adapt = 0; 424 st->saturated = 0; 425 st->screwed_up = 0; 426 /* This is the default sampling rate */ 427 st->sampling_rate = 8000; 428 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); 429#ifdef FIXED_POINT 430 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); 431 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); 432#else 433 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; 434 st->beta_max = (.5f*st->frame_size)/st->sampling_rate; 435#endif 436 st->leak_estimate = 0; 437 438 st->fft_table = spx_fft_init(N); 439 440 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 441 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); 442 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); 443 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 444 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 445 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 446 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 447 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 448 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 449 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 450 451 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); 452 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 453 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 454 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); 455#ifdef TWO_PATH 456 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); 457#endif 458 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 459 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); 460 st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); 461 st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 462 st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); 463 st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 464#ifdef FIXED_POINT 465 st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 466 for (i=0;i<N>>1;i++) 467 { 468 st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1)); 469 st->window[N-i-1] = st->window[i]; 470 } 471#else 472 for (i=0;i<N;i++) 473 st->window[i] = .5-.5*cos(2*M_PI*i/N); 474#endif 475 for (i=0;i<=st->frame_size;i++) 476 st->power_1[i] = FLOAT_ONE; 477 for (i=0;i<N*M*K*C;i++) 478 st->W[i] = 0; 479 { 480 spx_word32_t sum = 0; 481 /* Ratio of ~10 between adaptation rate of first and last block */ 482 spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); 483 st->prop[0] = QCONST16(.7, 15); 484 sum = EXTEND32(st->prop[0]); 485 for (i=1;i<M;i++) 486 { 487 st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); 488 sum = ADD32(sum, EXTEND32(st->prop[i])); 489 } 490 for (i=M-1;i>=0;i--) 491 { 492 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); 493 } 494 } 495 496 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); 497 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 498 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 499 st->preemph = QCONST16(.9,15); 500 if (st->sampling_rate<12000) 501 st->notch_radius = QCONST16(.9, 15); 502 else if (st->sampling_rate<24000) 503 st->notch_radius = QCONST16(.982, 15); 504 else 505 st->notch_radius = QCONST16(.992, 15); 506 507 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); 508 st->adapted = 0; 509 st->Pey = st->Pyy = FLOAT_ONE; 510 511#ifdef TWO_PATH 512 st->Davg1 = st->Davg2 = 0; 513 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 514#endif 515 516 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 517 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 518 st->play_buf_started = 0; 519 520 return st; 521} 522 523/** Resets echo canceller state */ 524EXPORT void speex_echo_state_reset(SpeexEchoState *st) 525{ 526 int i, M, N, C, K; 527 st->cancel_count=0; 528 st->screwed_up = 0; 529 N = st->window_size; 530 M = st->M; 531 C=st->C; 532 K=st->K; 533 for (i=0;i<N*M;i++) 534 st->W[i] = 0; 535#ifdef TWO_PATH 536 for (i=0;i<N*M;i++) 537 st->foreground[i] = 0; 538#endif 539 for (i=0;i<N*(M+1);i++) 540 st->X[i] = 0; 541 for (i=0;i<=st->frame_size;i++) 542 { 543 st->power[i] = 0; 544 st->power_1[i] = FLOAT_ONE; 545 st->Eh[i] = 0; 546 st->Yh[i] = 0; 547 } 548 for (i=0;i<st->frame_size;i++) 549 { 550 st->last_y[i] = 0; 551 } 552 for (i=0;i<N*C;i++) 553 { 554 st->E[i] = 0; 555 } 556 for (i=0;i<N*K;i++) 557 { 558 st->x[i] = 0; 559 } 560 for (i=0;i<2*C;i++) 561 st->notch_mem[i] = 0; 562 for (i=0;i<C;i++) 563 st->memD[i]=st->memE[i]=0; 564 for (i=0;i<K;i++) 565 st->memX[i]=0; 566 567 st->saturated = 0; 568 st->adapted = 0; 569 st->sum_adapt = 0; 570 st->Pey = st->Pyy = FLOAT_ONE; 571#ifdef TWO_PATH 572 st->Davg1 = st->Davg2 = 0; 573 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 574#endif 575 for (i=0;i<3*st->frame_size;i++) 576 st->play_buf[i] = 0; 577 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 578 st->play_buf_started = 0; 579 580} 581 582/** Destroys an echo canceller state */ 583EXPORT void speex_echo_state_destroy(SpeexEchoState *st) 584{ 585 spx_fft_destroy(st->fft_table); 586 587 speex_free(st->e); 588 speex_free(st->x); 589 speex_free(st->input); 590 speex_free(st->y); 591 speex_free(st->last_y); 592 speex_free(st->Yf); 593 speex_free(st->Rf); 594 speex_free(st->Xf); 595 speex_free(st->Yh); 596 speex_free(st->Eh); 597 598 speex_free(st->X); 599 speex_free(st->Y); 600 speex_free(st->E); 601 speex_free(st->W); 602#ifdef TWO_PATH 603 speex_free(st->foreground); 604#endif 605 speex_free(st->PHI); 606 speex_free(st->power); 607 speex_free(st->power_1); 608 speex_free(st->window); 609 speex_free(st->prop); 610 speex_free(st->wtmp); 611#ifdef FIXED_POINT 612 speex_free(st->wtmp2); 613#endif 614 speex_free(st->memX); 615 speex_free(st->memD); 616 speex_free(st->memE); 617 speex_free(st->notch_mem); 618 619 speex_free(st->play_buf); 620 speex_free(st); 621 622#ifdef DUMP_ECHO_CANCEL_DATA 623 fclose(rFile); 624 fclose(pFile); 625 fclose(oFile); 626 rFile = pFile = oFile = NULL; 627#endif 628} 629 630EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 631{ 632 int i; 633 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ 634 st->play_buf_started = 1; 635 if (st->play_buf_pos>=st->frame_size) 636 { 637 speex_echo_cancellation(st, rec, st->play_buf, out); 638 st->play_buf_pos -= st->frame_size; 639 for (i=0;i<st->play_buf_pos;i++) 640 st->play_buf[i] = st->play_buf[i+st->frame_size]; 641 } else { 642 speex_warning("No playback frame available (your application is buggy and/or got xruns)"); 643 if (st->play_buf_pos!=0) 644 { 645 speex_warning("internal playback buffer corruption?"); 646 st->play_buf_pos = 0; 647 } 648 for (i=0;i<st->frame_size;i++) 649 out[i] = rec[i]; 650 } 651} 652 653EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 654{ 655 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 656 if (!st->play_buf_started) 657 { 658 speex_warning("discarded first playback frame"); 659 return; 660 } 661 if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) 662 { 663 int i; 664 for (i=0;i<st->frame_size;i++) 665 st->play_buf[st->play_buf_pos+i] = play[i]; 666 st->play_buf_pos += st->frame_size; 667 if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) 668 { 669 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); 670 for (i=0;i<st->frame_size;i++) 671 st->play_buf[st->play_buf_pos+i] = play[i]; 672 st->play_buf_pos += st->frame_size; 673 } 674 } else { 675 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); 676 } 677} 678 679/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 680EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 681{ 682 speex_echo_cancellation(st, in, far_end, out); 683} 684 685/** Performs echo cancellation on a frame */ 686EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 687{ 688 int i,j, chan, speak; 689 int N,M, C, K; 690 spx_word32_t Syy,See,Sxx,Sdd, Sff; 691#ifdef TWO_PATH 692 spx_word32_t Dbf; 693 int update_foreground; 694#endif 695 spx_word32_t Sey; 696 spx_word16_t ss, ss_1; 697 spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; 698 spx_float_t alpha, alpha_1; 699 spx_word16_t RER; 700 spx_word32_t tmp32; 701 702 N = st->window_size; 703 M = st->M; 704 C = st->C; 705 K = st->K; 706 707 st->cancel_count++; 708#ifdef FIXED_POINT 709 ss=DIV32_16(11469,M); 710 ss_1 = SUB16(32767,ss); 711#else 712 ss=.35/M; 713 ss_1 = 1-ss; 714#endif 715 716 for (chan = 0; chan < C; chan++) 717 { 718 /* Apply a notch filter to make sure DC doesn't end up causing problems */ 719 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); 720 /* Copy input data to buffer and apply pre-emphasis */ 721 /* Copy input data to buffer */ 722 for (i=0;i<st->frame_size;i++) 723 { 724 spx_word32_t tmp32; 725 /* FIXME: This core has changed a bit, need to merge properly */ 726 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); 727#ifdef FIXED_POINT 728 if (tmp32 > 32767) 729 { 730 tmp32 = 32767; 731 if (st->saturated == 0) 732 st->saturated = 1; 733 } 734 if (tmp32 < -32767) 735 { 736 tmp32 = -32767; 737 if (st->saturated == 0) 738 st->saturated = 1; 739 } 740#endif 741 st->memD[chan] = st->input[chan*st->frame_size+i]; 742 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); 743 } 744 } 745 746 for (speak = 0; speak < K; speak++) 747 { 748 for (i=0;i<st->frame_size;i++) 749 { 750 spx_word32_t tmp32; 751 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; 752 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); 753#ifdef FIXED_POINT 754 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 755 if (tmp32 > 32767) 756 { 757 tmp32 = 32767; 758 st->saturated = M+1; 759 } 760 if (tmp32 < -32767) 761 { 762 tmp32 = -32767; 763 st->saturated = M+1; 764 } 765#endif 766 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); 767 st->memX[speak] = far_end[i*K+speak]; 768 } 769 } 770 771 for (speak = 0; speak < K; speak++) 772 { 773 /* Shift memory: this could be optimized eventually*/ 774 for (j=M-1;j>=0;j--) 775 { 776 for (i=0;i<N;i++) 777 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; 778 } 779 /* Convert x (echo input) to frequency domain */ 780 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); 781 } 782 783 Sxx = 0; 784 for (speak = 0; speak < K; speak++) 785 { 786 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 787 power_spectrum_accum(st->X+speak*N, st->Xf, N); 788 } 789 790 Sff = 0; 791 for (chan = 0; chan < C; chan++) 792 { 793#ifdef TWO_PATH 794 /* Compute foreground filter */ 795 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); 796 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); 797 for (i=0;i<st->frame_size;i++) 798 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); 799 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 800#endif 801 } 802 803 /* Adjust proportional adaption rate */ 804 /* FIXME: Adjust that for C, K*/ 805 if (st->adapted) 806 mdf_adjust_prop (st->W, N, M, C*K, st->prop); 807 /* Compute weight gradient */ 808 if (st->saturated == 0) 809 { 810 for (chan = 0; chan < C; chan++) 811 { 812 for (speak = 0; speak < K; speak++) 813 { 814 for (j=M-1;j>=0;j--) 815 { 816 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); 817 for (i=0;i<N;i++) 818 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; 819 } 820 } 821 } 822 } else { 823 st->saturated--; 824 } 825 826 /* FIXME: MC conversion required */ 827 /* Update weight to prevent circular convolution (MDF / AUMDF) */ 828 for (chan = 0; chan < C; chan++) 829 { 830 for (speak = 0; speak < K; speak++) 831 { 832 for (j=0;j<M;j++) 833 { 834 /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 835 /* Remove the "if" to make this an MDF filter */ 836 if (j==0 || st->cancel_count%(M-1) == j-1) 837 { 838#ifdef FIXED_POINT 839 for (i=0;i<N;i++) 840 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); 841 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 842 for (i=0;i<st->frame_size;i++) 843 { 844 st->wtmp[i]=0; 845 } 846 for (i=st->frame_size;i<N;i++) 847 { 848 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 849 } 850 spx_fft(st->fft_table, st->wtmp, st->wtmp2); 851 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 852 for (i=0;i<N;i++) 853 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 854#else 855 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); 856 for (i=st->frame_size;i<N;i++) 857 { 858 st->wtmp[i]=0; 859 } 860 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); 861#endif 862 } 863 } 864 } 865 } 866 867 /* So we can use power_spectrum_accum */ 868 for (i=0;i<=st->frame_size;i++) 869 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; 870 871 Dbf = 0; 872 See = 0; 873#ifdef TWO_PATH 874 /* Difference in response, this is used to estimate the variance of our residual power estimate */ 875 for (chan = 0; chan < C; chan++) 876 { 877 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); 878 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); 879 for (i=0;i<st->frame_size;i++) 880 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); 881 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 882 for (i=0;i<st->frame_size;i++) 883 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 884 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 885 } 886#endif 887 888#ifndef TWO_PATH 889 Sff = See; 890#endif 891 892#ifdef TWO_PATH 893 /* Logic for updating the foreground filter */ 894 895 /* For two time windows, compute the mean of the energy difference, as well as the variance */ 896 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); 897 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); 898 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 899 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 900 901 /* Equivalent float code: 902 st->Davg1 = .6*st->Davg1 + .4*(Sff-See); 903 st->Davg2 = .85*st->Davg2 + .15*(Sff-See); 904 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; 905 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; 906 */ 907 908 update_foreground = 0; 909 /* Check if we have a statistically significant reduction in the residual echo */ 910 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ 911 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) 912 update_foreground = 1; 913 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) 914 update_foreground = 1; 915 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) 916 update_foreground = 1; 917 918 /* Do we update? */ 919 if (update_foreground) 920 { 921 st->Davg1 = st->Davg2 = 0; 922 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 923 /* Copy background filter to foreground filter */ 924 for (i=0;i<N*M*C*K;i++) 925 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 926 /* Apply a smooth transition so as to not introduce blocking artifacts */ 927 for (chan = 0; chan < C; chan++) 928 for (i=0;i<st->frame_size;i++) 929 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); 930 } else { 931 int reset_background=0; 932 /* Otherwise, check if the background filter is significantly worse */ 933 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) 934 reset_background = 1; 935 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) 936 reset_background = 1; 937 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) 938 reset_background = 1; 939 if (reset_background) 940 { 941 /* Copy foreground filter to background filter */ 942 for (i=0;i<N*M*C*K;i++) 943 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 944 /* We also need to copy the output so as to get correct adaptation */ 945 for (chan = 0; chan < C; chan++) 946 { 947 for (i=0;i<st->frame_size;i++) 948 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; 949 for (i=0;i<st->frame_size;i++) 950 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 951 } 952 See = Sff; 953 st->Davg1 = st->Davg2 = 0; 954 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 955 } 956 } 957#endif 958 959 Sey = Syy = Sdd = 0; 960 for (chan = 0; chan < C; chan++) 961 { 962 /* Compute error signal (for the output with de-emphasis) */ 963 for (i=0;i<st->frame_size;i++) 964 { 965 spx_word32_t tmp_out; 966#ifdef TWO_PATH 967 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); 968#else 969 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); 970#endif 971 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); 972 /* This is an arbitrary test for saturation in the microphone signal */ 973 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) 974 { 975 if (st->saturated == 0) 976 st->saturated = 1; 977 } 978 out[i*C+chan] = WORD2INT(tmp_out); 979 st->memE[chan] = tmp_out; 980 } 981 982#ifdef DUMP_ECHO_CANCEL_DATA 983 dump_audio(in, far_end, out, st->frame_size); 984#endif 985 986 /* Compute error signal (filter update version) */ 987 for (i=0;i<st->frame_size;i++) 988 { 989 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; 990 st->e[chan*N+i] = 0; 991 } 992 993 /* Compute a bunch of correlations */ 994 /* FIXME: bad merge */ 995 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 996 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 997 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); 998 999 /* Convert error to frequency domain */ 1000 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); 1001 for (i=0;i<st->frame_size;i++) 1002 st->y[i+chan*N] = 0; 1003 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); 1004 1005 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 1006 power_spectrum_accum(st->E+chan*N, st->Rf, N); 1007 power_spectrum_accum(st->Y+chan*N, st->Yf, N); 1008 1009 } 1010 1011 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 1012 1013 /* Do some sanity check */ 1014 if (!(Syy>=0 && Sxx>=0 && See >= 0) 1015#ifndef FIXED_POINT 1016 || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) 1017#endif 1018 ) 1019 { 1020 /* Things have gone really bad */ 1021 st->screwed_up += 50; 1022 for (i=0;i<st->frame_size*C;i++) 1023 out[i] = 0; 1024 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) 1025 { 1026 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ 1027 st->screwed_up++; 1028 } else { 1029 /* Everything's fine */ 1030 st->screwed_up=0; 1031 } 1032 if (st->screwed_up>=50) 1033 { 1034 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); 1035 speex_echo_state_reset(st); 1036 return; 1037 } 1038 1039 /* Add a small noise floor to make sure not to have problems when dividing */ 1040 See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 1041 1042 for (speak = 0; speak < K; speak++) 1043 { 1044 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 1045 power_spectrum_accum(st->X+speak*N, st->Xf, N); 1046 } 1047 1048 1049 /* Smooth far end energy estimate over time */ 1050 for (j=0;j<=st->frame_size;j++) 1051 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 1052 1053 /* Compute filtered spectra and (cross-)correlations */ 1054 for (j=st->frame_size;j>=0;j--) 1055 { 1056 spx_float_t Eh, Yh; 1057 Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]); 1058 Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]); 1059 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); 1060 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); 1061#ifdef FIXED_POINT 1062 st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]); 1063 st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]); 1064#else 1065 st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j]; 1066 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; 1067#endif 1068 } 1069 1070 Pyy = FLOAT_SQRT(Pyy); 1071 Pey = FLOAT_DIVU(Pey,Pyy); 1072 1073 /* Compute correlation updatete rate */ 1074 tmp32 = MULT16_32_Q15(st->beta0,Syy); 1075 if (tmp32 > MULT16_32_Q15(st->beta_max,See)) 1076 tmp32 = MULT16_32_Q15(st->beta_max,See); 1077 alpha = FLOAT_DIV32(tmp32, See); 1078 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); 1079 /* Update correlations (recursive average) */ 1080 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey)); 1081 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy)); 1082 if (FLOAT_LT(st->Pyy, FLOAT_ONE)) 1083 st->Pyy = FLOAT_ONE; 1084 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */ 1085 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy))) 1086 st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); 1087 if (FLOAT_GT(st->Pey, st->Pyy)) 1088 st->Pey = st->Pyy; 1089 /* leak_estimate is the linear regression result */ 1090 st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); 1091 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ 1092 if (st->leak_estimate > 16383) 1093 st->leak_estimate = 32767; 1094 else 1095 st->leak_estimate = SHL16(st->leak_estimate,1); 1096 /*printf ("%f\n", st->leak_estimate);*/ 1097 1098 /* Compute Residual to Error Ratio */ 1099#ifdef FIXED_POINT 1100 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); 1101 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); 1102 /* Check for y in e (lower bound on RER) */ 1103 { 1104 spx_float_t bound = PSEUDOFLOAT(Sey); 1105 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); 1106 if (FLOAT_GT(bound, PSEUDOFLOAT(See))) 1107 tmp32 = See; 1108 else if (tmp32 < FLOAT_EXTRACT32(bound)) 1109 tmp32 = FLOAT_EXTRACT32(bound); 1110 } 1111 if (tmp32 > SHR32(See,1)) 1112 tmp32 = SHR32(See,1); 1113 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); 1114#else 1115 RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; 1116 /* Check for y in e (lower bound on RER) */ 1117 if (RER < Sey*Sey/(1+See*Syy)) 1118 RER = Sey*Sey/(1+See*Syy); 1119 if (RER > .5) 1120 RER = .5; 1121#endif 1122 1123 /* We consider that the filter has had minimal adaptation if the following is true*/ 1124 if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) 1125 { 1126 st->adapted = 1; 1127 } 1128 1129 if (st->adapted) 1130 { 1131 /* Normal learning rate calculation once we're past the minimal adaptation phase */ 1132 for (i=0;i<=st->frame_size;i++) 1133 { 1134 spx_word32_t r, e; 1135 /* Compute frequency-domain adaptation mask */ 1136 r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); 1137 e = SHL32(st->Rf[i],3)+1; 1138#ifdef FIXED_POINT 1139 if (r>SHR32(e,1)) 1140 r = SHR32(e,1); 1141#else 1142 if (r>.5*e) 1143 r = .5*e; 1144#endif 1145 r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 1146 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ 1147 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); 1148 } 1149 } else { 1150 /* Temporary adaption rate if filter is not yet adapted enough */ 1151 spx_word16_t adapt_rate=0; 1152 1153 if (Sxx > SHR32(MULT16_16(N, 1000),6)) 1154 { 1155 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 1156#ifdef FIXED_POINT 1157 if (tmp32 > SHR32(See,2)) 1158 tmp32 = SHR32(See,2); 1159#else 1160 if (tmp32 > .25*See) 1161 tmp32 = .25*See; 1162#endif 1163 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); 1164 } 1165 for (i=0;i<=st->frame_size;i++) 1166 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); 1167 1168 1169 /* How much have we adapted so far? */ 1170 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); 1171 } 1172 1173 /* FIXME: MC conversion required */ 1174 for (i=0;i<st->frame_size;i++) 1175 st->last_y[i] = st->last_y[st->frame_size+i]; 1176 if (st->adapted) 1177 { 1178 /* If the filter is adapted, take the filtered echo */ 1179 for (i=0;i<st->frame_size;i++) 1180 st->last_y[st->frame_size+i] = in[i]-out[i]; 1181 } else { 1182 /* If filter isn't adapted yet, all we can do is take the far end signal directly */ 1183 /* moved earlier: for (i=0;i<N;i++) 1184 st->last_y[i] = st->x[i];*/ 1185 } 1186 1187} 1188 1189/* Compute spectrum of estimated echo for use in an echo post-filter */ 1190void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) 1191{ 1192 int i; 1193 spx_word16_t leak2; 1194 int N; 1195 1196 N = st->window_size; 1197 1198 /* Apply hanning window (should pre-compute it)*/ 1199 for (i=0;i<N;i++) 1200 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 1201 1202 /* Compute power spectrum of the echo */ 1203 spx_fft(st->fft_table, st->y, st->Y); 1204 power_spectrum(st->Y, residual_echo, N); 1205 1206#ifdef FIXED_POINT 1207 if (st->leak_estimate > 16383) 1208 leak2 = 32767; 1209 else 1210 leak2 = SHL16(st->leak_estimate, 1); 1211#else 1212 if (st->leak_estimate>.5) 1213 leak2 = 1; 1214 else 1215 leak2 = 2*st->leak_estimate; 1216#endif 1217 /* Estimate residual echo */ 1218 for (i=0;i<=st->frame_size;i++) 1219 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 1220 1221} 1222 1223EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 1224{ 1225 switch(request) 1226 { 1227 1228 case SPEEX_ECHO_GET_FRAME_SIZE: 1229 (*(int*)ptr) = st->frame_size; 1230 break; 1231 case SPEEX_ECHO_SET_SAMPLING_RATE: 1232 st->sampling_rate = (*(int*)ptr); 1233 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); 1234#ifdef FIXED_POINT 1235 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); 1236 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); 1237#else 1238 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; 1239 st->beta_max = (.5f*st->frame_size)/st->sampling_rate; 1240#endif 1241 if (st->sampling_rate<12000) 1242 st->notch_radius = QCONST16(.9, 15); 1243 else if (st->sampling_rate<24000) 1244 st->notch_radius = QCONST16(.982, 15); 1245 else 1246 st->notch_radius = QCONST16(.992, 15); 1247 break; 1248 case SPEEX_ECHO_GET_SAMPLING_RATE: 1249 (*(int*)ptr) = st->sampling_rate; 1250 break; 1251 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: 1252 /*FIXME: Implement this for multiple channels */ 1253 *((spx_int32_t *)ptr) = st->M * st->frame_size; 1254 break; 1255 case SPEEX_ECHO_GET_IMPULSE_RESPONSE: 1256 { 1257 int M = st->M, N = st->window_size, n = st->frame_size, i, j; 1258 spx_int32_t *filt = (spx_int32_t *) ptr; 1259 for(j=0;j<M;j++) 1260 { 1261 /*FIXME: Implement this for multiple channels */ 1262#ifdef FIXED_POINT 1263 for (i=0;i<N;i++) 1264 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); 1265 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 1266#else 1267 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 1268#endif 1269 for(i=0;i<n;i++) 1270 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); 1271 } 1272 } 1273 break; 1274 default: 1275 speex_warning_int("Unknown speex_echo_ctl request: ", request); 1276 return -1; 1277 } 1278 return 0; 1279} 1280