1/* Copyright (C) 2003-2008 Jean-Marc Valin
2
3   File: mdf.c
4   Echo canceller based on the MDF algorithm (see below)
5
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are
8   met:
9
10   1. Redistributions of source code must retain the above copyright notice,
11   this list of conditions and the following disclaimer.
12
13   2. Redistributions in binary form must reproduce the above copyright
14   notice, this list of conditions and the following disclaimer in the
15   documentation and/or other materials provided with the distribution.
16
17   3. The name of the author may not be used to endorse or promote products
18   derived from this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/*
34   The echo canceller is based on the MDF algorithm described in:
35
36   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38   February 1990.
39
40   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41   double-talk is achieved using a variable learning rate as described in:
42
43   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44   Cancellation With Double-Talk. IEEE Transactions on Audio,
45   Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46   http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47
48   There is no explicit double-talk detection, but a continuous variation
49   in the learning rate based on residual echo, double-talk and background
50   noise.
51
52   About the fixed-point version:
53   All the signals are represented with 16-bit words. The filter weights
54   are represented with 32-bit words, but only the top 16 bits are used
55   in most cases. The lower 16 bits are completely unreliable (due to the
56   fact that the update is done only on the top bits), but help in the
57   adaptation -- probably by removing a "threshold effect" due to
58   quantization (rounding going to zero) when the gradient is small.
59
60   Another kludge that seems to work good: when performing the weight
61   update, we only move half the way toward the "goal" this seems to
62   reduce the effect of quantization noise in the update phase. This
63   can be seen as applying a gradient descent on a "soft constraint"
64   instead of having a hard constraint.
65
66*/
67
68#ifdef HAVE_CONFIG_H
69#include "config.h"
70#endif
71
72#include "arch.h"
73#include "speex/speex_echo.h"
74#include "fftwrap.h"
75#include "pseudofloat.h"
76#include "math_approx.h"
77#include "os_support.h"
78
79#ifndef M_PI
80#define M_PI 3.14159265358979323846
81#endif
82
83#ifdef FIXED_POINT
84#define WEIGHT_SHIFT 11
85#define NORMALIZE_SCALEDOWN 5
86#define NORMALIZE_SCALEUP 3
87#else
88#define WEIGHT_SHIFT 0
89#endif
90
91/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92   and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
93#define TWO_PATH
94
95#ifdef FIXED_POINT
96static const spx_float_t MIN_LEAK = {20972, -22};
97
98/* Constants for the two-path filter */
99static const spx_float_t VAR1_SMOOTH = {23593, -16};
100static const spx_float_t VAR2_SMOOTH = {23675, -15};
101static const spx_float_t VAR1_UPDATE = {16384, -15};
102static const spx_float_t VAR2_UPDATE = {16384, -16};
103static const spx_float_t VAR_BACKTRACK = {16384, -12};
104#define TOP16(x) ((x)>>16)
105
106#else
107
108static const spx_float_t MIN_LEAK = .005f;
109
110/* Constants for the two-path filter */
111static const spx_float_t VAR1_SMOOTH = .36f;
112static const spx_float_t VAR2_SMOOTH = .7225f;
113static const spx_float_t VAR1_UPDATE = .5f;
114static const spx_float_t VAR2_UPDATE = .25f;
115static const spx_float_t VAR_BACKTRACK = 4.f;
116#define TOP16(x) (x)
117#endif
118
119
120#define PLAYBACK_DELAY 2
121
122void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
123
124
125/** Speex echo cancellation state. */
126struct SpeexEchoState_ {
127   int frame_size;           /**< Number of samples processed each time */
128   int window_size;
129   int M;
130   int cancel_count;
131   int adapted;
132   int saturated;
133   int screwed_up;
134   int C;                    /** Number of input channels (microphones) */
135   int K;                    /** Number of output channels (loudspeakers) */
136   spx_int32_t sampling_rate;
137   spx_word16_t spec_average;
138   spx_word16_t beta0;
139   spx_word16_t beta_max;
140   spx_word32_t sum_adapt;
141   spx_word16_t leak_estimate;
142
143   spx_word16_t *e;      /* scratch */
144   spx_word16_t *x;      /* Far-end input buffer (2N) */
145   spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
146   spx_word16_t *input;  /* scratch */
147   spx_word16_t *y;      /* scratch */
148   spx_word16_t *last_y;
149   spx_word16_t *Y;      /* scratch */
150   spx_word16_t *E;
151   spx_word32_t *PHI;    /* scratch */
152   spx_word32_t *W;      /* (Background) filter weights */
153#ifdef TWO_PATH
154   spx_word16_t *foreground; /* Foreground filter weights */
155   spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
156   spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
157   spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
158   spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
159#endif
160   spx_word32_t *power;  /* Power of the far-end signal */
161   spx_float_t  *power_1;/* Inverse power of far-end */
162   spx_word16_t *wtmp;   /* scratch */
163#ifdef FIXED_POINT
164   spx_word16_t *wtmp2;  /* scratch */
165#endif
166   spx_word32_t *Rf;     /* scratch */
167   spx_word32_t *Yf;     /* scratch */
168   spx_word32_t *Xf;     /* scratch */
169   spx_word32_t *Eh;
170   spx_word32_t *Yh;
171   spx_float_t   Pey;
172   spx_float_t   Pyy;
173   spx_word16_t *window;
174   spx_word16_t *prop;
175   void *fft_table;
176   spx_word16_t *memX, *memD, *memE;
177   spx_word16_t preemph;
178   spx_word16_t notch_radius;
179   spx_mem_t *notch_mem;
180
181   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
182   spx_int16_t *play_buf;
183   int play_buf_pos;
184   int play_buf_started;
185};
186
187static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
188{
189   int i;
190   spx_word16_t den2;
191#ifdef FIXED_POINT
192   den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
193#else
194   den2 = radius*radius + .7*(1-radius)*(1-radius);
195#endif
196   /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
197   for (i=0;i<len;i++)
198   {
199      spx_word16_t vin = in[i*stride];
200      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
201#ifdef FIXED_POINT
202      mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
203#else
204      mem[0] = mem[1] + 2*(-vin + radius*vout);
205#endif
206      mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
207      out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
208   }
209}
210
211/* This inner product is slightly different from the codec version because of fixed-point */
212static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
213{
214   spx_word32_t sum=0;
215   len >>= 1;
216   while(len--)
217   {
218      spx_word32_t part=0;
219      part = MAC16_16(part,*x++,*y++);
220      part = MAC16_16(part,*x++,*y++);
221      /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
222      sum = ADD32(sum,SHR32(part,6));
223   }
224   return sum;
225}
226
227/** Compute power spectrum of a half-complex (packed) vector */
228static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
229{
230   int i, j;
231   ps[0]=MULT16_16(X[0],X[0]);
232   for (i=1,j=1;i<N-1;i+=2,j++)
233   {
234      ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
235   }
236   ps[j]=MULT16_16(X[i],X[i]);
237}
238
239/** Compute power spectrum of a half-complex (packed) vector and accumulate */
240static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
241{
242   int i, j;
243   ps[0]+=MULT16_16(X[0],X[0]);
244   for (i=1,j=1;i<N-1;i+=2,j++)
245   {
246      ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
247   }
248   ps[j]+=MULT16_16(X[i],X[i]);
249}
250
251/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
252#ifdef FIXED_POINT
253static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
254{
255   int i,j;
256   spx_word32_t tmp1=0,tmp2=0;
257   for (j=0;j<M;j++)
258   {
259      tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
260   }
261   acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
262   for (i=1;i<N-1;i+=2)
263   {
264      tmp1 = tmp2 = 0;
265      for (j=0;j<M;j++)
266      {
267         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
268         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
269      }
270      acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
271      acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
272   }
273   tmp1 = tmp2 = 0;
274   for (j=0;j<M;j++)
275   {
276      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
277   }
278   acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
279}
280static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
281{
282   int i,j;
283   spx_word32_t tmp1=0,tmp2=0;
284   for (j=0;j<M;j++)
285   {
286      tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
287   }
288   acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
289   for (i=1;i<N-1;i+=2)
290   {
291      tmp1 = tmp2 = 0;
292      for (j=0;j<M;j++)
293      {
294         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
295         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
296      }
297      acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
298      acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
299   }
300   tmp1 = tmp2 = 0;
301   for (j=0;j<M;j++)
302   {
303      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
304   }
305   acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
306}
307
308#else
309static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
310{
311   int i,j;
312   for (i=0;i<N;i++)
313      acc[i] = 0;
314   for (j=0;j<M;j++)
315   {
316      acc[0] += X[0]*Y[0];
317      for (i=1;i<N-1;i+=2)
318      {
319         acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
320         acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
321      }
322      acc[i] += X[i]*Y[i];
323      X += N;
324      Y += N;
325   }
326}
327#define spectral_mul_accum16 spectral_mul_accum
328#endif
329
330/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
331static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
332{
333   int i, j;
334   spx_float_t W;
335   W = FLOAT_AMULT(p, w[0]);
336   prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
337   for (i=1,j=1;i<N-1;i+=2,j++)
338   {
339      W = FLOAT_AMULT(p, w[j]);
340      prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
341      prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
342   }
343   W = FLOAT_AMULT(p, w[j]);
344   prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
345}
346
347static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
348{
349   int i, j, p;
350   spx_word16_t max_sum = 1;
351   spx_word32_t prop_sum = 1;
352   for (i=0;i<M;i++)
353   {
354      spx_word32_t tmp = 1;
355      for (p=0;p<P;p++)
356         for (j=0;j<N;j++)
357            tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
358#ifdef FIXED_POINT
359      /* Just a security in case an overflow were to occur */
360      tmp = MIN32(ABS32(tmp), 536870912);
361#endif
362      prop[i] = spx_sqrt(tmp);
363      if (prop[i] > max_sum)
364         max_sum = prop[i];
365   }
366   for (i=0;i<M;i++)
367   {
368      prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
369      prop_sum += EXTEND32(prop[i]);
370   }
371   for (i=0;i<M;i++)
372   {
373      prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
374      /*printf ("%f ", prop[i]);*/
375   }
376   /*printf ("\n");*/
377}
378
379#ifdef DUMP_ECHO_CANCEL_DATA
380#include <stdio.h>
381static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
382
383static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
384{
385   if (!(rFile && pFile && oFile))
386   {
387      speex_fatal("Dump files not open");
388   }
389   fwrite(rec, sizeof(spx_int16_t), len, rFile);
390   fwrite(play, sizeof(spx_int16_t), len, pFile);
391   fwrite(out, sizeof(spx_int16_t), len, oFile);
392}
393#endif
394
395/** Creates a new echo canceller state */
396EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
397{
398   return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
399}
400
401EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
402{
403   int i,N,M, C, K;
404   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
405
406   st->K = nb_speakers;
407   st->C = nb_mic;
408   C=st->C;
409   K=st->K;
410#ifdef DUMP_ECHO_CANCEL_DATA
411   if (rFile || pFile || oFile)
412      speex_fatal("Opening dump files twice");
413   rFile = fopen("aec_rec.sw", "wb");
414   pFile = fopen("aec_play.sw", "wb");
415   oFile = fopen("aec_out.sw", "wb");
416#endif
417
418   st->frame_size = frame_size;
419   st->window_size = 2*frame_size;
420   N = st->window_size;
421   M = st->M = (filter_length+st->frame_size-1)/frame_size;
422   st->cancel_count=0;
423   st->sum_adapt = 0;
424   st->saturated = 0;
425   st->screwed_up = 0;
426   /* This is the default sampling rate */
427   st->sampling_rate = 8000;
428   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
429#ifdef FIXED_POINT
430   st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
431   st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
432#else
433   st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
434   st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
435#endif
436   st->leak_estimate = 0;
437
438   st->fft_table = spx_fft_init(N);
439
440   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
441   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
442   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
443   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
444   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
445   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
446   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
447   st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
448   st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
449   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
450
451   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
452   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
453   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
454   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
455#ifdef TWO_PATH
456   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
457#endif
458   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
459   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
460   st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
461   st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
462   st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
463   st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
464#ifdef FIXED_POINT
465   st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
466   for (i=0;i<N>>1;i++)
467   {
468      st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
469      st->window[N-i-1] = st->window[i];
470   }
471#else
472   for (i=0;i<N;i++)
473      st->window[i] = .5-.5*cos(2*M_PI*i/N);
474#endif
475   for (i=0;i<=st->frame_size;i++)
476      st->power_1[i] = FLOAT_ONE;
477   for (i=0;i<N*M*K*C;i++)
478      st->W[i] = 0;
479   {
480      spx_word32_t sum = 0;
481      /* Ratio of ~10 between adaptation rate of first and last block */
482      spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
483      st->prop[0] = QCONST16(.7, 15);
484      sum = EXTEND32(st->prop[0]);
485      for (i=1;i<M;i++)
486      {
487         st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
488         sum = ADD32(sum, EXTEND32(st->prop[i]));
489      }
490      for (i=M-1;i>=0;i--)
491      {
492         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
493      }
494   }
495
496   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
497   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
498   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
499   st->preemph = QCONST16(.9,15);
500   if (st->sampling_rate<12000)
501      st->notch_radius = QCONST16(.9, 15);
502   else if (st->sampling_rate<24000)
503      st->notch_radius = QCONST16(.982, 15);
504   else
505      st->notch_radius = QCONST16(.992, 15);
506
507   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
508   st->adapted = 0;
509   st->Pey = st->Pyy = FLOAT_ONE;
510
511#ifdef TWO_PATH
512   st->Davg1 = st->Davg2 = 0;
513   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
514#endif
515
516   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
517   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
518   st->play_buf_started = 0;
519
520   return st;
521}
522
523/** Resets echo canceller state */
524EXPORT void speex_echo_state_reset(SpeexEchoState *st)
525{
526   int i, M, N, C, K;
527   st->cancel_count=0;
528   st->screwed_up = 0;
529   N = st->window_size;
530   M = st->M;
531   C=st->C;
532   K=st->K;
533   for (i=0;i<N*M;i++)
534      st->W[i] = 0;
535#ifdef TWO_PATH
536   for (i=0;i<N*M;i++)
537      st->foreground[i] = 0;
538#endif
539   for (i=0;i<N*(M+1);i++)
540      st->X[i] = 0;
541   for (i=0;i<=st->frame_size;i++)
542   {
543      st->power[i] = 0;
544      st->power_1[i] = FLOAT_ONE;
545      st->Eh[i] = 0;
546      st->Yh[i] = 0;
547   }
548   for (i=0;i<st->frame_size;i++)
549   {
550      st->last_y[i] = 0;
551   }
552   for (i=0;i<N*C;i++)
553   {
554      st->E[i] = 0;
555   }
556   for (i=0;i<N*K;i++)
557   {
558      st->x[i] = 0;
559   }
560   for (i=0;i<2*C;i++)
561      st->notch_mem[i] = 0;
562   for (i=0;i<C;i++)
563      st->memD[i]=st->memE[i]=0;
564   for (i=0;i<K;i++)
565      st->memX[i]=0;
566
567   st->saturated = 0;
568   st->adapted = 0;
569   st->sum_adapt = 0;
570   st->Pey = st->Pyy = FLOAT_ONE;
571#ifdef TWO_PATH
572   st->Davg1 = st->Davg2 = 0;
573   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
574#endif
575   for (i=0;i<3*st->frame_size;i++)
576      st->play_buf[i] = 0;
577   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
578   st->play_buf_started = 0;
579
580}
581
582/** Destroys an echo canceller state */
583EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
584{
585   spx_fft_destroy(st->fft_table);
586
587   speex_free(st->e);
588   speex_free(st->x);
589   speex_free(st->input);
590   speex_free(st->y);
591   speex_free(st->last_y);
592   speex_free(st->Yf);
593   speex_free(st->Rf);
594   speex_free(st->Xf);
595   speex_free(st->Yh);
596   speex_free(st->Eh);
597
598   speex_free(st->X);
599   speex_free(st->Y);
600   speex_free(st->E);
601   speex_free(st->W);
602#ifdef TWO_PATH
603   speex_free(st->foreground);
604#endif
605   speex_free(st->PHI);
606   speex_free(st->power);
607   speex_free(st->power_1);
608   speex_free(st->window);
609   speex_free(st->prop);
610   speex_free(st->wtmp);
611#ifdef FIXED_POINT
612   speex_free(st->wtmp2);
613#endif
614   speex_free(st->memX);
615   speex_free(st->memD);
616   speex_free(st->memE);
617   speex_free(st->notch_mem);
618
619   speex_free(st->play_buf);
620   speex_free(st);
621
622#ifdef DUMP_ECHO_CANCEL_DATA
623   fclose(rFile);
624   fclose(pFile);
625   fclose(oFile);
626   rFile = pFile = oFile = NULL;
627#endif
628}
629
630EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
631{
632   int i;
633   /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
634   st->play_buf_started = 1;
635   if (st->play_buf_pos>=st->frame_size)
636   {
637      speex_echo_cancellation(st, rec, st->play_buf, out);
638      st->play_buf_pos -= st->frame_size;
639      for (i=0;i<st->play_buf_pos;i++)
640         st->play_buf[i] = st->play_buf[i+st->frame_size];
641   } else {
642      speex_warning("No playback frame available (your application is buggy and/or got xruns)");
643      if (st->play_buf_pos!=0)
644      {
645         speex_warning("internal playback buffer corruption?");
646         st->play_buf_pos = 0;
647      }
648      for (i=0;i<st->frame_size;i++)
649         out[i] = rec[i];
650   }
651}
652
653EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
654{
655   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
656   if (!st->play_buf_started)
657   {
658      speex_warning("discarded first playback frame");
659      return;
660   }
661   if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
662   {
663      int i;
664      for (i=0;i<st->frame_size;i++)
665         st->play_buf[st->play_buf_pos+i] = play[i];
666      st->play_buf_pos += st->frame_size;
667      if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
668      {
669         speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
670         for (i=0;i<st->frame_size;i++)
671            st->play_buf[st->play_buf_pos+i] = play[i];
672         st->play_buf_pos += st->frame_size;
673      }
674   } else {
675      speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
676   }
677}
678
679/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
680EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
681{
682   speex_echo_cancellation(st, in, far_end, out);
683}
684
685/** Performs echo cancellation on a frame */
686EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
687{
688   int i,j, chan, speak;
689   int N,M, C, K;
690   spx_word32_t Syy,See,Sxx,Sdd, Sff;
691#ifdef TWO_PATH
692   spx_word32_t Dbf;
693   int update_foreground;
694#endif
695   spx_word32_t Sey;
696   spx_word16_t ss, ss_1;
697   spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
698   spx_float_t alpha, alpha_1;
699   spx_word16_t RER;
700   spx_word32_t tmp32;
701
702   N = st->window_size;
703   M = st->M;
704   C = st->C;
705   K = st->K;
706
707   st->cancel_count++;
708#ifdef FIXED_POINT
709   ss=DIV32_16(11469,M);
710   ss_1 = SUB16(32767,ss);
711#else
712   ss=.35/M;
713   ss_1 = 1-ss;
714#endif
715
716   for (chan = 0; chan < C; chan++)
717   {
718      /* Apply a notch filter to make sure DC doesn't end up causing problems */
719      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
720      /* Copy input data to buffer and apply pre-emphasis */
721      /* Copy input data to buffer */
722      for (i=0;i<st->frame_size;i++)
723      {
724         spx_word32_t tmp32;
725         /* FIXME: This core has changed a bit, need to merge properly */
726         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
727#ifdef FIXED_POINT
728         if (tmp32 > 32767)
729         {
730            tmp32 = 32767;
731            if (st->saturated == 0)
732               st->saturated = 1;
733         }
734         if (tmp32 < -32767)
735         {
736            tmp32 = -32767;
737            if (st->saturated == 0)
738               st->saturated = 1;
739         }
740#endif
741         st->memD[chan] = st->input[chan*st->frame_size+i];
742         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
743      }
744   }
745
746   for (speak = 0; speak < K; speak++)
747   {
748      for (i=0;i<st->frame_size;i++)
749      {
750         spx_word32_t tmp32;
751         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
752         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
753#ifdef FIXED_POINT
754         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
755         if (tmp32 > 32767)
756         {
757            tmp32 = 32767;
758            st->saturated = M+1;
759         }
760         if (tmp32 < -32767)
761         {
762            tmp32 = -32767;
763            st->saturated = M+1;
764         }
765#endif
766         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
767         st->memX[speak] = far_end[i*K+speak];
768      }
769   }
770
771   for (speak = 0; speak < K; speak++)
772   {
773      /* Shift memory: this could be optimized eventually*/
774      for (j=M-1;j>=0;j--)
775      {
776         for (i=0;i<N;i++)
777            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
778      }
779      /* Convert x (echo input) to frequency domain */
780      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
781   }
782
783   Sxx = 0;
784   for (speak = 0; speak < K; speak++)
785   {
786      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
787      power_spectrum_accum(st->X+speak*N, st->Xf, N);
788   }
789
790   Sff = 0;
791   for (chan = 0; chan < C; chan++)
792   {
793#ifdef TWO_PATH
794      /* Compute foreground filter */
795      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
796      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
797      for (i=0;i<st->frame_size;i++)
798         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
799      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
800#endif
801   }
802
803   /* Adjust proportional adaption rate */
804   /* FIXME: Adjust that for C, K*/
805   if (st->adapted)
806      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
807   /* Compute weight gradient */
808   if (st->saturated == 0)
809   {
810      for (chan = 0; chan < C; chan++)
811      {
812         for (speak = 0; speak < K; speak++)
813         {
814            for (j=M-1;j>=0;j--)
815            {
816               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
817               for (i=0;i<N;i++)
818                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
819            }
820         }
821      }
822   } else {
823      st->saturated--;
824   }
825
826   /* FIXME: MC conversion required */
827   /* Update weight to prevent circular convolution (MDF / AUMDF) */
828   for (chan = 0; chan < C; chan++)
829   {
830      for (speak = 0; speak < K; speak++)
831      {
832         for (j=0;j<M;j++)
833         {
834            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
835            /* Remove the "if" to make this an MDF filter */
836            if (j==0 || st->cancel_count%(M-1) == j-1)
837            {
838#ifdef FIXED_POINT
839               for (i=0;i<N;i++)
840                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
841               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
842               for (i=0;i<st->frame_size;i++)
843               {
844                  st->wtmp[i]=0;
845               }
846               for (i=st->frame_size;i<N;i++)
847               {
848                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
849               }
850               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
851               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
852               for (i=0;i<N;i++)
853                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
854#else
855               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
856               for (i=st->frame_size;i<N;i++)
857               {
858                  st->wtmp[i]=0;
859               }
860               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
861#endif
862            }
863         }
864      }
865   }
866
867   /* So we can use power_spectrum_accum */
868   for (i=0;i<=st->frame_size;i++)
869      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
870
871   Dbf = 0;
872   See = 0;
873#ifdef TWO_PATH
874   /* Difference in response, this is used to estimate the variance of our residual power estimate */
875   for (chan = 0; chan < C; chan++)
876   {
877      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
878      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
879      for (i=0;i<st->frame_size;i++)
880         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
881      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
882      for (i=0;i<st->frame_size;i++)
883         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
884      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
885   }
886#endif
887
888#ifndef TWO_PATH
889   Sff = See;
890#endif
891
892#ifdef TWO_PATH
893   /* Logic for updating the foreground filter */
894
895   /* For two time windows, compute the mean of the energy difference, as well as the variance */
896   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
897   st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
898   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
899   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
900
901   /* Equivalent float code:
902   st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
903   st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
904   st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
905   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
906   */
907
908   update_foreground = 0;
909   /* Check if we have a statistically significant reduction in the residual echo */
910   /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
911   if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
912      update_foreground = 1;
913   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
914      update_foreground = 1;
915   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
916      update_foreground = 1;
917
918   /* Do we update? */
919   if (update_foreground)
920   {
921      st->Davg1 = st->Davg2 = 0;
922      st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
923      /* Copy background filter to foreground filter */
924      for (i=0;i<N*M*C*K;i++)
925         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
926      /* Apply a smooth transition so as to not introduce blocking artifacts */
927      for (chan = 0; chan < C; chan++)
928         for (i=0;i<st->frame_size;i++)
929            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
930   } else {
931      int reset_background=0;
932      /* Otherwise, check if the background filter is significantly worse */
933      if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
934         reset_background = 1;
935      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
936         reset_background = 1;
937      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
938         reset_background = 1;
939      if (reset_background)
940      {
941         /* Copy foreground filter to background filter */
942         for (i=0;i<N*M*C*K;i++)
943            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
944         /* We also need to copy the output so as to get correct adaptation */
945         for (chan = 0; chan < C; chan++)
946         {
947            for (i=0;i<st->frame_size;i++)
948               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
949            for (i=0;i<st->frame_size;i++)
950               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
951         }
952         See = Sff;
953         st->Davg1 = st->Davg2 = 0;
954         st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
955      }
956   }
957#endif
958
959   Sey = Syy = Sdd = 0;
960   for (chan = 0; chan < C; chan++)
961   {
962      /* Compute error signal (for the output with de-emphasis) */
963      for (i=0;i<st->frame_size;i++)
964      {
965         spx_word32_t tmp_out;
966#ifdef TWO_PATH
967         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
968#else
969         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
970#endif
971         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
972      /* This is an arbitrary test for saturation in the microphone signal */
973         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
974         {
975         if (st->saturated == 0)
976            st->saturated = 1;
977         }
978         out[i*C+chan] = WORD2INT(tmp_out);
979         st->memE[chan] = tmp_out;
980      }
981
982#ifdef DUMP_ECHO_CANCEL_DATA
983      dump_audio(in, far_end, out, st->frame_size);
984#endif
985
986      /* Compute error signal (filter update version) */
987      for (i=0;i<st->frame_size;i++)
988      {
989         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
990         st->e[chan*N+i] = 0;
991      }
992
993      /* Compute a bunch of correlations */
994      /* FIXME: bad merge */
995      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
996      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
997      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
998
999      /* Convert error to frequency domain */
1000      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1001      for (i=0;i<st->frame_size;i++)
1002         st->y[i+chan*N] = 0;
1003      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1004
1005      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1006      power_spectrum_accum(st->E+chan*N, st->Rf, N);
1007      power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1008
1009   }
1010
1011   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1012
1013   /* Do some sanity check */
1014   if (!(Syy>=0 && Sxx>=0 && See >= 0)
1015#ifndef FIXED_POINT
1016       || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1017#endif
1018      )
1019   {
1020      /* Things have gone really bad */
1021      st->screwed_up += 50;
1022      for (i=0;i<st->frame_size*C;i++)
1023         out[i] = 0;
1024   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1025   {
1026      /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1027      st->screwed_up++;
1028   } else {
1029      /* Everything's fine */
1030      st->screwed_up=0;
1031   }
1032   if (st->screwed_up>=50)
1033   {
1034      speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1035      speex_echo_state_reset(st);
1036      return;
1037   }
1038
1039   /* Add a small noise floor to make sure not to have problems when dividing */
1040   See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1041
1042   for (speak = 0; speak < K; speak++)
1043   {
1044      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1045      power_spectrum_accum(st->X+speak*N, st->Xf, N);
1046   }
1047
1048
1049   /* Smooth far end energy estimate over time */
1050   for (j=0;j<=st->frame_size;j++)
1051      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1052
1053   /* Compute filtered spectra and (cross-)correlations */
1054   for (j=st->frame_size;j>=0;j--)
1055   {
1056      spx_float_t Eh, Yh;
1057      Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1058      Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1059      Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1060      Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1061#ifdef FIXED_POINT
1062      st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1063      st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1064#else
1065      st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1066      st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1067#endif
1068   }
1069
1070   Pyy = FLOAT_SQRT(Pyy);
1071   Pey = FLOAT_DIVU(Pey,Pyy);
1072
1073   /* Compute correlation updatete rate */
1074   tmp32 = MULT16_32_Q15(st->beta0,Syy);
1075   if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1076      tmp32 = MULT16_32_Q15(st->beta_max,See);
1077   alpha = FLOAT_DIV32(tmp32, See);
1078   alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1079   /* Update correlations (recursive average) */
1080   st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1081   st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1082   if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1083      st->Pyy = FLOAT_ONE;
1084   /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1085   if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1086      st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1087   if (FLOAT_GT(st->Pey, st->Pyy))
1088      st->Pey = st->Pyy;
1089   /* leak_estimate is the linear regression result */
1090   st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1091   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1092   if (st->leak_estimate > 16383)
1093      st->leak_estimate = 32767;
1094   else
1095      st->leak_estimate = SHL16(st->leak_estimate,1);
1096   /*printf ("%f\n", st->leak_estimate);*/
1097
1098   /* Compute Residual to Error Ratio */
1099#ifdef FIXED_POINT
1100   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1101   tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1102   /* Check for y in e (lower bound on RER) */
1103   {
1104      spx_float_t bound = PSEUDOFLOAT(Sey);
1105      bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1106      if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1107         tmp32 = See;
1108      else if (tmp32 < FLOAT_EXTRACT32(bound))
1109         tmp32 = FLOAT_EXTRACT32(bound);
1110   }
1111   if (tmp32 > SHR32(See,1))
1112      tmp32 = SHR32(See,1);
1113   RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1114#else
1115   RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1116   /* Check for y in e (lower bound on RER) */
1117   if (RER < Sey*Sey/(1+See*Syy))
1118      RER = Sey*Sey/(1+See*Syy);
1119   if (RER > .5)
1120      RER = .5;
1121#endif
1122
1123   /* We consider that the filter has had minimal adaptation if the following is true*/
1124   if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1125   {
1126      st->adapted = 1;
1127   }
1128
1129   if (st->adapted)
1130   {
1131      /* Normal learning rate calculation once we're past the minimal adaptation phase */
1132      for (i=0;i<=st->frame_size;i++)
1133      {
1134         spx_word32_t r, e;
1135         /* Compute frequency-domain adaptation mask */
1136         r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1137         e = SHL32(st->Rf[i],3)+1;
1138#ifdef FIXED_POINT
1139         if (r>SHR32(e,1))
1140            r = SHR32(e,1);
1141#else
1142         if (r>.5*e)
1143            r = .5*e;
1144#endif
1145         r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1146         /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1147         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1148      }
1149   } else {
1150      /* Temporary adaption rate if filter is not yet adapted enough */
1151      spx_word16_t adapt_rate=0;
1152
1153      if (Sxx > SHR32(MULT16_16(N, 1000),6))
1154      {
1155         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1156#ifdef FIXED_POINT
1157         if (tmp32 > SHR32(See,2))
1158            tmp32 = SHR32(See,2);
1159#else
1160         if (tmp32 > .25*See)
1161            tmp32 = .25*See;
1162#endif
1163         adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1164      }
1165      for (i=0;i<=st->frame_size;i++)
1166         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1167
1168
1169      /* How much have we adapted so far? */
1170      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1171   }
1172
1173   /* FIXME: MC conversion required */
1174      for (i=0;i<st->frame_size;i++)
1175         st->last_y[i] = st->last_y[st->frame_size+i];
1176   if (st->adapted)
1177   {
1178      /* If the filter is adapted, take the filtered echo */
1179      for (i=0;i<st->frame_size;i++)
1180         st->last_y[st->frame_size+i] = in[i]-out[i];
1181   } else {
1182      /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1183      /* moved earlier: for (i=0;i<N;i++)
1184      st->last_y[i] = st->x[i];*/
1185   }
1186
1187}
1188
1189/* Compute spectrum of estimated echo for use in an echo post-filter */
1190void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1191{
1192   int i;
1193   spx_word16_t leak2;
1194   int N;
1195
1196   N = st->window_size;
1197
1198   /* Apply hanning window (should pre-compute it)*/
1199   for (i=0;i<N;i++)
1200      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1201
1202   /* Compute power spectrum of the echo */
1203   spx_fft(st->fft_table, st->y, st->Y);
1204   power_spectrum(st->Y, residual_echo, N);
1205
1206#ifdef FIXED_POINT
1207   if (st->leak_estimate > 16383)
1208      leak2 = 32767;
1209   else
1210      leak2 = SHL16(st->leak_estimate, 1);
1211#else
1212   if (st->leak_estimate>.5)
1213      leak2 = 1;
1214   else
1215      leak2 = 2*st->leak_estimate;
1216#endif
1217   /* Estimate residual echo */
1218   for (i=0;i<=st->frame_size;i++)
1219      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1220
1221}
1222
1223EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1224{
1225   switch(request)
1226   {
1227
1228      case SPEEX_ECHO_GET_FRAME_SIZE:
1229         (*(int*)ptr) = st->frame_size;
1230         break;
1231      case SPEEX_ECHO_SET_SAMPLING_RATE:
1232         st->sampling_rate = (*(int*)ptr);
1233         st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1234#ifdef FIXED_POINT
1235         st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1236         st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1237#else
1238         st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1239         st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1240#endif
1241         if (st->sampling_rate<12000)
1242            st->notch_radius = QCONST16(.9, 15);
1243         else if (st->sampling_rate<24000)
1244            st->notch_radius = QCONST16(.982, 15);
1245         else
1246            st->notch_radius = QCONST16(.992, 15);
1247         break;
1248      case SPEEX_ECHO_GET_SAMPLING_RATE:
1249         (*(int*)ptr) = st->sampling_rate;
1250         break;
1251      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1252         /*FIXME: Implement this for multiple channels */
1253         *((spx_int32_t *)ptr) = st->M * st->frame_size;
1254         break;
1255      case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1256      {
1257         int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1258         spx_int32_t *filt = (spx_int32_t *) ptr;
1259         for(j=0;j<M;j++)
1260         {
1261            /*FIXME: Implement this for multiple channels */
1262#ifdef FIXED_POINT
1263            for (i=0;i<N;i++)
1264               st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1265            spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1266#else
1267            spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1268#endif
1269            for(i=0;i<n;i++)
1270               filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1271         }
1272      }
1273         break;
1274      default:
1275         speex_warning_int("Unknown speex_echo_ctl request: ", request);
1276         return -1;
1277   }
1278   return 0;
1279}
1280