1/* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
2   Copyright (C) 2004-2006 Epic Games
3
4   File: preprocess.c
5   Preprocessor with denoising based on the algorithm by Ephraim and Malah
6
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions are
9   met:
10
11   1. Redistributions of source code must retain the above copyright notice,
12   this list of conditions and the following disclaimer.
13
14   2. Redistributions in binary form must reproduce the above copyright
15   notice, this list of conditions and the following disclaimer in the
16   documentation and/or other materials provided with the distribution.
17
18   3. The name of the author may not be used to endorse or promote products
19   derived from this software without specific prior written permission.
20
21   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31   POSSIBILITY OF SUCH DAMAGE.
32*/
33
34
35/*
36   Recommended papers:
37
38   Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
39   short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
40   Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
41
42   Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
43   log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
44   Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
45
46   I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
47   Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
48
49   Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
50   approach to combined acoustic echo cancellation and noise reduction". IEEE
51   Transactions on Speech and Audio Processing, 2002.
52
53   J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
54   of simultaneous non-stationary sources". In Proceedings IEEE International
55   Conference on Acoustics, Speech, and Signal Processing, 2004.
56*/
57
58#ifdef HAVE_CONFIG_H
59#include "config.h"
60#endif
61
62#include <math.h>
63#include "speex/speex_preprocess.h"
64#include "speex/speex_echo.h"
65#include "arch.h"
66#include "fftwrap.h"
67#include "filterbank.h"
68#include "math_approx.h"
69#include "os_support.h"
70
71#define LOUDNESS_EXP 5.f
72#define AMP_SCALE .001f
73#define AMP_SCALE_1 1000.f
74
75#define NB_BANDS 24
76
77#define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
78#define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
79#define NOISE_SUPPRESS_DEFAULT       -15
80#define ECHO_SUPPRESS_DEFAULT        -40
81#define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
82
83#ifndef NULL
84#define NULL 0
85#endif
86
87#define SQR(x) ((x)*(x))
88#define SQR16(x) (MULT16_16((x),(x)))
89#define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
90
91#ifdef FIXED_POINT
92static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
93{
94   if (SHR32(a,7) >= b)
95   {
96      return 32767;
97   } else {
98      if (b>=QCONST32(1,23))
99      {
100         a = SHR32(a,8);
101         b = SHR32(b,8);
102      }
103      if (b>=QCONST32(1,19))
104      {
105         a = SHR32(a,4);
106         b = SHR32(b,4);
107      }
108      if (b>=QCONST32(1,15))
109      {
110         a = SHR32(a,4);
111         b = SHR32(b,4);
112      }
113      a = SHL32(a,8);
114      return PDIV32_16(a,b);
115   }
116
117}
118static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
119{
120   if (SHR32(a,15) >= b)
121   {
122      return 32767;
123   } else {
124      if (b>=QCONST32(1,23))
125      {
126         a = SHR32(a,8);
127         b = SHR32(b,8);
128      }
129      if (b>=QCONST32(1,19))
130      {
131         a = SHR32(a,4);
132         b = SHR32(b,4);
133      }
134      if (b>=QCONST32(1,15))
135      {
136         a = SHR32(a,4);
137         b = SHR32(b,4);
138      }
139      a = SHL32(a,15)-a;
140      return DIV32_16(a,b);
141   }
142}
143#define SNR_SCALING 256.f
144#define SNR_SCALING_1 0.0039062f
145#define SNR_SHIFT 8
146
147#define FRAC_SCALING 32767.f
148#define FRAC_SCALING_1 3.0518e-05
149#define FRAC_SHIFT 1
150
151#define EXPIN_SCALING 2048.f
152#define EXPIN_SCALING_1 0.00048828f
153#define EXPIN_SHIFT 11
154#define EXPOUT_SCALING_1 1.5259e-05
155
156#define NOISE_SHIFT 7
157
158#else
159
160#define DIV32_16_Q8(a,b) ((a)/(b))
161#define DIV32_16_Q15(a,b) ((a)/(b))
162#define SNR_SCALING 1.f
163#define SNR_SCALING_1 1.f
164#define SNR_SHIFT 0
165#define FRAC_SCALING 1.f
166#define FRAC_SCALING_1 1.f
167#define FRAC_SHIFT 0
168#define NOISE_SHIFT 0
169
170#define EXPIN_SCALING 1.f
171#define EXPIN_SCALING_1 1.f
172#define EXPOUT_SCALING_1 1.f
173
174#endif
175
176/** Speex pre-processor state. */
177struct SpeexPreprocessState_ {
178   /* Basic info */
179   int    frame_size;        /**< Number of samples processed each time */
180   int    ps_size;           /**< Number of points in the power spectrum */
181   int    sampling_rate;     /**< Sampling rate of the input/output */
182   int    nbands;
183   FilterBank *bank;
184
185   /* Parameters */
186   int    denoise_enabled;
187   int    vad_enabled;
188   int    dereverb_enabled;
189   spx_word16_t  reverb_decay;
190   spx_word16_t  reverb_level;
191   spx_word16_t speech_prob_start;
192   spx_word16_t speech_prob_continue;
193   int    noise_suppress;
194   int    echo_suppress;
195   int    echo_suppress_active;
196   SpeexEchoState *echo_state;
197
198   spx_word16_t	speech_prob;  /**< Probability last frame was speech */
199
200   /* DSP-related arrays */
201   spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
202   spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
203   spx_word32_t *ps;         /**< Current power spectrum */
204   spx_word16_t *gain2;      /**< Adjusted gains */
205   spx_word16_t *gain_floor; /**< Minimum gain allowed */
206   spx_word16_t *window;     /**< Analysis/Synthesis window */
207   spx_word32_t *noise;      /**< Noise estimate */
208   spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
209   spx_word32_t *old_ps;     /**< Power spectrum for last frame */
210   spx_word16_t *gain;       /**< Ephraim Malah gain */
211   spx_word16_t *prior;      /**< A-priori SNR */
212   spx_word16_t *post;       /**< A-posteriori SNR */
213
214   spx_word32_t *S;          /**< Smoothed power spectrum */
215   spx_word32_t *Smin;       /**< See Cohen paper */
216   spx_word32_t *Stmp;       /**< See Cohen paper */
217   int *update_prob;         /**< Probability of speech presence for noise update */
218
219   spx_word16_t *zeta;       /**< Smoothed a priori SNR */
220   spx_word32_t *echo_noise;
221   spx_word32_t *residual_echo;
222
223   /* Misc */
224   spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
225   spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
226
227   /* AGC stuff, only for floating point for now */
228#ifndef FIXED_POINT
229   int    agc_enabled;
230   float  agc_level;
231   float  loudness_accum;
232   float *loudness_weight;   /**< Perceptual loudness curve */
233   float  loudness;          /**< Loudness estimate */
234   float  agc_gain;          /**< Current AGC gain */
235   float  max_gain;          /**< Maximum gain allowed */
236   float  max_increase_step; /**< Maximum increase in gain from one frame to another */
237   float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
238   float  prev_loudness;     /**< Loudness of previous frame */
239   float  init_max;          /**< Current gain limit during initialisation */
240#endif
241   int    nb_adapt;          /**< Number of frames used for adaptation so far */
242   int    was_speech;
243   int    min_count;         /**< Number of frames processed so far */
244   void  *fft_lookup;        /**< Lookup table for the FFT */
245#ifdef FIXED_POINT
246   int    frame_shift;
247#endif
248};
249
250
251static void conj_window(spx_word16_t *w, int len)
252{
253   int i;
254   for (i=0;i<len;i++)
255   {
256      spx_word16_t tmp;
257#ifdef FIXED_POINT
258      spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
259#else
260      spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
261#endif
262      int inv=0;
263      if (x<QCONST16(1.f,13))
264      {
265      } else if (x<QCONST16(2.f,13))
266      {
267         x=QCONST16(2.f,13)-x;
268         inv=1;
269      } else if (x<QCONST16(3.f,13))
270      {
271         x=x-QCONST16(2.f,13);
272         inv=1;
273      } else {
274         x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
275      }
276      x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
277      tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
278      if (inv)
279         tmp=SUB16(Q15_ONE,tmp);
280      w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
281   }
282}
283
284
285#ifdef FIXED_POINT
286/* This function approximates the gain function
287   y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
288   which multiplied by xi/(1+xi) is the optimal gain
289   in the loudness domain ( sqrt[amplitude] )
290   Input in Q11 format, output in Q15
291*/
292static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
293{
294   int ind;
295   spx_word16_t frac;
296   /* Q13 table */
297   static const spx_word16_t table[21] = {
298       6730,  8357,  9868, 11267, 12563, 13770, 14898,
299      15959, 16961, 17911, 18816, 19682, 20512, 21311,
300      22082, 22827, 23549, 24250, 24931, 25594, 26241};
301      ind = SHR32(xx,10);
302      if (ind<0)
303         return Q15_ONE;
304      if (ind>19)
305         return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
306      frac = SHL32(xx-SHL32(ind,10),5);
307      return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
308}
309
310static inline spx_word16_t qcurve(spx_word16_t x)
311{
312   x = MAX16(x, 1);
313   return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
314}
315
316/* Compute the gain floor based on different floors for the background noise and residual echo */
317static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
318{
319   int i;
320
321   if (noise_suppress > effective_echo_suppress)
322   {
323      spx_word16_t noise_gain, gain_ratio;
324      noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
325      gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
326
327      /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
328      for (i=0;i<len;i++)
329         gain_floor[i] = MULT16_16_Q15(noise_gain,
330                                       spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
331                                             (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
332   } else {
333      spx_word16_t echo_gain, gain_ratio;
334      echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
335      gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
336
337      /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
338      for (i=0;i<len;i++)
339         gain_floor[i] = MULT16_16_Q15(echo_gain,
340                                       spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
341                                             (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
342   }
343}
344
345#else
346/* This function approximates the gain function
347   y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
348   which multiplied by xi/(1+xi) is the optimal gain
349   in the loudness domain ( sqrt[amplitude] )
350*/
351static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
352{
353   int ind;
354   float integer, frac;
355   float x;
356   static const float table[21] = {
357      0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
358      1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
359      2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
360      x = EXPIN_SCALING_1*xx;
361      integer = floor(2*x);
362      ind = (int)integer;
363      if (ind<0)
364         return FRAC_SCALING;
365      if (ind>19)
366         return FRAC_SCALING*(1+.1296/x);
367      frac = 2*x-integer;
368      return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
369}
370
371static inline spx_word16_t qcurve(spx_word16_t x)
372{
373   return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
374}
375
376static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
377{
378   int i;
379   float echo_floor;
380   float noise_floor;
381
382   noise_floor = exp(.2302585f*noise_suppress);
383   echo_floor = exp(.2302585f*effective_echo_suppress);
384
385   /* Compute the gain floor based on different floors for the background noise and residual echo */
386   for (i=0;i<len;i++)
387      gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
388}
389
390#endif
391EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
392{
393   int i;
394   int N, N3, N4, M;
395
396   SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
397   st->frame_size = frame_size;
398
399   /* Round ps_size down to the nearest power of two */
400#if 0
401   i=1;
402   st->ps_size = st->frame_size;
403   while(1)
404   {
405      if (st->ps_size & ~i)
406      {
407         st->ps_size &= ~i;
408         i<<=1;
409      } else {
410         break;
411      }
412   }
413
414
415   if (st->ps_size < 3*st->frame_size/4)
416      st->ps_size = st->ps_size * 3 / 2;
417#else
418   st->ps_size = st->frame_size;
419#endif
420
421   N = st->ps_size;
422   N3 = 2*N - st->frame_size;
423   N4 = st->frame_size - N3;
424
425   st->sampling_rate = sampling_rate;
426   st->denoise_enabled = 1;
427   st->vad_enabled = 0;
428   st->dereverb_enabled = 0;
429   st->reverb_decay = 0;
430   st->reverb_level = 0;
431   st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
432   st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
433   st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
434
435   st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
436   st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
437
438   st->echo_state = NULL;
439
440   st->nbands = NB_BANDS;
441   M = st->nbands;
442   st->bank = filterbank_new(M, sampling_rate, N, 1);
443
444   st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
445   st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
446   st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
447
448   st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
449   st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
450   st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
451   st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
452   st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
453   st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
454   st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
455   st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
456   st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
457   st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
458   st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
459   st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
460
461   st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
462   st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
463   st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
464   st->update_prob = (int*)speex_alloc(N*sizeof(int));
465
466   st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
467   st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
468
469   conj_window(st->window, 2*N3);
470   for (i=2*N3;i<2*st->ps_size;i++)
471      st->window[i]=Q15_ONE;
472
473   if (N4>0)
474   {
475      for (i=N3-1;i>=0;i--)
476      {
477         st->window[i+N3+N4]=st->window[i+N3];
478         st->window[i+N3]=1;
479      }
480   }
481   for (i=0;i<N+M;i++)
482   {
483      st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
484      st->reverb_estimate[i]=0;
485      st->old_ps[i]=1;
486      st->gain[i]=Q15_ONE;
487      st->post[i]=SHL16(1, SNR_SHIFT);
488      st->prior[i]=SHL16(1, SNR_SHIFT);
489   }
490
491   for (i=0;i<N;i++)
492      st->update_prob[i] = 1;
493   for (i=0;i<N3;i++)
494   {
495      st->inbuf[i]=0;
496      st->outbuf[i]=0;
497   }
498#ifndef FIXED_POINT
499   st->agc_enabled = 0;
500   st->agc_level = 8000;
501   st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
502   for (i=0;i<N;i++)
503   {
504      float ff=((float)i)*.5*sampling_rate/((float)N);
505      /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
506      st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
507      if (st->loudness_weight[i]<.01f)
508         st->loudness_weight[i]=.01f;
509      st->loudness_weight[i] *= st->loudness_weight[i];
510   }
511   /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
512   st->loudness = 1e-15;
513   st->agc_gain = 1;
514   st->max_gain = 30;
515   st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
516   st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
517   st->prev_loudness = 1;
518   st->init_max = 1;
519#endif
520   st->was_speech = 0;
521
522   st->fft_lookup = spx_fft_init(2*N);
523
524   st->nb_adapt=0;
525   st->min_count=0;
526   return st;
527}
528
529EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
530{
531   speex_free(st->frame);
532   speex_free(st->ft);
533   speex_free(st->ps);
534   speex_free(st->gain2);
535   speex_free(st->gain_floor);
536   speex_free(st->window);
537   speex_free(st->noise);
538   speex_free(st->reverb_estimate);
539   speex_free(st->old_ps);
540   speex_free(st->gain);
541   speex_free(st->prior);
542   speex_free(st->post);
543#ifndef FIXED_POINT
544   speex_free(st->loudness_weight);
545#endif
546   speex_free(st->echo_noise);
547   speex_free(st->residual_echo);
548
549   speex_free(st->S);
550   speex_free(st->Smin);
551   speex_free(st->Stmp);
552   speex_free(st->update_prob);
553   speex_free(st->zeta);
554
555   speex_free(st->inbuf);
556   speex_free(st->outbuf);
557
558   spx_fft_destroy(st->fft_lookup);
559   filterbank_destroy(st->bank);
560   speex_free(st);
561}
562
563/* FIXME: The AGC doesn't work yet with fixed-point*/
564#ifndef FIXED_POINT
565static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
566{
567   int i;
568   int N = st->ps_size;
569   float target_gain;
570   float loudness=1.f;
571   float rate;
572
573   for (i=2;i<N;i++)
574   {
575      loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
576   }
577   loudness=sqrt(loudness);
578      /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
579   loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
580   if (Pframe>.3f)
581   {
582      /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
583      rate = .03*Pframe*Pframe;
584      st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
585      st->loudness_accum = (1-rate)*st->loudness_accum + rate;
586      if (st->init_max < st->max_gain && st->nb_adapt > 20)
587         st->init_max *= 1.f + .1f*Pframe*Pframe;
588   }
589   /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
590
591   target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
592
593   if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
594   {
595      if (target_gain > st->max_increase_step*st->agc_gain)
596         target_gain = st->max_increase_step*st->agc_gain;
597      if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
598         target_gain = st->max_decrease_step*st->agc_gain;
599      if (target_gain > st->max_gain)
600         target_gain = st->max_gain;
601      if (target_gain > st->init_max)
602         target_gain = st->init_max;
603
604      st->agc_gain = target_gain;
605   }
606   /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
607
608   for (i=0;i<2*N;i++)
609      ft[i] *= st->agc_gain;
610   st->prev_loudness = loudness;
611}
612#endif
613
614static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
615{
616   int i;
617   int N = st->ps_size;
618   int N3 = 2*N - st->frame_size;
619   int N4 = st->frame_size - N3;
620   spx_word32_t *ps=st->ps;
621
622   /* 'Build' input frame */
623   for (i=0;i<N3;i++)
624      st->frame[i]=st->inbuf[i];
625   for (i=0;i<st->frame_size;i++)
626      st->frame[N3+i]=x[i];
627
628   /* Update inbuf */
629   for (i=0;i<N3;i++)
630      st->inbuf[i]=x[N4+i];
631
632   /* Windowing */
633   for (i=0;i<2*N;i++)
634      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
635
636#ifdef FIXED_POINT
637   {
638      spx_word16_t max_val=0;
639      for (i=0;i<2*N;i++)
640         max_val = MAX16(max_val, ABS16(st->frame[i]));
641      st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
642      for (i=0;i<2*N;i++)
643         st->frame[i] = SHL16(st->frame[i], st->frame_shift);
644   }
645#endif
646
647   /* Perform FFT */
648   spx_fft(st->fft_lookup, st->frame, st->ft);
649
650   /* Power spectrum */
651   ps[0]=MULT16_16(st->ft[0],st->ft[0]);
652   for (i=1;i<N;i++)
653      ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
654   for (i=0;i<N;i++)
655      st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
656
657   filterbank_compute_bank32(st->bank, ps, ps+N);
658}
659
660static void update_noise_prob(SpeexPreprocessState *st)
661{
662   int i;
663   int min_range;
664   int N = st->ps_size;
665
666   for (i=1;i<N-1;i++)
667      st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
668                      + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
669   st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
670   st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
671
672   if (st->nb_adapt==1)
673   {
674      for (i=0;i<N;i++)
675         st->Smin[i] = st->Stmp[i] = 0;
676   }
677
678   if (st->nb_adapt < 100)
679      min_range = 15;
680   else if (st->nb_adapt < 1000)
681      min_range = 50;
682   else if (st->nb_adapt < 10000)
683      min_range = 150;
684   else
685      min_range = 300;
686   if (st->min_count > min_range)
687   {
688      st->min_count = 0;
689      for (i=0;i<N;i++)
690      {
691         st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
692         st->Stmp[i] = st->S[i];
693      }
694   } else {
695      for (i=0;i<N;i++)
696      {
697         st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
698         st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
699      }
700   }
701   for (i=0;i<N;i++)
702   {
703      if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
704         st->update_prob[i] = 1;
705      else
706         st->update_prob[i] = 0;
707      /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
708      /*fprintf (stderr, "%f ", st->update_prob[i]);*/
709   }
710
711}
712
713#define NOISE_OVERCOMPENS 1.
714
715void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
716
717EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
718{
719   return speex_preprocess_run(st, x);
720}
721
722EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
723{
724   int i;
725   int M;
726   int N = st->ps_size;
727   int N3 = 2*N - st->frame_size;
728   int N4 = st->frame_size - N3;
729   spx_word32_t *ps=st->ps;
730   spx_word32_t Zframe;
731   spx_word16_t Pframe;
732   spx_word16_t beta, beta_1;
733   spx_word16_t effective_echo_suppress;
734
735   st->nb_adapt++;
736   if (st->nb_adapt>20000)
737      st->nb_adapt = 20000;
738   st->min_count++;
739
740   beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
741   beta_1 = Q15_ONE-beta;
742   M = st->nbands;
743   /* Deal with residual echo if provided */
744   if (st->echo_state)
745   {
746      speex_echo_get_residual(st->echo_state, st->residual_echo, N);
747#ifndef FIXED_POINT
748      /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
749      if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
750      {
751         for (i=0;i<N;i++)
752            st->residual_echo[i] = 0;
753      }
754#endif
755      for (i=0;i<N;i++)
756         st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
757      filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
758   } else {
759      for (i=0;i<N+M;i++)
760         st->echo_noise[i] = 0;
761   }
762   preprocess_analysis(st, x);
763
764   update_noise_prob(st);
765
766   /* Noise estimation always updated for the 10 first frames */
767   /*if (st->nb_adapt<10)
768   {
769      for (i=1;i<N-1;i++)
770         st->update_prob[i] = 0;
771   }
772   */
773
774   /* Update the noise estimate for the frequencies where it can be */
775   for (i=0;i<N;i++)
776   {
777      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
778         st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
779   }
780   filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
781
782   /* Special case for first frame */
783   if (st->nb_adapt==1)
784      for (i=0;i<N+M;i++)
785         st->old_ps[i] = ps[i];
786
787   /* Compute a posteriori SNR */
788   for (i=0;i<N+M;i++)
789   {
790      spx_word16_t gamma;
791
792      /* Total noise estimate including residual echo and reverberation */
793      spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
794
795      /* A posteriori SNR = ps/noise - 1*/
796      st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
797      st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
798
799      /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
800      gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
801
802      /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
803      st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
804      st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
805   }
806
807   /*print_vec(st->post, N+M, "");*/
808
809   /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
810   st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
811   for (i=1;i<N-1;i++)
812      st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
813                           MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
814   for (i=N-1;i<N+M;i++)
815      st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
816
817   /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
818   Zframe = 0;
819   for (i=N;i<N+M;i++)
820      Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
821   Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
822
823   effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
824
825   compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
826
827   /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
828      Technically this is actually wrong because the EM gaim assumes a slightly different probability
829      distribution */
830   for (i=N;i<N+M;i++)
831   {
832      /* See EM and Cohen papers*/
833      spx_word32_t theta;
834      /* Gain from hypergeometric function */
835      spx_word32_t MM;
836      /* Weiner filter gain */
837      spx_word16_t prior_ratio;
838      /* a priority probability of speech presence based on Bark sub-band alone */
839      spx_word16_t P1;
840      /* Speech absence a priori probability (considering sub-band and frame) */
841      spx_word16_t q;
842#ifdef FIXED_POINT
843      spx_word16_t tmp;
844#endif
845
846      prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
847      theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
848
849      MM = hypergeom_gain(theta);
850      /* Gain with bound */
851      st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
852      /* Save old Bark power spectrum */
853      st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
854
855      P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
856      q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
857#ifdef FIXED_POINT
858      theta = MIN32(theta, EXTEND32(32767));
859/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
860      tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
861/*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
862      st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
863#else
864      st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
865#endif
866   }
867   /* Convert the EM gains and speech prob to linear frequency */
868   filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
869   filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
870
871   /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
872   if (1)
873   {
874      filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
875
876      /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
877      for (i=0;i<N;i++)
878      {
879         spx_word32_t MM;
880         spx_word32_t theta;
881         spx_word16_t prior_ratio;
882         spx_word16_t tmp;
883         spx_word16_t p;
884         spx_word16_t g;
885
886         /* Wiener filter gain */
887         prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
888         theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
889
890         /* Optimal estimator for loudness domain */
891         MM = hypergeom_gain(theta);
892         /* EM gain with bound */
893         g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
894         /* Interpolated speech probability of presence */
895         p = st->gain2[i];
896
897         /* Constrain the gain to be close to the Bark scale gain */
898         if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
899            g = MULT16_16(3,st->gain[i]);
900         st->gain[i] = g;
901
902         /* Save old power spectrum */
903         st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
904
905         /* Apply gain floor */
906         if (st->gain[i] < st->gain_floor[i])
907            st->gain[i] = st->gain_floor[i];
908
909         /* Exponential decay model for reverberation (unused) */
910         /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
911
912         /* Take into account speech probability of presence (loudness domain MMSE estimator) */
913         /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
914         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
915         st->gain2[i]=SQR16_Q15(tmp);
916
917         /* Use this if you want a log-domain MMSE estimator instead */
918         /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
919      }
920   } else {
921      for (i=N;i<N+M;i++)
922      {
923         spx_word16_t tmp;
924         spx_word16_t p = st->gain2[i];
925         st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
926         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
927         st->gain2[i]=SQR16_Q15(tmp);
928      }
929      filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
930   }
931
932   /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
933   if (!st->denoise_enabled)
934   {
935      for (i=0;i<N+M;i++)
936         st->gain2[i]=Q15_ONE;
937   }
938
939   /* Apply computed gain */
940   for (i=1;i<N;i++)
941   {
942      st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
943      st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
944   }
945   st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
946   st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
947
948   /*FIXME: This *will* not work for fixed-point */
949#ifndef FIXED_POINT
950   if (st->agc_enabled)
951      speex_compute_agc(st, Pframe, st->ft);
952#endif
953
954   /* Inverse FFT with 1/N scaling */
955   spx_ifft(st->fft_lookup, st->ft, st->frame);
956   /* Scale back to original (lower) amplitude */
957   for (i=0;i<2*N;i++)
958      st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
959
960   /*FIXME: This *will* not work for fixed-point */
961#ifndef FIXED_POINT
962   if (st->agc_enabled)
963   {
964      float max_sample=0;
965      for (i=0;i<2*N;i++)
966         if (fabs(st->frame[i])>max_sample)
967            max_sample = fabs(st->frame[i]);
968      if (max_sample>28000.f)
969      {
970         float damp = 28000.f/max_sample;
971         for (i=0;i<2*N;i++)
972            st->frame[i] *= damp;
973      }
974   }
975#endif
976
977   /* Synthesis window (for WOLA) */
978   for (i=0;i<2*N;i++)
979      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
980
981   /* Perform overlap and add */
982   for (i=0;i<N3;i++)
983      x[i] = WORD2INT(ADD32(EXTEND32(st->outbuf[i]), EXTEND32(st->frame[i])));
984   for (i=0;i<N4;i++)
985      x[N3+i] = st->frame[N3+i];
986
987   /* Update outbuf */
988   for (i=0;i<N3;i++)
989      st->outbuf[i] = st->frame[st->frame_size+i];
990
991   /* FIXME: This VAD is a kludge */
992   st->speech_prob = Pframe;
993   if (st->vad_enabled)
994   {
995      if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
996      {
997         st->was_speech=1;
998         return 1;
999      } else
1000      {
1001         st->was_speech=0;
1002         return 0;
1003      }
1004   } else {
1005      return 1;
1006   }
1007}
1008
1009EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1010{
1011   int i;
1012   int N = st->ps_size;
1013   int N3 = 2*N - st->frame_size;
1014   int M;
1015   spx_word32_t *ps=st->ps;
1016
1017   M = st->nbands;
1018   st->min_count++;
1019
1020   preprocess_analysis(st, x);
1021
1022   update_noise_prob(st);
1023
1024   for (i=1;i<N-1;i++)
1025   {
1026      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1027      {
1028         st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1029      }
1030   }
1031
1032   for (i=0;i<N3;i++)
1033      st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1034
1035   /* Save old power spectrum */
1036   for (i=0;i<N+M;i++)
1037      st->old_ps[i] = ps[i];
1038
1039   for (i=0;i<N;i++)
1040      st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1041}
1042
1043
1044EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1045{
1046   int i;
1047   SpeexPreprocessState *st;
1048   st=(SpeexPreprocessState*)state;
1049   switch(request)
1050   {
1051   case SPEEX_PREPROCESS_SET_DENOISE:
1052      st->denoise_enabled = (*(spx_int32_t*)ptr);
1053      break;
1054   case SPEEX_PREPROCESS_GET_DENOISE:
1055      (*(spx_int32_t*)ptr) = st->denoise_enabled;
1056      break;
1057#ifndef FIXED_POINT
1058   case SPEEX_PREPROCESS_SET_AGC:
1059      st->agc_enabled = (*(spx_int32_t*)ptr);
1060      break;
1061   case SPEEX_PREPROCESS_GET_AGC:
1062      (*(spx_int32_t*)ptr) = st->agc_enabled;
1063      break;
1064#ifndef DISABLE_FLOAT_API
1065   case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1066      st->agc_level = (*(float*)ptr);
1067      if (st->agc_level<1)
1068         st->agc_level=1;
1069      if (st->agc_level>32768)
1070         st->agc_level=32768;
1071      break;
1072   case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1073      (*(float*)ptr) = st->agc_level;
1074      break;
1075#endif /* #ifndef DISABLE_FLOAT_API */
1076   case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1077      st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1078      break;
1079   case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1080      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1081      break;
1082   case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1083      st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1084      break;
1085   case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1086      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1087      break;
1088   case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1089      st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1090      break;
1091   case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1092      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1093      break;
1094#endif
1095   case SPEEX_PREPROCESS_SET_VAD:
1096      speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1097      st->vad_enabled = (*(spx_int32_t*)ptr);
1098      break;
1099   case SPEEX_PREPROCESS_GET_VAD:
1100      (*(spx_int32_t*)ptr) = st->vad_enabled;
1101      break;
1102
1103   case SPEEX_PREPROCESS_SET_DEREVERB:
1104      st->dereverb_enabled = (*(spx_int32_t*)ptr);
1105      for (i=0;i<st->ps_size;i++)
1106         st->reverb_estimate[i]=0;
1107      break;
1108   case SPEEX_PREPROCESS_GET_DEREVERB:
1109      (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1110      break;
1111
1112   case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1113      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1114      /*st->reverb_level = (*(float*)ptr);*/
1115      break;
1116   case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1117      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1118      /*(*(float*)ptr) = st->reverb_level;*/
1119      break;
1120
1121   case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1122      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1123      /*st->reverb_decay = (*(float*)ptr);*/
1124      break;
1125   case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1126      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1127      /*(*(float*)ptr) = st->reverb_decay;*/
1128      break;
1129
1130   case SPEEX_PREPROCESS_SET_PROB_START:
1131      *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1132      st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1133      break;
1134   case SPEEX_PREPROCESS_GET_PROB_START:
1135      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1136      break;
1137
1138   case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1139      *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1140      st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1141      break;
1142   case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1143      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1144      break;
1145
1146   case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1147      st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1148      break;
1149   case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1150      (*(spx_int32_t*)ptr) = st->noise_suppress;
1151      break;
1152   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1153      st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1154      break;
1155   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1156      (*(spx_int32_t*)ptr) = st->echo_suppress;
1157      break;
1158   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1159      st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1160      break;
1161   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1162      (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1163      break;
1164   case SPEEX_PREPROCESS_SET_ECHO_STATE:
1165      st->echo_state = (SpeexEchoState*)ptr;
1166      break;
1167   case SPEEX_PREPROCESS_GET_ECHO_STATE:
1168      (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
1169      break;
1170#ifndef FIXED_POINT
1171   case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1172      (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1173      break;
1174   case SPEEX_PREPROCESS_GET_AGC_GAIN:
1175      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
1176      break;
1177#endif
1178   case SPEEX_PREPROCESS_GET_PSD_SIZE:
1179   case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
1180      (*(spx_int32_t*)ptr) = st->ps_size;
1181      break;
1182   case SPEEX_PREPROCESS_GET_PSD:
1183      for(i=0;i<st->ps_size;i++)
1184      	((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
1185      break;
1186   case SPEEX_PREPROCESS_GET_NOISE_PSD:
1187      for(i=0;i<st->ps_size;i++)
1188      	((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
1189      break;
1190   case SPEEX_PREPROCESS_GET_PROB:
1191      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
1192      break;
1193#ifndef FIXED_POINT
1194   case SPEEX_PREPROCESS_SET_AGC_TARGET:
1195      st->agc_level = (*(spx_int32_t*)ptr);
1196      if (st->agc_level<1)
1197         st->agc_level=1;
1198      if (st->agc_level>32768)
1199         st->agc_level=32768;
1200      break;
1201   case SPEEX_PREPROCESS_GET_AGC_TARGET:
1202      (*(spx_int32_t*)ptr) = st->agc_level;
1203      break;
1204#endif
1205   default:
1206      speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1207      return -1;
1208   }
1209   return 0;
1210}
1211
1212#ifdef FIXED_DEBUG
1213long long spx_mips=0;
1214#endif
1215
1216