1/* Copyright (C) 2007 Hong Zhiqian */
2/**
3   @file preprocess_tm.h
4   @author Hong Zhiqian
5   @brief Various compatibility routines for Speex (TriMedia version)
6*/
7/*
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions
10   are met:
11
12   - Redistributions of source code must retain the above copyright
13   notice, this list of conditions and the following disclaimer.
14
15   - Redistributions in binary form must reproduce the above copyright
16   notice, this list of conditions and the following disclaimer in the
17   documentation and/or other materials provided with the distribution.
18
19   - Neither the name of the Xiph.org Foundation nor the names of its
20   contributors may be used to endorse or promote products derived from
21   this software without specific prior written permission.
22
23   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
27   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34*/
35#include <ops/custom_defs.h>
36#include "profile_tm.h"
37
38#ifdef FIXED_POINT
39#define OVERRIDE_PREPROCESS_ANALYSIS
40static void preprocess_analysis(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
41{
42	register int i, j, framesize = st->frame_size;
43	register int N = st->ps_size;
44	register int N3 = 2*N - framesize;
45	register int N4 = framesize - N3;
46	register int * restrict ps = st->ps;
47	register int * restrict frame;
48	register int * restrict inbuf;
49	register int * restrict ptr;
50	register int max_val;
51
52	frame = (int*)(st->frame);
53	inbuf = (int*)(st->inbuf);
54	ptr = (int*)(st->frame+N3);
55
56	TMDEBUG_ALIGNMEM(x);
57   	TMDEBUG_ALIGNMEM(frame);
58	TMDEBUG_ALIGNMEM(inbuf);
59
60	PREPROCESSANAYLSIS_START();
61
62	N3 >>= 1;
63	framesize >>= 1;
64	max_val = 0;
65
66	for ( i=0,j=0 ; i<N3 ; i+=2,j+=8 )
67	{	register int r1, r2;
68
69		r1 = ld32x(inbuf,i);
70		r2 = ld32x(inbuf,i+1);
71
72		st32d(j,   frame, r1);
73		st32d(j+4, frame, r2);
74	}
75
76	for ( i=0,j=0 ; i<framesize ; i+=2,j+=8 )
77	{	register int r1, r2;
78
79		r1 = ld32x(x, i);
80		r2 = ld32x(x, i+1);
81
82		st32d(j,  ptr, r1);
83		st32d(j+4,ptr, r2);
84	}
85
86	for ( i=0,j=0,ptr=(int*)(x+N4) ; i<N3 ; i+=2,j+=8 )
87	{	register int r1, r2;
88
89		r1 = ld32x(ptr, i);
90		r2 = ld32x(ptr, i+1);
91
92		st32d(j,  inbuf, r1);
93		st32d(j+4,inbuf, r2);
94	}
95#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
96#pragma TCS_unroll=2
97#pragma TCS_unrollexact=1
98#endif
99   for ( i=0,j=0,ptr=(int*)st->window ; i<N ; ++i,j+=4 )
100   {	register int f10, w10, r0, r1;
101
102		f10 = ld32x(frame, i);
103		w10 = ld32x(ptr,   i);
104
105		r0  = (sex16(f10) * sex16(w10)) >> 15;
106		r1	= (asri(16,f10) * asri(16,w10)) >> 15;
107
108		max_val = imax(iabs(sex16(r0)), max_val);
109		max_val = imax(iabs(sex16(r1)), max_val);
110
111		r0	= pack16lsb(r1, r0);
112		st32d(j, frame, r0);
113	}
114#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
115#pragma TCS_unrollexact=0
116#pragma TCS_unroll=0
117#endif
118
119	max_val = 14 - spx_ilog2(max_val);
120	st->frame_shift = max_val;
121
122	if ( max_val != 0 )
123	{
124#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
125#pragma TCS_unroll=4
126#pragma TCS_unrollexact=1
127#endif
128		for ( i=0,j=0 ; i<N ; ++i,j+=4 )
129		{	register int f10;
130
131			f10 = ld32x(frame, i);
132			f10 = dualasl(f10, max_val);
133			st32d(j, frame, f10);
134
135		}
136#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
137#pragma TCS_unrollexact=0
138#pragma TCS_unroll=0
139#endif
140	}
141
142	spx_fft(st->fft_lookup, st->frame, st->ft);
143	power_spectrum(st->ft, ps, N << 1);
144
145#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
146#pragma TCS_unroll=4
147#pragma TCS_unrollexact=1
148#endif
149	for ( i=0,ptr=(int*)st->ps,max_val<<=1,j=((1<<((max_val))>>1)) ;i<N ; ++i )
150	{
151		ps[i] = (ps[i] + j) >> max_val;
152	}
153#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
154#pragma TCS_unrollexact=0
155#pragma TCS_unroll=0
156#endif
157
158	filterbank_compute_bank32(st->bank, ps, ps+N);
159
160	PREPROCESSANAYLSIS_STOP();
161}
162
163#define _MULT16_32_Q15(a,b,c) ADD32(MULT16_16((a),(b)), SHR(MULT16_16((a),(c)),15))
164
165#define OVERRIDE_UPDATE_NOISE_PROB
166static void update_noise_prob(SpeexPreprocessState * restrict st)
167{
168	register int i;
169	register int min_range, nb_adapt;
170	register int N = st->ps_size;
171	register int * restrict Smin = (int*)st->Smin;
172	register int * restrict Stmp = (int*)st->Stmp;
173	register int * restrict S = (int*)st->S;
174
175	UPDATENOISEPROB_START();
176
177	{
178		register int psi_lsb, psi_msb, ips_lsb, ips_msb, psii_lsb, psii_msb;
179		register int psiii_lsb, psiii_msb;
180		register int q8, q05, q2, q1;
181		register int *ps = (int*)st->ps;
182		register int si_lsb, si_msb, sii_lsb, sii_msb;
183
184		q8	= QCONST16(.8f,15);
185		q05 = QCONST16(.05f,15);
186		q2	= QCONST16(.2f,15);
187		q1	= QCONST16(.1f,15);
188
189		ips_lsb	 = ps[0];
190		psi_lsb	 = ps[1];
191		si_lsb	 = S[0];
192		ips_msb	 = ips_lsb >> 15;
193		psi_msb	 = psi_lsb >> 15;
194		si_msb	 = si_lsb >> 15;
195
196		ips_lsb	 &= 0x00007fff;
197		psi_lsb	 &= 0x00007fff;
198		si_lsb	 &= 0x00007fff;
199
200		S[0] = _MULT16_32_Q15(q8,si_msb,si_lsb) +  _MULT16_32_Q15(q2,ips_msb,ips_lsb);
201
202		for ( i=1 ; i<N-1 ; i+=2 )
203		{
204			psii_lsb = ps[i+1];
205			si_lsb	 = S[i];
206
207			psii_msb = psii_lsb >> 15;
208			si_msb	 = si_lsb >> 15;
209			si_lsb	 &= 0x00007fff;
210			psii_lsb &= 0x00007fff;
211
212			S[i]= _MULT16_32_Q15(q8,si_msb,si_lsb) +
213				  _MULT16_32_Q15(q05,ips_msb,ips_lsb) +
214				  _MULT16_32_Q15(q1,psi_msb,psi_lsb) +
215				  _MULT16_32_Q15(q05,psii_msb,psii_lsb);
216
217			psiii_lsb = ps[i+2];
218			sii_lsb	 = S[i+1];
219
220			sii_msb	 =  sii_lsb >> 15;
221			psiii_msb=  psiii_lsb >> 15;
222			sii_lsb	 &= 0x00007fff;
223			psiii_lsb&= 0x00007fff;
224
225			S[i+1]= _MULT16_32_Q15(q8,sii_msb,sii_lsb) +
226					_MULT16_32_Q15(q05,psi_msb,psi_lsb) +
227					_MULT16_32_Q15(q1,psii_msb,psii_lsb) +
228					_MULT16_32_Q15(q05,psiii_msb,psiii_lsb);
229
230			ips_lsb = psii_lsb;
231			ips_msb = psii_msb;
232			psi_lsb = psiii_lsb;
233			psi_msb = psiii_msb;
234		}
235
236		S[N-1] = MULT16_32_Q15(q8,S[N-1]) + MULT16_32_Q15(q2,ps[N-1]);
237	}
238
239	nb_adapt = st->nb_adapt;
240
241	if ( nb_adapt==1 )
242	{	for ( i=0 ; i<N ; ++i )
243			Smin[i] = Stmp[i] = 0;
244
245	}
246
247	min_range = mux(nb_adapt < 100,		15,
248				mux(nb_adapt < 1000,	50,
249				mux(nb_adapt < 10000,	150, 300)));
250
251	if ( st->min_count > min_range )
252	{
253		st->min_count = 0;
254
255#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
256#pragma TCS_unroll=2
257#pragma TCS_unrollexact=1
258#endif
259		for ( i=0 ; i<N ; ++i )
260		{	register int si, stmpi;
261
262			si		= S[i];
263			stmpi	= Stmp[i];
264
265			Smin[i] = imin(stmpi,si);
266			Stmp[i] = si;
267		}
268#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
269#pragma TCS_unrollexact=0
270#pragma TCS_unroll=0
271#endif
272	} else
273	{
274
275#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
276#pragma TCS_unroll=2
277#pragma TCS_unrollexact=1
278#endif
279		for ( i=0 ; i<N ; ++i )
280		{	register int si, stmpi, smini;
281
282			si		= S[i];
283			stmpi	= Stmp[i];
284			smini	= Smin[i];
285
286			Smin[i] = imin(smini,si);
287			Stmp[i] = imin(stmpi,si);
288		}
289#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
290#pragma TCS_unrollexact=0
291#pragma TCS_unroll=0
292#endif
293	}
294
295
296	{
297		register int q4;
298		register int * restrict update_prob = (int*)st->update_prob;
299
300		q4 = QCONST16(.4f,15);
301
302#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
303#pragma TCS_unroll=4
304#pragma TCS_unrollexact=1
305#endif
306		for ( i=0 ; i<N ; ++i )
307		{	register int si;
308			register int smini;
309
310			si = S[i];
311			smini = Smin[i];
312			update_prob[i] = mux(MULT16_32_Q15(q4,si) > ADD32(smini,20), 1, 0);
313		}
314#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
315#pragma TCS_unrollexact=0
316#pragma TCS_unroll=0
317#endif
318	}
319
320	UPDATENOISEPROB_STOP();
321}
322
323#else
324
325#define OVERRIDE_PREPROCESS_ANALYSIS
326static void preprocess_analysis(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
327{
328	register int i;
329	register int framesize = st->frame_size;
330	register int N = st->ps_size;
331	register int N3 = 2*N - framesize;
332	register int N4 = framesize - N3;
333	register float * restrict ps = st->ps;
334	register float * restrict frame = st->frame;
335	register float * restrict inbuf = st->inbuf;
336
337	PREPROCESSANAYLSIS_START();
338
339#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
340#pragma TCS_unroll=4
341#pragma TCS_unrollexact=1
342#endif
343	for ( i=0 ; i<N3 ; ++i )
344	{
345		frame[i] = inbuf[i];
346	}
347#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
348#pragma TCS_unrollexact=0
349#pragma TCS_unroll=0
350#endif
351
352#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
353#pragma TCS_unroll=4
354#pragma TCS_unrollexact=1
355#endif
356   	for ( i=0 ; i<framesize ; ++i )
357	{	frame[N3+i] = x[i];
358	}
359#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
360#pragma TCS_unrollexact=0
361#pragma TCS_unroll=0
362#endif
363
364#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
365#pragma TCS_unroll=4
366#pragma TCS_unrollexact=1
367#endif
368	for ( i=0,x+=N4 ; i<N3 ; ++i )
369	{	inbuf[i]  = x[i];
370	}
371#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
372#pragma TCS_unrollexact=0
373#pragma TCS_unroll=0
374#endif
375
376	inbuf = st->window;
377
378#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
379#pragma TCS_unroll=4
380#pragma TCS_unrollexact=1
381#endif
382	for ( i=0 ; i<2*N ; ++i )
383	{
384		frame[i] = frame[i] * inbuf[i];
385	}
386#if (TM_UNROLL && TM_UNROLL_PREPROCESSANALYSIS)
387#pragma TCS_unrollexact=0
388#pragma TCS_unroll=0
389#endif
390
391	spx_fft(st->fft_lookup, frame, st->ft);
392	power_spectrum(st->ft, ps, N << 1);
393	filterbank_compute_bank32(st->bank, ps, ps+N);
394
395	PREPROCESSANAYLSIS_STOP();
396}
397
398
399#define OVERRIDE_UPDATE_NOISE_PROB
400static void update_noise_prob(SpeexPreprocessState * restrict st)
401{
402
403	register float * restrict S = st->S;
404   	register float * restrict ps = st->ps;
405	register int N = st->ps_size;
406    register int min_range;
407	register int i;
408	register int nb_adapt;
409	register float * restrict Smin = st->Smin;
410	register float * restrict Stmp = st->Stmp;
411
412	UPDATENOISEPROB_START();
413
414	{
415		register float ips, psi;
416
417		ips  = ps[0];
418		psi  = ps[1];
419
420		S[0] = .8f * S[0] + .2f * ips;
421
422		for ( i=1 ; i<N-1 ; i+=2 )
423		{
424			register float psii, psiii;
425
426			psii	= ps[i+1];
427			psiii	= ps[i+2];
428			S[i]	= .8f * S[i]	+ .05f * ips + .1f * psi  + .05f * psii;
429			S[i+1]	= .8f * S[i+1]	+ .05f * psi + .1f * psii + .05f * psiii;
430			ips		= psii;
431			psi		= psiii;
432		}
433
434		S[N-1] = .8f * S[N-1] + .2f * ps[N-1];
435	}
436
437	nb_adapt = st->nb_adapt;
438
439   	if ( nb_adapt==1 )
440	{
441		for (i=0;i<N;i++)
442			Smin[i] = st->Stmp[i] = 0;
443	}
444
445	min_range = mux(nb_adapt < 100,		15,
446				mux(nb_adapt < 1000,	50,
447				mux(nb_adapt < 10000,	150, 300)));
448
449
450	if ( st->min_count > min_range )
451	{
452		st->min_count = 0;
453#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
454#pragma TCS_unroll=4
455#pragma TCS_unrollexact=1
456#endif
457		for ( i=0 ; i<N ; ++i )
458		{
459			register float stmpi, si;
460
461			stmpi	= Stmp[i];
462			si		= S[i];
463
464			Smin[i] = fmin(stmpi,si);
465			Stmp[i] = si;
466		}
467#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
468#pragma TCS_unrollexact=0
469#pragma TCS_unroll=0
470#endif
471
472	} else
473	{
474		register float * restrict Smin = st->Smin;
475
476#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
477#pragma TCS_unroll=4
478#pragma TCS_unrollexact=1
479#endif
480		for ( i=0 ; i<N ; ++i )
481		{
482			register float stmpi, si, smini;
483
484			stmpi	= Stmp[i];
485			si		= S[i];
486			smini	= Smin[i];
487
488			Smin[i] = fmin(smini,si);
489			Stmp[i] = fmin(stmpi,si);
490		}
491#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
492#pragma TCS_unrollexact=0
493#pragma TCS_unroll=0
494#endif
495	}
496
497	{
498		register int * restrict update_prob = (int*)st->update_prob;
499
500#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
501#pragma TCS_unroll=4
502#pragma TCS_unrollexact=1
503#endif
504		for (i=0;i<N;i++)
505		{	register float si;
506			register float smini;
507
508			si		= S[i];
509			smini	= Smin[i];
510			update_prob[i] = mux( (.4 * si) > (smini + 20.f), 1, 0);
511
512		}
513#if (TM_UNROLL && TM_UNROLL_UPDATENOISEPROB)
514#pragma TCS_unrollexact=0
515#pragma TCS_unroll=0
516#endif
517	}
518
519	UPDATENOISEPROB_STOP();
520}
521
522
523#define OVERRIDE_COMPUTE_GAIN_FLOOR
524static void compute_gain_floor(
525	int noise_suppress,
526	int effective_echo_suppress,
527	float * restrict noise,
528	float * restrict echo,
529	float * gain_floor,
530	int len
531)
532{
533	register int i;
534	register float echo_floor;
535	register float noise_floor;
536
537	COMPUTEGAINFLOOR_START();
538
539	noise_floor = exp(.2302585f*noise_suppress);
540	echo_floor = exp(.2302585f*effective_echo_suppress);
541
542#if (TM_UNROLL && TM_UNROLL_COMPUTEGAINFLOOR)
543#pragma TCS_unroll=4
544#pragma TCS_unrollexact=1
545#endif
546	for (i=0;i<len;i++)
547	{	register float noisei, echoi;
548
549		noisei = noise[i];
550		echoi  = echo[i];
551
552		gain_floor[i] = FRAC_SCALING * sqrt(noise_floor * noisei +	echo_floor * echoi) / sqrt(1+noisei+echoi);
553
554	}
555#if (TM_UNROLL && TM_UNROLL_COMPUTEGAINFLOOR)
556#pragma TCS_unrollexact=0
557#pragma TCS_unroll=0
558#endif
559
560	COMPUTEGAINFLOOR_STOP();
561}
562
563#endif
564
565static inline spx_word32_t hypergeom_gain(spx_word32_t xx);
566static inline spx_word16_t qcurve(spx_word16_t x);
567static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len);
568void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
569
570#ifndef FIXED_POINT
571static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft);
572#endif
573
574void preprocess_residue_echo(
575	SpeexPreprocessState * restrict st,
576	int N,
577	int NM
578)
579{
580	if (st->echo_state)
581	{
582		register spx_word32_t * restrict r_echo = st->residual_echo;
583		register spx_word32_t * restrict e_noise = st->echo_noise;
584		register int i;
585
586#ifndef FIXED_POINT
587		register spx_word32_t r;
588#endif
589
590		speex_echo_get_residual(st->echo_state, r_echo, N);
591
592#ifndef FIXED_POINT
593		r = r_echo[0];
594		if (!(r >=0 && r < N*1e9f) )
595		{
596			memset(r_echo, 0, N * sizeof(spx_word32_t));
597		}
598#endif
599
600#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
601#pragma TCS_unroll=4
602#pragma TCS_unrollexact=1
603#endif
604		for (i=0;i<N;i++)
605		{	register spx_word32_t eni = e_noise[i];
606			e_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),eni), r_echo[i]);
607		}
608#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
609#pragma TCS_unrollexact=0
610#pragma TCS_unroll=0
611#endif
612		filterbank_compute_bank32(st->bank, e_noise, e_noise+N);
613
614	} else
615	{	memset(st->echo_noise, 0, (NM) * sizeof(spx_word32_t));
616	}
617}
618
619void preprocess_update_noise(
620	SpeexPreprocessState * restrict st,
621	spx_word32_t * restrict ps,
622	int N
623)
624{
625	register spx_word16_t beta, beta_1;
626	register int * restrict up = st->update_prob;
627	register spx_word32_t * restrict noise = st->noise;
628	register int i;
629
630	beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
631	beta_1 = Q15_ONE-beta;
632
633#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
634#pragma TCS_unroll=4
635#pragma TCS_unrollexact=1
636#endif
637	for (i=0;i<N;i++)
638	{	register spx_word32_t ni = noise[i];
639		register spx_word32_t psi = ps[i];
640
641		if ( !up[i] || psi < PSHR32(ni, NOISE_SHIFT) )
642		{	noise[i] =	MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,ni) +
643						MULT16_32_Q15(beta,SHL32(psi,NOISE_SHIFT)));
644		}
645	}
646#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
647#pragma TCS_unrollexact=0
648#pragma TCS_unroll=0
649#endif
650	filterbank_compute_bank32(st->bank, noise, noise+N);
651}
652
653void preprocess_compute_SNR(
654	SpeexPreprocessState * restrict st,
655	spx_word32_t * restrict ps,
656	int NM
657)
658{
659	register spx_word32_t * restrict noise = st->noise;
660	register spx_word32_t * restrict echo = st->echo_noise;
661	register spx_word32_t * restrict reverb = st->reverb_estimate;
662	register spx_word16_t * restrict post = st->post;
663	register spx_word32_t * restrict old_ps = st->old_ps;
664	register spx_word16_t * restrict prior = st->prior;
665	register int i;
666
667#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
668#pragma TCS_unroll=4
669#pragma TCS_unrollexact=1
670#endif
671	for ( i=0 ; i<NM ; i++)
672	{
673		register spx_word16_t gamma;
674		register spx_word32_t tot_noise;
675		register spx_word16_t posti;
676		register spx_word32_t opsi;
677		register spx_word16_t priori;
678
679		tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(noise[i],NOISE_SHIFT)), echo[i]) , reverb[i]);
680
681		posti = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
682		posti = MIN16(posti, QCONST16(100.f,SNR_SHIFT));
683		post[i] = posti;
684
685		opsi = old_ps[i];
686
687		gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(opsi,ADD32(opsi,tot_noise))));
688
689		priori = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,posti)), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(opsi,tot_noise))), 15));
690		prior[i]=MIN16(priori, QCONST16(100.f,SNR_SHIFT));
691	}
692#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
693#pragma TCS_unrollexact=0
694#pragma TCS_unroll=0
695#endif
696}
697
698spx_word32_t preprocess_smooth_SNR(
699	SpeexPreprocessState * restrict st,
700	int N,
701	int NM
702)
703{
704	register spx_word16_t * restrict zeta = st->zeta;
705	register spx_word16_t * restrict prior = st->prior;
706	register spx_word32_t Zframe;
707	register spx_word16_t iprior, priori;
708	register int _N = N-1;
709	register int i;
710
711	iprior = prior[0];
712	priori = prior[1];
713	zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),zeta[0]), MULT16_16(QCONST16(.3f,15),iprior)),15);
714
715#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
716#pragma TCS_unroll=2
717#pragma TCS_unrollexact=1
718#endif
719	for ( i=1 ; i<_N ; i++)
720	{	register spx_word16_t zetai = zeta[i];
721		register spx_word16_t priorii = prior[i+1];
722
723		zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),zetai), MULT16_16(QCONST16(.15f,15),priori)),
724                  MULT16_16(QCONST16(.075f,15),iprior)), MULT16_16(QCONST16(.075f,15),priorii)),15);
725
726		iprior = priori;
727		priori = priorii;
728	}
729#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
730#pragma TCS_unrollexact=0
731#pragma TCS_unroll=0
732#endif
733
734	for (i=_N; i<NM ; i++)
735	{	register spx_word16_t zetai = zeta[i];
736
737		priori = prior[i];
738		zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),zetai), MULT16_16(QCONST16(.3f,15),priori)),15);
739	}
740
741	Zframe = 0;
742
743#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
744#pragma TCS_unroll=4
745#pragma TCS_unrollexact=1
746#endif
747	for ( i=N ; i<NM ; i++ )
748	{	Zframe = ADD32(Zframe, EXTEND32(zeta[i]));
749	}
750#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
751#pragma TCS_unrollexact=0
752#pragma TCS_unroll=0
753#endif
754
755	return Zframe;
756}
757
758void preprocess_compute_emgain(
759	SpeexPreprocessState * restrict st,
760	spx_word32_t * restrict ps,
761	spx_word16_t Pframe,
762	int NM
763)
764{
765	register spx_word16_t * restrict zeta = st->zeta;
766	register spx_word16_t * restrict prior = st->prior;
767	register spx_word16_t * restrict gain = st->gain;
768	register spx_word32_t * restrict old_ps = st->old_ps;
769	register spx_word16_t * restrict post = st->post;
770	register spx_word16_t * restrict gain2 = st->gain2;
771	register int i;
772	register int N=st->ps_size;
773
774	for ( i=N ; i<NM ; ++i )
775	{
776		register spx_word32_t theta;
777		register spx_word32_t MM;
778		register spx_word16_t prior_ratio;
779		register spx_word16_t P1;
780		register spx_word16_t q;
781
782#ifdef FIXED_POINT
783		register spx_word16_t tmp;
784#endif
785		register spx_word16_t priori = prior[i];
786
787		prior_ratio = PDIV32_16(SHL32(EXTEND32(priori), 15), ADD16(priori, SHL32(1,SNR_SHIFT)));
788		theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(post[i]),EXPIN_SHIFT-SNR_SHIFT));
789
790		MM = hypergeom_gain(theta);
791		gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
792		old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(gain[i])),ps[i]);
793
794		P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (zeta[i]));
795		q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
796
797#ifdef FIXED_POINT
798		theta = MIN32(theta, EXTEND32(32767));
799		tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+priori),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
800		tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp);
801		tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
802		gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
803#else
804		gain2[i]=1/(1.f + (q/(1.f-q))*(1+priori)*exp(-theta));
805#endif
806	}
807
808	filterbank_compute_psd16(st->bank,gain2+N, gain2);
809	filterbank_compute_psd16(st->bank,gain+N, gain);
810}
811
812void preprocess_compute_linear_gain(
813	SpeexPreprocessState * restrict st,
814	spx_word32_t * restrict ps,
815	int N
816)
817{
818	register spx_word16_t * restrict gain_floor = st->gain_floor;
819	register spx_word16_t * restrict prior = st->prior;
820	register spx_word16_t * restrict gain = st->gain;
821	register spx_word32_t * restrict old_ps = st->old_ps;
822	register spx_word16_t * restrict post = st->post;
823	register spx_word16_t * restrict gain2 = st->gain2;
824	register int i;
825
826	filterbank_compute_psd16(st->bank,gain_floor+N,gain_floor);
827
828#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
829#pragma TCS_unroll=4
830#pragma TCS_unrollexact=1
831#endif
832	for (i=0;i<N;i++)
833    {
834         register spx_word32_t MM;
835         register spx_word32_t theta;
836         register spx_word16_t prior_ratio;
837         register spx_word16_t tmp;
838         register spx_word16_t p;
839         register spx_word16_t g;
840		 register spx_word16_t gfi = gain_floor[i];
841
842		 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(prior[i], SHL32(1,SNR_SHIFT)));
843         theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(post[i]),EXPIN_SHIFT-SNR_SHIFT));
844		 MM = hypergeom_gain(theta);
845
846		 g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
847		 p = gain2[i];
848
849		 g = VMUX( MULT16_16_Q15(QCONST16(.333f,15),g) > gain[i], MULT16_16(3,gain[i]), g);
850
851         old_ps[i]= MULT16_32_P15(QCONST16(.2f,15),old_ps[i]) +
852					MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(g)),ps[i]);
853
854		 g = VMUX( g < gfi, gfi, g );
855         gain[i] = g;
856
857         tmp =	MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(g),15))) +
858				MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(gfi),15)));
859
860         gain2[i]=SQR16_Q15(tmp);
861
862         /* Use this if you want a log-domain MMSE estimator instead */
863         /* gain2[i] = pow(g, p) * pow(gfi,1.f-p);*/
864    }
865#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
866#pragma TCS_unrollexact=0
867#pragma TCS_unroll=0
868#endif
869}
870
871
872#if 0
873void preprocess_compute_bark_gain(
874	SpeexPreprocessState * restrict st,
875	int N,
876	int NM
877)
878{
879	register spx_word16_t * restrict gain_floor = st->gain_floor;
880	register spx_word16_t * restrict gain = st->gain;
881	register spx_word16_t * restrict gain2 = st->gain2;
882	register int i;
883
884    for (i=N;i<NM;i++)
885    {
886		register spx_word16_t tmp;
887		register spx_word16_t p = gain2[i];
888		register spx_word16_t gaini;
889		register spx_word16_t gfi = gain_floor[i];
890
891		gaini = MAX16(gain[i], gfi);
892
893		gain[i] = gaini;
894
895		tmp =	MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(gaini),15))) +
896			MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(gfi),15)));
897
898		gain2[i]=SQR16_Q15(tmp);
899    }
900
901    filterbank_compute_psd16(st->bank,gain2+N, gain2);
902}
903#endif
904
905void preprocess_apply_gain(
906	SpeexPreprocessState * restrict st,
907	int N
908)
909{
910	register spx_word16_t * restrict ft = st->ft;
911	register spx_word16_t * restrict gain2 = st->gain2;
912	register int j, i;
913
914	ft[0] = MULT16_16_P15(gain2[0],ft[0]);
915
916	for (i=1,j=1; i<N ; i++,j+=2)
917	{
918		register spx_word16_t gain2i = gain2[i];
919		register spx_word16_t ftj = ft[j];
920		register spx_word16_t ftjj = ft[j+1];
921
922		ft[j] = MULT16_16_P15(gain2i,ftj);
923		ft[j+1] = MULT16_16_P15(gain2i,ftjj);
924	}
925
926	ft[(N<<1)-1] = MULT16_16_P15(gain2[N-1],ft[(N<<1)-1]);
927}
928
929#ifdef FIXED_POINT
930void preprocess_scale(
931	SpeexPreprocessState * restrict st,
932	int N
933)
934{
935	register spx_word16_t * restrict frame = st->frame;
936	register int shift = st->frame_shift;
937	register int i;
938	register int N2 = N << 1;
939
940#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
941#pragma TCS_unroll=4
942#pragma TCS_unrollexact=1
943#endif
944	for ( i=0 ; i<N2 ;i++)
945	{	register spx_word16_t framei = frame[i];
946
947		frame[i] = PSHR16(framei,shift);
948	}
949#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
950#pragma TCS_unrollexact=0
951#pragma TCS_unroll=0
952#endif
953}
954
955#else
956
957void preprocess_apply_agc(
958	SpeexPreprocessState * restrict st,
959	int N
960)
961{
962	register spx_word16_t max_sample=0;
963	register spx_word16_t * restrict frame = st->frame;
964	register int i;
965	register int N2 = N << 1;
966
967#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
968#pragma TCS_unroll=4
969#pragma TCS_unrollexact=1
970#endif
971	for (i=0;i<N2;i++)
972	{	register spx_word16_t framei = VABS(frame[i]);
973
974		max_sample = VMUX( framei > max_sample, framei, max_sample);
975	}
976#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
977#pragma TCS_unrollexact=0
978#pragma TCS_unroll=0
979#endif
980
981	if ( max_sample > 28000.f )
982	{
983		float damp = 28000.f/max_sample;
984
985#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
986#pragma TCS_unroll=4
987#pragma TCS_unrollexact=1
988#endif
989		for ( i=0 ; i< N2 ; i++ )
990		{	frame[i] *= damp;
991		}
992#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
993#pragma TCS_unrollexact=0
994#pragma TCS_unroll=0
995#endif
996	}
997}
998#endif
999
1000
1001void preprocess_update(
1002	SpeexPreprocessState * restrict st,
1003	spx_int16_t * restrict x,
1004	int N
1005)
1006{
1007	register spx_word16_t * restrict frame = st->frame;
1008	register spx_word16_t * restrict window = st->window;
1009	register spx_word16_t * restrict outbuf = st->outbuf;
1010	register int framesize = st->frame_size;
1011	register int N2 = N << 1;
1012	register int N3 = N2 - framesize;
1013	register int N4 = (framesize) - N3;
1014	register int i;
1015
1016#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
1017#pragma TCS_unroll=4
1018#pragma TCS_unrollexact=1
1019#endif
1020	for ( i=0 ; i<N2 ; i++)
1021	{	register spx_word16_t fi = frame[i];
1022		register spx_word16_t wi = window[i];
1023
1024		frame[i] = MULT16_16_Q15(fi, wi);
1025	}
1026	for (i=0;i<N3;i++)
1027	{	x[i] = outbuf[i] + frame[i];
1028	}
1029#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
1030#pragma TCS_unrollexact=0
1031#pragma TCS_unroll=0
1032#endif
1033
1034	for ( i=0;i<N4;i++)
1035	{	x[N3+i] = frame[N3+i];
1036	}
1037
1038	memcpy(outbuf, frame+framesize, (N3) * sizeof(spx_word16_t));
1039}
1040
1041#define OVERRIDE_SPEEX_PREPROCESS_RUN
1042int speex_preprocess_run(SpeexPreprocessState * restrict st, spx_int16_t * restrict x)
1043{
1044	register int i, N, M, NM;
1045	register spx_word32_t * restrict ps=st->ps;
1046	register spx_word32_t Zframe;
1047	register spx_word16_t Pframe;
1048
1049	st->nb_adapt++;
1050	st->min_count++;
1051	N = st->ps_size;
1052	M = st->nbands;
1053	NM = N + M;
1054
1055	preprocess_residue_echo(st, N, NM);
1056	preprocess_analysis(st, x);
1057	update_noise_prob(st);
1058	preprocess_update_noise(st, ps, N);
1059
1060	if ( st->nb_adapt == 1 )
1061	{	memcpy(st->old_ps, ps, (NM) * sizeof(spx_word32_t));
1062	}
1063
1064	preprocess_compute_SNR(st, ps, NM);
1065	Zframe = preprocess_smooth_SNR(st, N, NM);
1066
1067
1068	{
1069	register spx_word16_t effective_echo_suppress;
1070
1071	Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,M)));
1072	effective_echo_suppress =	EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress),
1073								MULT16_16(Pframe, st->echo_suppress_active)),15));
1074	compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
1075
1076	}
1077
1078	preprocess_compute_emgain(st, ps, Pframe, NM);
1079	preprocess_compute_linear_gain(st, ps, N);
1080
1081
1082	if (!st->denoise_enabled)
1083	{
1084	   register spx_word16_t * restrict gain2 = st->gain2;
1085
1086#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
1087#pragma TCS_unroll=4
1088#pragma TCS_unrollexact=1
1089#endif
1090		for ( i=0 ; i<NM ; i++ )
1091		{   gain2[i] = Q15_ONE;
1092		}
1093#if (TM_UNROLL && TM_UNROLL_SPEEXPREPROCESSRUN)
1094#pragma TCS_unrollexact=0
1095#pragma TCS_unroll=0
1096#endif
1097	}
1098
1099	preprocess_apply_gain(st, N);
1100
1101#ifndef FIXED_POINT
1102	if (st->agc_enabled)
1103	{	speex_compute_agc(st, Pframe, st->ft);
1104	}
1105#endif
1106
1107
1108   spx_ifft(st->fft_lookup, st->ft, st->frame);
1109
1110#ifdef FIXED_POINT
1111	preprocess_scale(st, N);
1112#endif
1113
1114#ifndef FIXED_POINT
1115	if ( st->agc_enabled )
1116	{	preprocess_apply_agc(st, N);
1117	}
1118#endif
1119
1120	preprocess_update(st, x, N);
1121
1122	if ( st->vad_enabled )
1123	{
1124		if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
1125		{	st->was_speech=1;
1126			return 1;
1127
1128		} else
1129		{	st->was_speech=0;
1130			return 0;
1131		}
1132	} else
1133	{	return 1;
1134	}
1135}
1136