xref: /third_party/pulseaudio/speex/tmv/mdf_tm.h (revision 53a5a1b3)
1/* Copyright (C) 2007 Hong Zhiqian */
2/**
3   @file mdf_tm.h
4   @author Hong Zhiqian
5   @brief Various compatibility routines for Speex (TriMedia version)
6*/
7/*
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions
10   are met:
11
12   - Redistributions of source code must retain the above copyright
13   notice, this list of conditions and the following disclaimer.
14
15   - Redistributions in binary form must reproduce the above copyright
16   notice, this list of conditions and the following disclaimer in the
17   documentation and/or other materials provided with the distribution.
18
19   - Neither the name of the Xiph.org Foundation nor the names of its
20   contributors may be used to endorse or promote products derived from
21   this software without specific prior written permission.
22
23   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
27   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34*/
35#include <ops/custom_defs.h>
36#include "profile_tm.h"
37
38// shifted power spectrum to fftwrap.c so that optimisation can be shared between mdf.c and preprocess.c
39#define OVERRIDE_POWER_SPECTRUM
40
41#ifdef FIXED_POINT
42
43#else
44
45#define OVERRIDE_FILTER_DC_NOTCH16
46void filter_dc_notch16(
47	const spx_int16_t * restrict in,
48	float radius,
49	float * restrict out,
50	int len,
51	float * restrict mem
52)
53{
54	register int i;
55	register float den2, r1;
56	register float mem0, mem1;
57
58	FILTERDCNOTCH16_START();
59
60	r1 = 1 - radius;
61	den2 = (radius * radius) + (0.7 * r1 * r1);
62	mem0 = mem[0];
63	mem1 = mem[1];
64
65#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
66#pragma TCS_unroll=4
67#pragma TCS_unrollexact=1
68#endif
69	for ( i=0 ; i<len ; ++i )
70	{
71		register float vin  = in[i];
72		register float vout = mem0 + vin;
73		register float rvout = radius * vout;
74
75		mem0 = mem1 + 2 * (-vin + rvout);
76		mem1 = vin - (den2 * vout);
77
78		out[i] = rvout;
79	}
80#if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
81#pragma TCS_unrollexact=0
82#pragma TCS_unroll=0
83#endif
84
85	mem[0] = mem0;
86	mem[1] = mem1;
87
88	FILTERDCNOTCH16_STOP();
89}
90
91#define OVERRIDE_MDF_INNER_PROD
92float mdf_inner_prod(
93	const float * restrict x,
94	const float * restrict y,
95	int len
96)
97{
98	register float sum = 0;
99
100	MDFINNERPROD_START();
101
102	len >>= 1;
103
104#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
105#pragma TCS_unroll=4
106#pragma TCS_unrollexact=1
107#endif
108	while(len--)
109	{
110		register float acc0, acc1;
111
112		acc0 = (*x++) * (*y++);
113		acc1 = (*x++) * (*y++);
114
115		sum	 += acc0 + acc1;
116	}
117#if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
118#pragma TCS_unrollexact=0
119#pragma TCS_unroll=0
120#endif
121
122	MDFINNERPROD_STOP();
123
124	return sum;
125}
126
127#define OVERRIDE_SPECTRAL_MUL_ACCUM
128void spectral_mul_accum(
129	const float * restrict X,
130	const float * restrict Y,
131	float * restrict acc,
132	int N, int M
133)
134{
135	register int i, j;
136	register float Xi, Yi, Xii, Yii;
137	register int _N;
138
139	SPECTRALMULACCUM_START();
140
141	acc[0] = X[0] * Y[0];
142	_N = N-1;
143
144	for ( i=1 ; i<_N ; i+=2 )
145	{
146		Xi = X[i];
147		Yi = Y[i];
148		Xii = X[i+1];
149		Yii = Y[i+1];
150
151		acc[i]	= (Xi  * Yi - Xii * Yii);
152		acc[i+1]= (Xii * Yi + Xi  * Yii);
153	}
154
155	acc[_N] = X[_N] * Y[_N];
156
157	for ( j=1,X+=N,Y+=N ; j<M ; j++ )
158	{
159		acc[0] += X[0] * Y[0];
160
161		for ( i=1 ; i<N-1 ; i+=2 )
162		{
163			Xi = X[i];
164			Yi = Y[i];
165			Xii = X[i+1];
166			Yii = Y[i+1];
167
168			acc[i]	+= (Xi  * Yi - Xii * Yii);
169			acc[i+1]+= (Xii * Yi + Xi  * Yii);
170		}
171
172		acc[_N] += X[_N] * Y[_N];
173		X += N;
174		Y += N;
175	}
176
177	SPECTRALMULACCUM_STOP();
178}
179
180#define OVERRIDE_WEIGHTED_SPECTRAL_MUL_CONJ
181void weighted_spectral_mul_conj(
182	const float * restrict w,
183	const float p,
184	const float * restrict X,
185	const float * restrict Y,
186	float * restrict prod,
187	int N
188)
189{
190	register int i, j;
191   	register int _N;
192
193	WEIGHTEDSPECTRALMULCONJ_START();
194
195	prod[0] = p * w[0] * X[0] * Y[0];
196	_N = N-1;
197
198	for (i=1,j=1;i<_N;i+=2,j++)
199	{
200		register float W;
201		register float Xi, Yi, Xii, Yii;
202
203		Xi = X[i];
204		Yi = Y[i];
205		Xii = X[i+1];
206		Yii = Y[i+1];
207		W = p * w[j];
208
209
210		prod[i]	 = W * (Xi  * Yi + Xii * Yii);
211		prod[i+1]= W * (Xi  * Yii - Xii * Yi);
212	}
213
214	prod[_N] = p * w[j] * X[_N] * Y[_N];
215
216	WEIGHTEDSPECTRALMULCONJ_STOP();
217}
218
219#define OVERRIDE_MDF_ADJUST_PROP
220void mdf_adjust_prop(
221	const float * restrict W,
222	int N,
223	int M,
224	float * restrict prop
225)
226{
227	register int i, j;
228	register float max_sum = 1;
229	register float prop_sum = 1;
230
231	MDFADJUSTPROP_START();
232
233	for ( i=0 ; i<M ; ++i )
234	{
235		register float tmp = 1;
236		register int k = i * N;
237		register int l = k + N;
238		register float propi;
239
240#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
241#pragma TCS_unroll=4
242#pragma TCS_unrollexact=1
243#endif
244		for ( j=k ; j<l ; ++j )
245		{
246			register float wi = W[j];
247
248			tmp += wi * wi;
249		}
250#if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
251#pragma TCS_unrollexact=0
252#pragma TCS_unroll=0
253#endif
254
255		propi  = spx_sqrt(tmp);
256		prop[i]= propi;
257		max_sum= fmux(propi > max_sum, propi, max_sum);
258	}
259
260	for ( i=0 ; i<M ; ++i )
261	{
262		register float propi = prop[i];
263
264		propi += .1f * max_sum;
265		prop_sum += propi;
266		prop[i] = propi;
267	}
268
269	prop_sum = 0.99f / prop_sum;
270	for ( i=0 ; i<M ; ++i )
271	{	prop[i] = prop_sum * prop[i];
272	}
273
274	MDFADJUSTPROP_STOP();
275}
276
277
278#define OVERRIDE_SPEEX_ECHO_GET_RESIDUAL
279void speex_echo_get_residual(
280	SpeexEchoState * restrict st,
281	float * restrict residual_echo,
282	int len
283)
284{
285	register int i;
286	register float leak2, leake;
287	register int N;
288	register float * restrict window;
289	register float * restrict last_y;
290	register float * restrict y;
291
292	SPEEXECHOGETRESIDUAL_START();
293
294	window = st->window;
295	last_y = st->last_y;
296	y = st->y;
297	N = st->window_size;
298
299
300#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
301#pragma TCS_unroll=4
302#pragma TCS_unrollexact=1
303#endif
304	for (i=0;i<N;i++)
305	{	y[i] = window[i] * last_y[i];
306	}
307#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
308#pragma TCS_unrollexact=0
309#pragma TCS_unroll=0
310#endif
311
312	spx_fft(st->fft_table, st->y, st->Y);
313	power_spectrum(st->Y, residual_echo, N);
314
315	leake = st->leak_estimate;
316	leak2 = fmux(leake > .5, 1, 2 * leake);
317	N = st->frame_size;
318
319#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
320#pragma TCS_unroll=4
321#pragma TCS_unrollexact=1
322#endif
323	for ( i=0 ; i<N ; ++i )
324	{	residual_echo[i] *= leak2;
325	}
326#if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
327#pragma TCS_unrollexact=0
328#pragma TCS_unroll=0
329#endif
330
331	residual_echo[N] *= leak2;
332
333#ifndef NO_REMARK
334   (void)len;
335#endif
336
337   SPEEXECHOGETRESIDUAL_STOP();
338}
339#endif
340
341
342void mdf_preemph(
343	SpeexEchoState * restrict st,
344	spx_word16_t * restrict x,
345	const spx_int16_t * restrict far_end,
346	int framesize
347)
348{
349	register spx_word16_t preemph = st->preemph;
350	register spx_word16_t memX = st->memX;
351	register spx_word16_t memD = st->memD;
352	register spx_word16_t * restrict input = st->input;
353	register int i;
354#ifdef FIXED_POINT
355	register int saturated = st->saturated;
356#endif
357
358#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
359#pragma TCS_unroll=4
360#pragma TCS_unrollexact=1
361#endif
362	for ( i=0 ; i<framesize ; ++i )
363	{
364		register spx_int16_t far_endi = far_end[i];
365		register spx_word32_t tmp32;
366		register spx_word16_t inputi = input[i];
367
368		tmp32 = SUB32(EXTEND32(far_endi), EXTEND32(MULT16_16_P15(preemph,memX)));
369
370#ifdef FIXED_POINT
371		saturated = mux(iabs(tmp32) > 32767, M+1, saturated);
372		tmp32 = iclipi(tmp32,32767);
373#endif
374
375		x[i] = EXTRACT16(tmp32);
376		memX = far_endi;
377		tmp32 = SUB32(EXTEND32(inputi), EXTEND32(MULT16_16_P15(preemph, memD)));
378
379#ifdef FIXED_POINT
380		saturated = mux( ((tmp32 > 32767) && (saturated == 0)), 1,
381					mux( ((tmp32 <-32767) && (saturated == 0)), 1, saturated ));
382		tmp32 = iclipi(tmp32,32767);
383#endif
384		memD = inputi;
385		input[i] = tmp32;
386	}
387#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
388#pragma TCS_unrollexact=0
389#pragma TCS_unroll=0
390#endif
391
392	st->memD = memD;
393	st->memX = memX;
394
395#ifdef FIXED_POINT
396	st->saturated = saturated;
397#endif
398}
399
400void mdf_sub(
401	spx_word16_t * restrict dest,
402	const spx_word16_t * restrict src1,
403	const spx_word16_t * restrict src2,
404	int framesize
405)
406{
407	register int i;
408
409#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
410#pragma TCS_unroll=4
411#pragma TCS_unrollexact=1
412#endif
413
414#ifdef FIXED_POINT
415	for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
416	{	register int src1i, src2i, desti;
417
418		src1i = ld32d(src1,i);
419		src2i = ld32d(src2,i);
420		desti = dspidualsub(src1i,src2i);
421		st32d(i, dest, desti);
422	}
423#else
424	for ( i=0 ; i<framesize ; ++i )
425	{	dest[i] = src1[i] - src2[i];
426	}
427#endif
428
429#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
430#pragma TCS_unrollexact=0
431#pragma TCS_unroll=0
432#endif
433}
434
435void mdf_sub_int(
436	spx_word16_t * restrict dest,
437	const spx_int16_t * restrict src1,
438	const spx_int16_t * restrict src2,
439	int framesize
440)
441{
442	register int i, j;
443
444#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
445#pragma TCS_unroll=4
446#pragma TCS_unrollexact=1
447#endif
448
449#ifdef FIXED_POINT
450	for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
451	{	register int src1i, src2i, desti;
452
453		src1i = ld32d(src1,i);
454		src2i = ld32d(src2,i);
455		desti = dspidualsub(src1i,src2i);
456		st32d(i, dest, desti);
457	}
458#else
459	for ( i=0,j=0 ; i<framesize ; i+=2,++j )
460	{	register int src1i, src2i, desti;
461
462
463		src1i = ld32d(src1,j);
464		src2i = ld32d(src2,j);
465		desti = dspidualsub(src1i,src2i);
466
467		dest[i] = sex16(desti);
468		dest[i+1] = asri(16,desti);
469	}
470#endif
471
472#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
473#pragma TCS_unrollexact=0
474#pragma TCS_unroll=0
475#endif
476}
477
478void mdf_compute_weight_gradient(
479	SpeexEchoState * restrict st,
480	spx_word16_t * restrict X,
481	int N,
482	int M
483)
484{
485	register int i, j;
486	register spx_word32_t * restrict PHI = st->PHI;
487
488	for (j=M-1;j>=0;j--)
489	{
490		register spx_word32_t * restrict W = &(st->W[j*N]);
491
492		weighted_spectral_mul_conj(
493			st->power_1,
494			FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15),
495			&X[(j+1)*N],
496			st->E,
497			st->PHI,
498			N);
499#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
500#pragma TCS_unroll=4
501#pragma TCS_unrollexact=1
502#endif
503		for (i=0;i<N;i++)
504		{	W[i] = ADD32(W[i],PHI[i]);
505		}
506#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
507#pragma TCS_unrollexact=0
508#pragma TCS_unroll=0
509#endif
510	}
511}
512
513void mdf_update_weight(
514	SpeexEchoState * restrict st,
515	int N,
516	int M,
517	int framesize
518)
519{
520	register int j;
521	register int cancel_count = st->cancel_count;
522	register spx_word16_t * restrict wtmp = st->wtmp;
523#ifdef FIXED_POINT
524	register spx_word16_t * restrict wtmp2 = st->wtmp2;
525	register int i;
526#endif
527
528	for ( j=0 ; j<M ; j++ )
529	{
530		register spx_word32_t * restrict W = &(st->W[j*N]);
531
532		if (j==0 || cancel_count%(M-1) == j-1)
533		{
534#ifdef FIXED_POINT
535
536#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
537#pragma TCS_unroll=4
538#pragma TCS_unrollexact=1
539#endif
540			for ( i=0 ; i<N ; i++ )
541				wtmp2[i] = EXTRACT16(PSHR32(W[i],NORMALIZE_SCALEDOWN+16));
542#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
543#pragma TCS_unrollexact=0
544#pragma TCS_unroll=0
545#endif
546			spx_ifft(st->fft_table, wtmp2, wtmp);
547			memset(wtmp, 0, framesize * sizeof(spx_word16_t));
548
549#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
550#pragma TCS_unroll=4
551#pragma TCS_unrollexact=1
552#endif
553			for (j=framesize; j<N ; ++j)
554			{	wtmp[j]=SHL16(wtmp[j],NORMALIZE_SCALEUP);
555			}
556#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
557#pragma TCS_unrollexact=0
558#pragma TCS_unroll=0
559#endif
560
561			spx_fft(st->fft_table, wtmp, wtmp2);
562
563#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
564#pragma TCS_unroll=4
565#pragma TCS_unrollexact=1
566#endif
567			for (i=0;i<N;i++)
568			{	W[i] -= SHL32(EXTEND32(wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
569			}
570#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
571#pragma TCS_unrollexact=0
572#pragma TCS_unroll=0
573#endif
574
575#else
576			spx_ifft(st->fft_table, W, wtmp);
577			memset(&wtmp[framesize], 0, (N-framesize) * sizeof(spx_word16_t));
578			spx_fft(st->fft_table, wtmp, W);
579#endif
580		}
581	}
582}
583
584#ifdef TWO_PATH
585// first four parameters is passed by registers
586// generate faster performance with 4 parameters functions
587spx_word32_t mdf_update_foreground(
588	SpeexEchoState * restrict st,
589	spx_word32_t Dbf,
590	spx_word32_t Sff,
591	spx_word32_t See
592)
593{
594	register spx_word32_t Davg1 = st->Davg1;
595	register spx_word32_t Davg2 = st->Davg2;
596	register spx_word32_t Dvar1 = st->Dvar1;
597	register spx_word32_t Dvar2 = st->Dvar2;
598	register spx_word16_t * restrict input = st->input;
599   	register int framesize = st->frame_size;
600	register spx_word16_t * restrict xx = st->x + framesize;
601	register spx_word16_t * restrict y = st->y + framesize;
602	register spx_word16_t * restrict ee = st->e + framesize;
603	register int update_foreground;
604	register int i;
605    register int N = st->window_size;
606	register int M = st->M;
607
608#ifdef FIXED_POINT
609	register spx_word32_t sc0 = SUB32(Sff,See);
610	register spx_float_t sc1 = FLOAT_MUL32U(Sff,Dbf);
611
612	Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),Davg1), MULT16_32_Q15(QCONST16(.4f,15),sc0));
613	Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),Davg2), MULT16_32_Q15(QCONST16(.15f,15),sc0));
614	Dvar1 = FLOAT_ADD(
615				FLOAT_MULT(VAR1_SMOOTH,Dvar1),
616				FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff),
617				MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
618	Dvar2 = FLOAT_ADD(
619				FLOAT_MULT(VAR2_SMOOTH,Dvar2),
620				FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff),
621				MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
622#else
623	register spx_word32_t sc0 = Sff - See;
624	register spx_word32_t sc1 = Sff * Dbf;
625
626	Davg1 = .6*Davg1 + .4*sc0;
627	Davg2 = .85*Davg2 + .15*sc0;
628	Dvar1 = VAR1_SMOOTH*Dvar1 + .16*sc1;
629	Dvar2 = VAR2_SMOOTH*Dvar2 + .0225*sc1;
630#endif
631
632	update_foreground =
633	   mux( FLOAT_GT(FLOAT_MUL32U(sc0, VABS(sc0)), sc1), 1,
634	   mux( FLOAT_GT(FLOAT_MUL32U(Davg1, VABS(Davg1)), FLOAT_MULT(VAR1_UPDATE,(Dvar1))), 1,
635	   mux( FLOAT_GT(FLOAT_MUL32U(Davg2, VABS(Davg2)), FLOAT_MULT(VAR2_UPDATE,(Dvar2))), 1, 0)));
636
637	if ( update_foreground )
638	{
639		register spx_word16_t * restrict windowf = st->window + framesize;
640		register spx_word16_t * restrict window = st->window;
641
642		st->Davg1 = st->Davg2 = 0;
643		st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
644
645		memcpy(st->foreground, st->W, N*M*sizeof(spx_word32_t));
646
647#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
648#pragma TCS_unroll=4
649#pragma TCS_unrollexact=1
650#endif
651		for ( i=0 ; i<framesize ; ++i)
652		{	register spx_word16_t wi = window[i];
653			register spx_word16_t wfi = windowf[i];
654			register spx_word16_t ei = ee[i];
655			register spx_word16_t yi = y[i];
656
657			ee[i] = MULT16_16_Q15(wfi,ei) +	MULT16_16_Q15(wi,yi);
658		}
659#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
660#pragma TCS_unrollexact=0
661#pragma TCS_unroll=0
662#endif
663
664	} else
665	{
666		register int reset_background;
667
668		reset_background =
669			mux( FLOAT_GT(FLOAT_MUL32U(-(sc0),VABS(sc0)), FLOAT_MULT(VAR_BACKTRACK,sc1)), 1,
670			mux( FLOAT_GT(FLOAT_MUL32U(-(Davg1), VABS(Davg1)), FLOAT_MULT(VAR_BACKTRACK,Dvar1)), 1,
671			mux( FLOAT_GT(FLOAT_MUL32U(-(Davg2), VABS(Davg2)), FLOAT_MULT(VAR_BACKTRACK,Dvar2)), 1, 0)));
672
673		if ( reset_background )
674		{
675			memcpy(st->W, st->foreground, N*M*sizeof(spx_word32_t));
676			memcpy(y, ee, framesize * sizeof(spx_word16_t));
677			mdf_sub(xx,input,y,framesize);
678			See = Sff;
679			st->Davg1 = st->Davg2 = 0;
680			st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
681		} else
682		{
683			st->Davg1 = Davg1;
684			st->Davg2 = Davg2;
685			st->Dvar1 = Dvar1;
686			st->Dvar2 = Dvar2;
687		}
688	}
689
690	return See;
691}
692#endif
693
694void mdf_compute_error_signal(
695	SpeexEchoState * restrict st,
696	const spx_int16_t * restrict in,
697	spx_int16_t * restrict out,
698	int framesize
699)
700{
701	register spx_word16_t preemph = st->preemph;
702	register spx_word16_t memE = st->memE;
703	register int saturated = st->saturated;
704	register spx_word16_t * restrict e = st->e;
705	register spx_word16_t * restrict ee = st->e + framesize;
706	register spx_word16_t * restrict input = st->input;
707	register spx_word16_t * restrict xx = st->x + framesize;
708	register int i;
709
710#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
711#pragma TCS_unroll=4
712#pragma TCS_unrollexact=1
713#endif
714	for ( i=0 ; i<framesize ; ++i )
715	{
716		register spx_word32_t tmp_out;
717		register spx_int16_t ini = in[i];
718		register int flg;
719
720#ifdef FIXED_POINT
721
722#ifdef TWO_PATH
723		tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
724		tmp_out = iclipi(tmp_out,32767);
725#else
726		tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
727		tmp_out = iclipi(tmp_out,32767);
728#endif
729
730#else
731#ifdef TWO_PATH
732		tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
733#else
734		tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
735#endif
736		tmp_out =
737			fmux( tmp_out > 32767, 32767,
738			fmux( tmp_out < -32768, -32768, tmp_out));
739#endif
740
741		tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(preemph,memE)));
742		flg = iabs(ini) >= 32000;
743		tmp_out = VMUX( flg, 0, tmp_out);
744		saturated = mux( flg && (saturated == 0), 1, saturated);
745
746		out[i] = (spx_int16_t)tmp_out;
747		memE = tmp_out;
748	}
749#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
750#pragma TCS_unrollexact=0
751#pragma TCS_unroll=0
752#endif
753
754	st->memE = memE;
755	st->saturated = saturated;
756	memset(e, 0, framesize * sizeof(spx_word16_t));
757	memcpy(ee, xx, framesize * sizeof(spx_word16_t));
758}
759
760inline int mdf_check(
761  	SpeexEchoState * restrict st,
762	spx_int16_t * out,
763	spx_word32_t Syy,
764	spx_word32_t Sxx,
765	spx_word32_t See,
766	spx_word32_t Sff,
767	spx_word32_t Sdd
768)
769{
770    register int N = st->window_size;
771	register spx_word32_t N1e9 = N * 1e9;
772	register int screwed_up = st->screwed_up;
773	register int framesize = st->frame_size;
774
775	if (!(Syy>=0 && Sxx>=0 && See >= 0)
776#ifndef FIXED_POINT
777		|| !(Sff < N1e9 && Syy < N1e9 && Sxx < N1e9 )
778#endif
779      )
780	{
781		screwed_up += 50;
782		memset(out, 0, framesize * sizeof(spx_int16_t));
783
784	} else
785	{	screwed_up = mux( SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)), screwed_up+1, 0);
786	}
787
788	st->screwed_up = screwed_up;
789
790	return screwed_up;
791}
792
793void mdf_smooth(
794	spx_word32_t * restrict power,
795	spx_word32_t * restrict Xf,
796	int framesize,
797	int M
798)
799{
800    register spx_word16_t ss, ss_1, pf, xff;
801	register int j;
802
803#ifdef FIXED_POINT
804	ss=DIV32_16(11469,M);
805	ss_1 = SUB16(32767,ss);
806#else
807	ss=.35/M;
808	ss_1 = 1-ss;
809#endif
810
811#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
812#pragma TCS_unroll=4
813#pragma TCS_unrollexact=1
814#endif
815	for ( j=0 ; j<framesize ; ++j )
816	{	register spx_word32_t pi  = power[j];
817		register spx_word32_t xfi = Xf[j];
818
819		power[j] = MULT16_32_Q15(ss_1,pi) + 1 + MULT16_32_Q15(ss,xfi);
820	}
821#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
822#pragma TCS_unrollexact=0
823#pragma TCS_unroll=0
824#endif
825
826	pf = power[framesize];
827	xff = Xf[framesize];
828	power[framesize] = MULT16_32_Q15(ss_1,pf) + 1 + MULT16_32_Q15(ss,xff);
829}
830
831void mdf_compute_filtered_spectra_crosscorrelations(
832 	SpeexEchoState * restrict st,
833	spx_word32_t Syy,
834	spx_word32_t See,
835	int framesize
836)
837{
838	register spx_float_t Pey = FLOAT_ONE;
839	register spx_float_t Pyy = FLOAT_ONE;
840	register spx_word16_t spec_average = st->spec_average;
841	register spx_word32_t * restrict pRf = st->Rf;
842	register spx_word32_t * restrict pYf = st->Yf;
843	register spx_word32_t * restrict pEh = st->Eh;
844	register spx_word32_t * restrict pYh = st->Yh;
845	register spx_word16_t beta0 = st->beta0;
846	register spx_word16_t beta_max = st->beta_max;
847	register spx_float_t alpha, alpha_1;
848	register spx_word32_t tmp32, tmpx;
849	register spx_float_t sPey = st->Pey;
850	register spx_float_t sPyy = st->Pyy;
851	register spx_float_t tmp;
852	register spx_word16_t leak_estimate;
853	register int j;
854	register spx_float_t  Eh, Yh;
855	register spx_word32_t _Ehj, _Rfj, _Yfj, _Yhj;
856
857#ifdef FIXED_POINT
858	register spx_word16_t spec_average1 = SUB16(32767,spec_average);
859#else
860	register spx_word16_t spec_average1 = 1 - spec_average;
861#endif
862
863#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
864#pragma TCS_unroll=4
865#pragma TCS_unrollexact=1
866#endif
867	for (j=framesize; j>0 ; --j)
868	{
869		_Ehj = pEh[j];
870		_Rfj = pRf[j];
871		_Yfj = pYf[j];
872		_Yhj = pYh[j];
873
874		Eh = PSEUDOFLOAT(_Rfj - _Ehj);
875		Yh = PSEUDOFLOAT(_Yfj - _Yhj);
876
877		Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
878		Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
879
880		pEh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
881		pYh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
882   }
883#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
884#pragma TCS_unrollexact=0
885#pragma TCS_unroll=0
886#endif
887	_Ehj = pEh[0];
888	_Rfj = pRf[0];
889	_Yfj = pYf[0];
890	_Yhj = pYh[0];
891
892	Eh = PSEUDOFLOAT(_Rfj - _Ehj);
893	Yh = PSEUDOFLOAT(_Yfj - _Yhj);
894
895	Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
896	Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
897
898	pEh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
899	pYh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
900
901	Pyy = FLOAT_SQRT(Pyy);
902	Pey = FLOAT_DIVU(Pey,Pyy);
903
904	tmp32 = MULT16_32_Q15(beta0,Syy);
905	tmpx = MULT16_32_Q15(beta_max,See);
906	tmp32 = VMUX(tmp32 > tmpx, tmpx, tmp32);
907	alpha = FLOAT_DIV32(tmp32, See);
908	alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
909
910	sPey = FLOAT_ADD(FLOAT_MULT(alpha_1,sPey) , FLOAT_MULT(alpha,Pey));
911	sPyy = FLOAT_ADD(FLOAT_MULT(alpha_1,sPyy) , FLOAT_MULT(alpha,Pyy));
912	tmp = FLOAT_MULT(MIN_LEAK,sPyy);
913
914#ifndef FIXED_POINT
915	sPyy = VMUX(FLOAT_LT(sPyy, FLOAT_ONE), FLOAT_ONE, sPyy);
916	sPey = VMUX(FLOAT_LT(sPey, tmp), tmp, sPey);
917	sPey = VMUX(FLOAT_LT(sPey, sPyy), sPey, sPyy);
918#else
919	sPyy = FLOAT_LT(sPyy, FLOAT_ONE) ? FLOAT_ONE : sPyy;
920	sPey = FLOAT_LT(sPey, tmp) ? tmp : sPey;
921	sPey = FLOAT_LT(sPey, sPyy) ? sPey : sPyy;
922#endif
923
924	leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(sPey, sPyy),14));
925
926	leak_estimate = VMUX( leak_estimate > 16383, 32767, SHL16(leak_estimate,1));
927	st->Pey = sPey;
928	st->Pyy = sPyy;
929	st->leak_estimate = leak_estimate;
930}
931
932inline spx_word16_t mdf_compute_RER(
933 	spx_word32_t See,
934	spx_word32_t Syy,
935	spx_word32_t Sey,
936	spx_word32_t Sxx,
937	spx_word16_t leake
938)
939{
940	register spx_word16_t RER;
941
942#ifdef FIXED_POINT
943	register spx_word32_t tmp32;
944	register spx_word32_t tmp;
945	spx_float_t bound = PSEUDOFLOAT(Sey);
946
947	tmp32 = MULT16_32_Q15(leake,Syy);
948	tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
949
950	bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
951	tmp   = FLOAT_EXTRACT32(bound);
952	tmp32 = imux( FLOAT_GT(bound, PSEUDOFLOAT(See)), See,
953			imux( tmp32 < tmp,  tmp, tmp32));
954
955	tmp   = SHR32(See,1);
956	tmp32 = imux(tmp32 > tmp, tmp, tmp32);
957	RER   = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
958#else
959	register spx_word32_t r0;
960
961	r0 = (Sey * Sey)/(1 + See * Syy);
962
963	RER = (.0001*Sxx + 3.* MULT16_32_Q15(leake,Syy)) / See;
964	RER = fmux( RER < r0, r0, RER);
965	RER = fmux( RER > .5, .5, RER);
966#endif
967
968	return RER;
969}
970
971void mdf_adapt(
972 	SpeexEchoState * restrict st,
973	spx_word16_t RER,
974	spx_word32_t Syy,
975	spx_word32_t See,
976	spx_word32_t Sxx
977)
978{
979	register spx_float_t  * restrict power_1 = st->power_1;
980	register spx_word32_t  * restrict power = st->power;
981	register int adapted = st->adapted;
982	register spx_word32_t sum_adapt = st->sum_adapt;
983	register spx_word16_t leake = st->leak_estimate;
984	register int framesize = st->frame_size;
985	register int i;
986	register int M = st->M;
987
988	adapted = mux( !adapted && sum_adapt > QCONST32(M,15) &&
989					MULT16_32_Q15(leake,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy), 1, adapted);
990
991	if ( adapted )
992	{	register spx_word32_t * restrict Yf = st->Yf;
993		register spx_word32_t * restrict Rf = st->Rf;
994		register spx_word32_t r, e, e2;
995
996#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
997#pragma TCS_unroll=4
998#pragma TCS_unrollexact=1
999#endif
1000		for ( i=0 ; i<framesize ; ++i )
1001		{
1002			r = SHL32(Yf[i],3);
1003			r = MULT16_32_Q15(leake,r);
1004			e = SHL32(Rf[i],3)+1;
1005
1006#ifdef FIXED_POINT
1007			e2 = SHR32(e,1);
1008			r = mux( r > e2, e2, r);
1009#else
1010			e2 = e * .5;
1011			r = fmux( r > e2, e2, r);
1012#endif
1013
1014			r = MULT16_32_Q15(QCONST16(.7,15),r) +
1015				MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1016
1017			power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[i]+10)),WEIGHT_SHIFT+16);
1018		}
1019#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1020#pragma TCS_unrollexact=0
1021#pragma TCS_unroll=0
1022#endif
1023
1024		r = SHL32(Yf[framesize],3);
1025		r = MULT16_32_Q15(leake,r);
1026		e = SHL32(Rf[framesize],3)+1;
1027
1028#ifdef FIXED_POINT
1029		e2 = SHR32(e,1);
1030		r = mux( r > e2, e2, r);
1031#else
1032		e2 = e * .5;
1033		r = fmux( r > e2, e2, r);
1034#endif
1035
1036		r = MULT16_32_Q15(QCONST16(.7,15),r) +
1037			MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1038
1039		power_1[framesize] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[framesize]+10)),WEIGHT_SHIFT+16);
1040
1041	} else
1042	{
1043		register spx_word16_t adapt_rate=0;
1044		register int N = st->window_size;
1045
1046		if ( Sxx > SHR32(MULT16_16(N, 1000),6) )
1047		{	register spx_word32_t tmp32, tmp32q;
1048
1049			tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1050#ifdef FIXED_POINT
1051			tmp32q = SHR32(See,2);
1052			tmp32  = mux(tmp32 > tmp32q, tmp32q, tmp32);
1053#else
1054			tmp32q = 0.25 * See;
1055			tmp32 = fmux(tmp32 > tmp32q, tmp32q, tmp32);
1056#endif
1057			adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1058		}
1059
1060#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1061#pragma TCS_unroll=4
1062#pragma TCS_unrollexact=1
1063#endif
1064		for (i=0;i<framesize;i++)
1065			power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[i],10)),WEIGHT_SHIFT+1);
1066#if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
1067#pragma TCS_unrollexact=0
1068#pragma TCS_unroll=0
1069#endif
1070		power_1[framesize] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[framesize],10)),WEIGHT_SHIFT+1);
1071		sum_adapt = ADD32(sum_adapt,adapt_rate);
1072	}
1073
1074	st->sum_adapt = sum_adapt;
1075	st->adapted = adapted;
1076}
1077
1078#define OVERRIDE_ECHO_CANCELLATION
1079void speex_echo_cancellation(
1080	SpeexEchoState * restrict st,
1081	const spx_int16_t * restrict in,
1082	const spx_int16_t * restrict far_end,
1083	spx_int16_t * restrict out
1084)
1085{
1086   	register int framesize = st->frame_size;
1087   	register spx_word16_t * restrict x = st->x;
1088	register spx_word16_t * restrict xx = st->x + framesize;
1089	register spx_word16_t * restrict yy = st->y + framesize;
1090	register spx_word16_t * restrict ee = st->e + framesize;
1091	register spx_word32_t Syy, See, Sxx, Sdd, Sff;
1092	register spx_word16_t RER;
1093	register spx_word32_t Sey;
1094	register int j;
1095	register int N,M;
1096#ifdef TWO_PATH
1097	register spx_word32_t Dbf;
1098#endif
1099
1100	N = st->window_size;
1101	M = st->M;
1102	st->cancel_count++;
1103
1104	filter_dc_notch16(in, st->notch_radius, st->input, framesize, st->notch_mem);
1105	mdf_preemph(st, xx, far_end, framesize);
1106
1107   	{
1108
1109	register spx_word16_t * restrict X = st->X;
1110
1111	for ( j=M-1 ; j>=0 ; j-- )
1112	{	register spx_word16_t * restrict Xdes = &(X[(j+1)*N]);
1113		register spx_word16_t * restrict Xsrc = &(X[j*N]);
1114
1115		memcpy(Xdes, Xsrc, N * sizeof(spx_word16_t));
1116	}
1117
1118	spx_fft(st->fft_table, x, X);
1119	memcpy(st->last_y, st->x, N * sizeof(spx_word16_t));
1120	Sxx = mdf_inner_prod(xx, xx, framesize);
1121	memcpy(x, xx, framesize * sizeof(spx_word16_t));
1122
1123#ifdef TWO_PATH
1124	spectral_mul_accum(st->X, st->foreground, st->Y, N, M);
1125	spx_ifft(st->fft_table, st->Y, st->e);
1126	mdf_sub(xx, st->input, ee, framesize);
1127	Sff = mdf_inner_prod(xx, xx, framesize);
1128#endif
1129
1130	mdf_adjust_prop (st->W, N, M, st->prop);
1131
1132	if (st->saturated == 0)
1133	{	mdf_compute_weight_gradient(st, X, N, M);
1134	} else
1135	{	st->saturated--;
1136	}
1137	}
1138
1139	mdf_update_weight(st, N, M, framesize);
1140	spectral_mul_accum(st->X, st->W, st->Y, N, M);
1141	spx_ifft(st->fft_table, st->Y, st->y);
1142
1143#ifdef TWO_PATH
1144	mdf_sub(xx, ee, yy, framesize);
1145	Dbf = 10+mdf_inner_prod(xx, xx, framesize);
1146#endif
1147
1148	mdf_sub(xx, st->input, yy, framesize);
1149	See = mdf_inner_prod(xx, xx, framesize);
1150
1151#ifndef TWO_PATH
1152	Sff = See;
1153#else
1154	See = mdf_update_foreground(st,Dbf,Sff,See);
1155#endif
1156
1157
1158	mdf_compute_error_signal(st, in, out, framesize);
1159	Sey = mdf_inner_prod(ee, yy, framesize);
1160	Syy = mdf_inner_prod(yy, yy, framesize);
1161	Sdd = mdf_inner_prod(st->input, st->input, framesize);
1162
1163	if ( mdf_check(st,out,Syy,Sxx,See,Sff,Sdd) >= 50 )
1164	{	speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1165		speex_echo_state_reset(st);
1166		return;
1167	}
1168
1169	See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1170	spx_fft(st->fft_table, st->e, st->E);
1171	memset(st->y, 0, framesize * sizeof(spx_word16_t));
1172	spx_fft(st->fft_table, st->y, st->Y);
1173	power_spectrum(st->E, st->Rf, N);
1174	power_spectrum(st->Y, st->Yf, N);
1175	power_spectrum(st->X, st->Xf, N);
1176
1177	mdf_smooth(st->power,st->Xf,framesize, M);
1178	mdf_compute_filtered_spectra_crosscorrelations(st,Syy,See,framesize);
1179	RER = mdf_compute_RER(See,Syy,Sey,Sxx,st->leak_estimate);
1180	mdf_adapt(st, RER, Syy, See, Sxx);
1181
1182	if ( st->adapted )
1183	{	register spx_word16_t * restrict last_yy = st->last_y + framesize;
1184
1185		memcpy(st->last_y, last_yy, framesize * sizeof(spx_word16_t));
1186		mdf_sub_int(last_yy, in, out, framesize);
1187
1188	}
1189}
1190
1191
1192
1193