153a5a1b3Sopenharmony_ci/* aec.cpp 253a5a1b3Sopenharmony_ci * 353a5a1b3Sopenharmony_ci * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005). 453a5a1b3Sopenharmony_ci * All Rights Reserved. 553a5a1b3Sopenharmony_ci * 653a5a1b3Sopenharmony_ci * Acoustic Echo Cancellation NLMS-pw algorithm 753a5a1b3Sopenharmony_ci * 853a5a1b3Sopenharmony_ci * Version 0.3 filter created with www.dsptutor.freeuk.com 953a5a1b3Sopenharmony_ci * Version 0.3.1 Allow change of stability parameter delta 1053a5a1b3Sopenharmony_ci * Version 0.4 Leaky Normalized LMS - pre whitening algorithm 1153a5a1b3Sopenharmony_ci */ 1253a5a1b3Sopenharmony_ci 1353a5a1b3Sopenharmony_ci#ifndef _GNU_SOURCE 1453a5a1b3Sopenharmony_ci#define _GNU_SOURCE 1553a5a1b3Sopenharmony_ci#endif 1653a5a1b3Sopenharmony_ci 1753a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H 1853a5a1b3Sopenharmony_ci#include <config.h> 1953a5a1b3Sopenharmony_ci#endif 2053a5a1b3Sopenharmony_ci 2153a5a1b3Sopenharmony_ci#include <math.h> 2253a5a1b3Sopenharmony_ci#include <string.h> 2353a5a1b3Sopenharmony_ci#include <stdint.h> 2453a5a1b3Sopenharmony_ci 2553a5a1b3Sopenharmony_ci#include <pulse/xmalloc.h> 2653a5a1b3Sopenharmony_ci 2753a5a1b3Sopenharmony_ci#include "adrian-aec.h" 2853a5a1b3Sopenharmony_ci 2953a5a1b3Sopenharmony_ci#ifndef DISABLE_ORC 3053a5a1b3Sopenharmony_ci#include "adrian-aec-orc-gen.h" 3153a5a1b3Sopenharmony_ci#endif 3253a5a1b3Sopenharmony_ci 3353a5a1b3Sopenharmony_ci#ifdef __SSE__ 3453a5a1b3Sopenharmony_ci#include <xmmintrin.h> 3553a5a1b3Sopenharmony_ci#endif 3653a5a1b3Sopenharmony_ci 3753a5a1b3Sopenharmony_ci/* Vector Dot Product */ 3853a5a1b3Sopenharmony_cistatic REAL dotp(REAL a[], REAL b[]) 3953a5a1b3Sopenharmony_ci{ 4053a5a1b3Sopenharmony_ci REAL sum0 = 0.0f, sum1 = 0.0f; 4153a5a1b3Sopenharmony_ci int j; 4253a5a1b3Sopenharmony_ci 4353a5a1b3Sopenharmony_ci for (j = 0; j < NLMS_LEN; j += 2) { 4453a5a1b3Sopenharmony_ci // optimize: partial loop unrolling 4553a5a1b3Sopenharmony_ci sum0 += a[j] * b[j]; 4653a5a1b3Sopenharmony_ci sum1 += a[j + 1] * b[j + 1]; 4753a5a1b3Sopenharmony_ci } 4853a5a1b3Sopenharmony_ci return sum0 + sum1; 4953a5a1b3Sopenharmony_ci} 5053a5a1b3Sopenharmony_ci 5153a5a1b3Sopenharmony_cistatic REAL dotp_sse(REAL a[], REAL b[]) 5253a5a1b3Sopenharmony_ci{ 5353a5a1b3Sopenharmony_ci#ifdef __SSE__ 5453a5a1b3Sopenharmony_ci /* This is taken from speex's inner product implementation */ 5553a5a1b3Sopenharmony_ci int j; 5653a5a1b3Sopenharmony_ci REAL sum; 5753a5a1b3Sopenharmony_ci __m128 acc = _mm_setzero_ps(); 5853a5a1b3Sopenharmony_ci 5953a5a1b3Sopenharmony_ci for (j=0;j<NLMS_LEN;j+=8) 6053a5a1b3Sopenharmony_ci { 6153a5a1b3Sopenharmony_ci acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j))); 6253a5a1b3Sopenharmony_ci acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4))); 6353a5a1b3Sopenharmony_ci } 6453a5a1b3Sopenharmony_ci acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc)); 6553a5a1b3Sopenharmony_ci acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55)); 6653a5a1b3Sopenharmony_ci _mm_store_ss(&sum, acc); 6753a5a1b3Sopenharmony_ci 6853a5a1b3Sopenharmony_ci return sum; 6953a5a1b3Sopenharmony_ci#else 7053a5a1b3Sopenharmony_ci return dotp(a, b); 7153a5a1b3Sopenharmony_ci#endif 7253a5a1b3Sopenharmony_ci} 7353a5a1b3Sopenharmony_ci 7453a5a1b3Sopenharmony_ci 7553a5a1b3Sopenharmony_ciAEC* AEC_init(int RATE, int have_vector) 7653a5a1b3Sopenharmony_ci{ 7753a5a1b3Sopenharmony_ci AEC *a = pa_xnew0(AEC, 1); 7853a5a1b3Sopenharmony_ci a->j = NLMS_EXT; 7953a5a1b3Sopenharmony_ci AEC_setambient(a, NoiseFloor); 8053a5a1b3Sopenharmony_ci a->dfast = a->dslow = M75dB_PCM; 8153a5a1b3Sopenharmony_ci a->xfast = a->xslow = M80dB_PCM; 8253a5a1b3Sopenharmony_ci a->gain = 1.0f; 8353a5a1b3Sopenharmony_ci a->Fx = IIR1_init(2000.0f/RATE); 8453a5a1b3Sopenharmony_ci a->Fe = IIR1_init(2000.0f/RATE); 8553a5a1b3Sopenharmony_ci a->cutoff = FIR_HP_300Hz_init(); 8653a5a1b3Sopenharmony_ci a->acMic = IIR_HP_init(); 8753a5a1b3Sopenharmony_ci a->acSpk = IIR_HP_init(); 8853a5a1b3Sopenharmony_ci 8953a5a1b3Sopenharmony_ci a->aes_y2 = M0dB; 9053a5a1b3Sopenharmony_ci 9153a5a1b3Sopenharmony_ci a->fdwdisplay = -1; 9253a5a1b3Sopenharmony_ci 9353a5a1b3Sopenharmony_ci if (have_vector) { 9453a5a1b3Sopenharmony_ci /* Get a 16-byte aligned location */ 9553a5a1b3Sopenharmony_ci a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16); 9653a5a1b3Sopenharmony_ci a->dotp = dotp_sse; 9753a5a1b3Sopenharmony_ci } else { 9853a5a1b3Sopenharmony_ci /* We don't care about alignment, just use the array as-is */ 9953a5a1b3Sopenharmony_ci a->w = a->w_arr; 10053a5a1b3Sopenharmony_ci a->dotp = dotp; 10153a5a1b3Sopenharmony_ci } 10253a5a1b3Sopenharmony_ci 10353a5a1b3Sopenharmony_ci return a; 10453a5a1b3Sopenharmony_ci} 10553a5a1b3Sopenharmony_ci 10653a5a1b3Sopenharmony_civoid AEC_done(AEC *a) { 10753a5a1b3Sopenharmony_ci pa_assert(a); 10853a5a1b3Sopenharmony_ci 10953a5a1b3Sopenharmony_ci pa_xfree(a->Fx); 11053a5a1b3Sopenharmony_ci pa_xfree(a->Fe); 11153a5a1b3Sopenharmony_ci pa_xfree(a->acMic); 11253a5a1b3Sopenharmony_ci pa_xfree(a->acSpk); 11353a5a1b3Sopenharmony_ci pa_xfree(a->cutoff); 11453a5a1b3Sopenharmony_ci pa_xfree(a); 11553a5a1b3Sopenharmony_ci} 11653a5a1b3Sopenharmony_ci 11753a5a1b3Sopenharmony_ci// Adrian soft decision DTD 11853a5a1b3Sopenharmony_ci// (Dual Average Near-End to Far-End signal Ratio DTD) 11953a5a1b3Sopenharmony_ci// This algorithm uses exponential smoothing with different 12053a5a1b3Sopenharmony_ci// ageing parameters to get fast and slow near-end and far-end 12153a5a1b3Sopenharmony_ci// signal averages. The ratio of NFRs term 12253a5a1b3Sopenharmony_ci// (dfast / xfast) / (dslow / xslow) is used to compute the stepsize 12353a5a1b3Sopenharmony_ci// A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is 12453a5a1b3Sopenharmony_ci// mapped to 1.0 with a limited linear function. 12553a5a1b3Sopenharmony_cistatic float AEC_dtd(AEC *a, REAL d, REAL x) 12653a5a1b3Sopenharmony_ci{ 12753a5a1b3Sopenharmony_ci float ratio, stepsize; 12853a5a1b3Sopenharmony_ci 12953a5a1b3Sopenharmony_ci // fast near-end and far-end average 13053a5a1b3Sopenharmony_ci a->dfast += ALPHAFAST * (fabsf(d) - a->dfast); 13153a5a1b3Sopenharmony_ci a->xfast += ALPHAFAST * (fabsf(x) - a->xfast); 13253a5a1b3Sopenharmony_ci 13353a5a1b3Sopenharmony_ci // slow near-end and far-end average 13453a5a1b3Sopenharmony_ci a->dslow += ALPHASLOW * (fabsf(d) - a->dslow); 13553a5a1b3Sopenharmony_ci a->xslow += ALPHASLOW * (fabsf(x) - a->xslow); 13653a5a1b3Sopenharmony_ci 13753a5a1b3Sopenharmony_ci if (a->xfast < M70dB_PCM) { 13853a5a1b3Sopenharmony_ci return 0.0f; // no Spk signal 13953a5a1b3Sopenharmony_ci } 14053a5a1b3Sopenharmony_ci 14153a5a1b3Sopenharmony_ci if (a->dfast < M70dB_PCM) { 14253a5a1b3Sopenharmony_ci return 0.0f; // no Mic signal 14353a5a1b3Sopenharmony_ci } 14453a5a1b3Sopenharmony_ci 14553a5a1b3Sopenharmony_ci // ratio of NFRs 14653a5a1b3Sopenharmony_ci ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast); 14753a5a1b3Sopenharmony_ci 14853a5a1b3Sopenharmony_ci // Linear interpolation with clamping at the limits 14953a5a1b3Sopenharmony_ci if (ratio < STEPX1) 15053a5a1b3Sopenharmony_ci stepsize = STEPY1; 15153a5a1b3Sopenharmony_ci else if (ratio > STEPX2) 15253a5a1b3Sopenharmony_ci stepsize = STEPY2; 15353a5a1b3Sopenharmony_ci else 15453a5a1b3Sopenharmony_ci stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1); 15553a5a1b3Sopenharmony_ci 15653a5a1b3Sopenharmony_ci return stepsize; 15753a5a1b3Sopenharmony_ci} 15853a5a1b3Sopenharmony_ci 15953a5a1b3Sopenharmony_ci 16053a5a1b3Sopenharmony_cistatic void AEC_leaky(AEC *a) 16153a5a1b3Sopenharmony_ci// The xfast signal is used to charge the hangover timer to Thold. 16253a5a1b3Sopenharmony_ci// When hangover expires (no Spk signal for some time) the vector w 16353a5a1b3Sopenharmony_ci// is erased. This is my implementation of Leaky NLMS. 16453a5a1b3Sopenharmony_ci{ 16553a5a1b3Sopenharmony_ci if (a->xfast >= M70dB_PCM) { 16653a5a1b3Sopenharmony_ci // vector w is valid for hangover Thold time 16753a5a1b3Sopenharmony_ci a->hangover = Thold; 16853a5a1b3Sopenharmony_ci } else { 16953a5a1b3Sopenharmony_ci if (a->hangover > 1) { 17053a5a1b3Sopenharmony_ci --(a->hangover); 17153a5a1b3Sopenharmony_ci } else if (1 == a->hangover) { 17253a5a1b3Sopenharmony_ci --(a->hangover); 17353a5a1b3Sopenharmony_ci // My Leaky NLMS is to erase vector w when hangover expires 17453a5a1b3Sopenharmony_ci memset(a->w_arr, 0, sizeof(a->w_arr)); 17553a5a1b3Sopenharmony_ci } 17653a5a1b3Sopenharmony_ci } 17753a5a1b3Sopenharmony_ci} 17853a5a1b3Sopenharmony_ci 17953a5a1b3Sopenharmony_ci 18053a5a1b3Sopenharmony_ci#if 0 18153a5a1b3Sopenharmony_civoid AEC::openwdisplay() { 18253a5a1b3Sopenharmony_ci // open TCP connection to program wdisplay.tcl 18353a5a1b3Sopenharmony_ci fdwdisplay = socket_async("127.0.0.1", 50999); 18453a5a1b3Sopenharmony_ci}; 18553a5a1b3Sopenharmony_ci#endif 18653a5a1b3Sopenharmony_ci 18753a5a1b3Sopenharmony_ci 18853a5a1b3Sopenharmony_cistatic REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize) 18953a5a1b3Sopenharmony_ci{ 19053a5a1b3Sopenharmony_ci REAL e; 19153a5a1b3Sopenharmony_ci REAL ef; 19253a5a1b3Sopenharmony_ci a->x[a->j] = x_; 19353a5a1b3Sopenharmony_ci a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x 19453a5a1b3Sopenharmony_ci 19553a5a1b3Sopenharmony_ci // calculate error value 19653a5a1b3Sopenharmony_ci // (mic signal - estimated mic signal from spk signal) 19753a5a1b3Sopenharmony_ci e = d; 19853a5a1b3Sopenharmony_ci if (a->hangover > 0) { 19953a5a1b3Sopenharmony_ci e -= a->dotp(a->w, a->x + a->j); 20053a5a1b3Sopenharmony_ci } 20153a5a1b3Sopenharmony_ci ef = IIR1_highpass(a->Fe, e); // pre-whitening of e 20253a5a1b3Sopenharmony_ci 20353a5a1b3Sopenharmony_ci // optimize: iterative dotp(xf, xf) 20453a5a1b3Sopenharmony_ci a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]); 20553a5a1b3Sopenharmony_ci 20653a5a1b3Sopenharmony_ci if (stepsize > 0.0f) { 20753a5a1b3Sopenharmony_ci // calculate variable step size 20853a5a1b3Sopenharmony_ci REAL mikro_ef = stepsize * ef / a->dotp_xf_xf; 20953a5a1b3Sopenharmony_ci 21053a5a1b3Sopenharmony_ci#ifdef DISABLE_ORC 21153a5a1b3Sopenharmony_ci // update tap weights (filter learning) 21253a5a1b3Sopenharmony_ci int i; 21353a5a1b3Sopenharmony_ci for (i = 0; i < NLMS_LEN; i += 2) { 21453a5a1b3Sopenharmony_ci // optimize: partial loop unrolling 21553a5a1b3Sopenharmony_ci a->w[i] += mikro_ef * a->xf[i + a->j]; 21653a5a1b3Sopenharmony_ci a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1]; 21753a5a1b3Sopenharmony_ci } 21853a5a1b3Sopenharmony_ci#else 21953a5a1b3Sopenharmony_ci update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN); 22053a5a1b3Sopenharmony_ci#endif 22153a5a1b3Sopenharmony_ci } 22253a5a1b3Sopenharmony_ci 22353a5a1b3Sopenharmony_ci if (--(a->j) < 0) { 22453a5a1b3Sopenharmony_ci // optimize: decrease number of memory copies 22553a5a1b3Sopenharmony_ci a->j = NLMS_EXT; 22653a5a1b3Sopenharmony_ci memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL)); 22753a5a1b3Sopenharmony_ci memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL)); 22853a5a1b3Sopenharmony_ci } 22953a5a1b3Sopenharmony_ci 23053a5a1b3Sopenharmony_ci // Saturation 23153a5a1b3Sopenharmony_ci if (e > MAXPCM) { 23253a5a1b3Sopenharmony_ci return MAXPCM; 23353a5a1b3Sopenharmony_ci } else if (e < -MAXPCM) { 23453a5a1b3Sopenharmony_ci return -MAXPCM; 23553a5a1b3Sopenharmony_ci } else { 23653a5a1b3Sopenharmony_ci return e; 23753a5a1b3Sopenharmony_ci } 23853a5a1b3Sopenharmony_ci} 23953a5a1b3Sopenharmony_ci 24053a5a1b3Sopenharmony_ci 24153a5a1b3Sopenharmony_ciint AEC_doAEC(AEC *a, int d_, int x_) 24253a5a1b3Sopenharmony_ci{ 24353a5a1b3Sopenharmony_ci REAL d = (REAL) d_; 24453a5a1b3Sopenharmony_ci REAL x = (REAL) x_; 24553a5a1b3Sopenharmony_ci 24653a5a1b3Sopenharmony_ci // Mic Highpass Filter - to remove DC 24753a5a1b3Sopenharmony_ci d = IIR_HP_highpass(a->acMic, d); 24853a5a1b3Sopenharmony_ci 24953a5a1b3Sopenharmony_ci // Mic Highpass Filter - cut-off below 300Hz 25053a5a1b3Sopenharmony_ci d = FIR_HP_300Hz_highpass(a->cutoff, d); 25153a5a1b3Sopenharmony_ci 25253a5a1b3Sopenharmony_ci // Amplify, for e.g. Soundcards with -6dB max. volume 25353a5a1b3Sopenharmony_ci d *= a->gain; 25453a5a1b3Sopenharmony_ci 25553a5a1b3Sopenharmony_ci // Spk Highpass Filter - to remove DC 25653a5a1b3Sopenharmony_ci x = IIR_HP_highpass(a->acSpk, x); 25753a5a1b3Sopenharmony_ci 25853a5a1b3Sopenharmony_ci // Double Talk Detector 25953a5a1b3Sopenharmony_ci a->stepsize = AEC_dtd(a, d, x); 26053a5a1b3Sopenharmony_ci 26153a5a1b3Sopenharmony_ci // Leaky (ageing of vector w) 26253a5a1b3Sopenharmony_ci AEC_leaky(a); 26353a5a1b3Sopenharmony_ci 26453a5a1b3Sopenharmony_ci // Acoustic Echo Cancellation 26553a5a1b3Sopenharmony_ci d = AEC_nlms_pw(a, d, x, a->stepsize); 26653a5a1b3Sopenharmony_ci 26753a5a1b3Sopenharmony_ci#if 0 26853a5a1b3Sopenharmony_ci if (fdwdisplay >= 0) { 26953a5a1b3Sopenharmony_ci if (++dumpcnt >= (WIDEB*RATE/10)) { 27053a5a1b3Sopenharmony_ci // wdisplay creates 10 dumps per seconds = large CPU load! 27153a5a1b3Sopenharmony_ci dumpcnt = 0; 27253a5a1b3Sopenharmony_ci write(fdwdisplay, ws, DUMP_LEN*sizeof(float)); 27353a5a1b3Sopenharmony_ci // we don't check return value. This is not production quality!!! 27453a5a1b3Sopenharmony_ci memset(ws, 0, sizeof(ws)); 27553a5a1b3Sopenharmony_ci } else { 27653a5a1b3Sopenharmony_ci int i; 27753a5a1b3Sopenharmony_ci for (i = 0; i < DUMP_LEN; i += 2) { 27853a5a1b3Sopenharmony_ci // optimize: partial loop unrolling 27953a5a1b3Sopenharmony_ci ws[i] += w[i]; 28053a5a1b3Sopenharmony_ci ws[i + 1] += w[i + 1]; 28153a5a1b3Sopenharmony_ci } 28253a5a1b3Sopenharmony_ci } 28353a5a1b3Sopenharmony_ci } 28453a5a1b3Sopenharmony_ci#endif 28553a5a1b3Sopenharmony_ci 28653a5a1b3Sopenharmony_ci return (int) d; 28753a5a1b3Sopenharmony_ci} 288