1/* aec.cpp 2 * 3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005). 4 * All Rights Reserved. 5 * 6 * Acoustic Echo Cancellation NLMS-pw algorithm 7 * 8 * Version 0.3 filter created with www.dsptutor.freeuk.com 9 * Version 0.3.1 Allow change of stability parameter delta 10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm 11 */ 12 13#ifndef _GNU_SOURCE 14#define _GNU_SOURCE 15#endif 16 17#ifdef HAVE_CONFIG_H 18#include <config.h> 19#endif 20 21#include <math.h> 22#include <string.h> 23#include <stdint.h> 24 25#include <pulse/xmalloc.h> 26 27#include "adrian-aec.h" 28 29#ifndef DISABLE_ORC 30#include "adrian-aec-orc-gen.h" 31#endif 32 33#ifdef __SSE__ 34#include <xmmintrin.h> 35#endif 36 37/* Vector Dot Product */ 38static REAL dotp(REAL a[], REAL b[]) 39{ 40 REAL sum0 = 0.0f, sum1 = 0.0f; 41 int j; 42 43 for (j = 0; j < NLMS_LEN; j += 2) { 44 // optimize: partial loop unrolling 45 sum0 += a[j] * b[j]; 46 sum1 += a[j + 1] * b[j + 1]; 47 } 48 return sum0 + sum1; 49} 50 51static REAL dotp_sse(REAL a[], REAL b[]) 52{ 53#ifdef __SSE__ 54 /* This is taken from speex's inner product implementation */ 55 int j; 56 REAL sum; 57 __m128 acc = _mm_setzero_ps(); 58 59 for (j=0;j<NLMS_LEN;j+=8) 60 { 61 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j))); 62 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4))); 63 } 64 acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc)); 65 acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55)); 66 _mm_store_ss(&sum, acc); 67 68 return sum; 69#else 70 return dotp(a, b); 71#endif 72} 73 74 75AEC* AEC_init(int RATE, int have_vector) 76{ 77 AEC *a = pa_xnew0(AEC, 1); 78 a->j = NLMS_EXT; 79 AEC_setambient(a, NoiseFloor); 80 a->dfast = a->dslow = M75dB_PCM; 81 a->xfast = a->xslow = M80dB_PCM; 82 a->gain = 1.0f; 83 a->Fx = IIR1_init(2000.0f/RATE); 84 a->Fe = IIR1_init(2000.0f/RATE); 85 a->cutoff = FIR_HP_300Hz_init(); 86 a->acMic = IIR_HP_init(); 87 a->acSpk = IIR_HP_init(); 88 89 a->aes_y2 = M0dB; 90 91 a->fdwdisplay = -1; 92 93 if (have_vector) { 94 /* Get a 16-byte aligned location */ 95 a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16); 96 a->dotp = dotp_sse; 97 } else { 98 /* We don't care about alignment, just use the array as-is */ 99 a->w = a->w_arr; 100 a->dotp = dotp; 101 } 102 103 return a; 104} 105 106void AEC_done(AEC *a) { 107 pa_assert(a); 108 109 pa_xfree(a->Fx); 110 pa_xfree(a->Fe); 111 pa_xfree(a->acMic); 112 pa_xfree(a->acSpk); 113 pa_xfree(a->cutoff); 114 pa_xfree(a); 115} 116 117// Adrian soft decision DTD 118// (Dual Average Near-End to Far-End signal Ratio DTD) 119// This algorithm uses exponential smoothing with different 120// ageing parameters to get fast and slow near-end and far-end 121// signal averages. The ratio of NFRs term 122// (dfast / xfast) / (dslow / xslow) is used to compute the stepsize 123// A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is 124// mapped to 1.0 with a limited linear function. 125static float AEC_dtd(AEC *a, REAL d, REAL x) 126{ 127 float ratio, stepsize; 128 129 // fast near-end and far-end average 130 a->dfast += ALPHAFAST * (fabsf(d) - a->dfast); 131 a->xfast += ALPHAFAST * (fabsf(x) - a->xfast); 132 133 // slow near-end and far-end average 134 a->dslow += ALPHASLOW * (fabsf(d) - a->dslow); 135 a->xslow += ALPHASLOW * (fabsf(x) - a->xslow); 136 137 if (a->xfast < M70dB_PCM) { 138 return 0.0f; // no Spk signal 139 } 140 141 if (a->dfast < M70dB_PCM) { 142 return 0.0f; // no Mic signal 143 } 144 145 // ratio of NFRs 146 ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast); 147 148 // Linear interpolation with clamping at the limits 149 if (ratio < STEPX1) 150 stepsize = STEPY1; 151 else if (ratio > STEPX2) 152 stepsize = STEPY2; 153 else 154 stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1); 155 156 return stepsize; 157} 158 159 160static void AEC_leaky(AEC *a) 161// The xfast signal is used to charge the hangover timer to Thold. 162// When hangover expires (no Spk signal for some time) the vector w 163// is erased. This is my implementation of Leaky NLMS. 164{ 165 if (a->xfast >= M70dB_PCM) { 166 // vector w is valid for hangover Thold time 167 a->hangover = Thold; 168 } else { 169 if (a->hangover > 1) { 170 --(a->hangover); 171 } else if (1 == a->hangover) { 172 --(a->hangover); 173 // My Leaky NLMS is to erase vector w when hangover expires 174 memset(a->w_arr, 0, sizeof(a->w_arr)); 175 } 176 } 177} 178 179 180#if 0 181void AEC::openwdisplay() { 182 // open TCP connection to program wdisplay.tcl 183 fdwdisplay = socket_async("127.0.0.1", 50999); 184}; 185#endif 186 187 188static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize) 189{ 190 REAL e; 191 REAL ef; 192 a->x[a->j] = x_; 193 a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x 194 195 // calculate error value 196 // (mic signal - estimated mic signal from spk signal) 197 e = d; 198 if (a->hangover > 0) { 199 e -= a->dotp(a->w, a->x + a->j); 200 } 201 ef = IIR1_highpass(a->Fe, e); // pre-whitening of e 202 203 // optimize: iterative dotp(xf, xf) 204 a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]); 205 206 if (stepsize > 0.0f) { 207 // calculate variable step size 208 REAL mikro_ef = stepsize * ef / a->dotp_xf_xf; 209 210#ifdef DISABLE_ORC 211 // update tap weights (filter learning) 212 int i; 213 for (i = 0; i < NLMS_LEN; i += 2) { 214 // optimize: partial loop unrolling 215 a->w[i] += mikro_ef * a->xf[i + a->j]; 216 a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1]; 217 } 218#else 219 update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN); 220#endif 221 } 222 223 if (--(a->j) < 0) { 224 // optimize: decrease number of memory copies 225 a->j = NLMS_EXT; 226 memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL)); 227 memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL)); 228 } 229 230 // Saturation 231 if (e > MAXPCM) { 232 return MAXPCM; 233 } else if (e < -MAXPCM) { 234 return -MAXPCM; 235 } else { 236 return e; 237 } 238} 239 240 241int AEC_doAEC(AEC *a, int d_, int x_) 242{ 243 REAL d = (REAL) d_; 244 REAL x = (REAL) x_; 245 246 // Mic Highpass Filter - to remove DC 247 d = IIR_HP_highpass(a->acMic, d); 248 249 // Mic Highpass Filter - cut-off below 300Hz 250 d = FIR_HP_300Hz_highpass(a->cutoff, d); 251 252 // Amplify, for e.g. Soundcards with -6dB max. volume 253 d *= a->gain; 254 255 // Spk Highpass Filter - to remove DC 256 x = IIR_HP_highpass(a->acSpk, x); 257 258 // Double Talk Detector 259 a->stepsize = AEC_dtd(a, d, x); 260 261 // Leaky (ageing of vector w) 262 AEC_leaky(a); 263 264 // Acoustic Echo Cancellation 265 d = AEC_nlms_pw(a, d, x, a->stepsize); 266 267#if 0 268 if (fdwdisplay >= 0) { 269 if (++dumpcnt >= (WIDEB*RATE/10)) { 270 // wdisplay creates 10 dumps per seconds = large CPU load! 271 dumpcnt = 0; 272 write(fdwdisplay, ws, DUMP_LEN*sizeof(float)); 273 // we don't check return value. This is not production quality!!! 274 memset(ws, 0, sizeof(ws)); 275 } else { 276 int i; 277 for (i = 0; i < DUMP_LEN; i += 2) { 278 // optimize: partial loop unrolling 279 ws[i] += w[i]; 280 ws[i + 1] += w[i + 1]; 281 } 282 } 283 } 284#endif 285 286 return (int) d; 287} 288