153a5a1b3Sopenharmony_ci/* aec.cpp
253a5a1b3Sopenharmony_ci *
353a5a1b3Sopenharmony_ci * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
453a5a1b3Sopenharmony_ci * All Rights Reserved.
553a5a1b3Sopenharmony_ci *
653a5a1b3Sopenharmony_ci * Acoustic Echo Cancellation NLMS-pw algorithm
753a5a1b3Sopenharmony_ci *
853a5a1b3Sopenharmony_ci * Version 0.3 filter created with www.dsptutor.freeuk.com
953a5a1b3Sopenharmony_ci * Version 0.3.1 Allow change of stability parameter delta
1053a5a1b3Sopenharmony_ci * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
1153a5a1b3Sopenharmony_ci */
1253a5a1b3Sopenharmony_ci
1353a5a1b3Sopenharmony_ci#ifndef _GNU_SOURCE
1453a5a1b3Sopenharmony_ci#define _GNU_SOURCE
1553a5a1b3Sopenharmony_ci#endif
1653a5a1b3Sopenharmony_ci
1753a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H
1853a5a1b3Sopenharmony_ci#include <config.h>
1953a5a1b3Sopenharmony_ci#endif
2053a5a1b3Sopenharmony_ci
2153a5a1b3Sopenharmony_ci#include <math.h>
2253a5a1b3Sopenharmony_ci#include <string.h>
2353a5a1b3Sopenharmony_ci#include <stdint.h>
2453a5a1b3Sopenharmony_ci
2553a5a1b3Sopenharmony_ci#include <pulse/xmalloc.h>
2653a5a1b3Sopenharmony_ci
2753a5a1b3Sopenharmony_ci#include "adrian-aec.h"
2853a5a1b3Sopenharmony_ci
2953a5a1b3Sopenharmony_ci#ifndef DISABLE_ORC
3053a5a1b3Sopenharmony_ci#include "adrian-aec-orc-gen.h"
3153a5a1b3Sopenharmony_ci#endif
3253a5a1b3Sopenharmony_ci
3353a5a1b3Sopenharmony_ci#ifdef __SSE__
3453a5a1b3Sopenharmony_ci#include <xmmintrin.h>
3553a5a1b3Sopenharmony_ci#endif
3653a5a1b3Sopenharmony_ci
3753a5a1b3Sopenharmony_ci/* Vector Dot Product */
3853a5a1b3Sopenharmony_cistatic REAL dotp(REAL a[], REAL b[])
3953a5a1b3Sopenharmony_ci{
4053a5a1b3Sopenharmony_ci  REAL sum0 = 0.0f, sum1 = 0.0f;
4153a5a1b3Sopenharmony_ci  int j;
4253a5a1b3Sopenharmony_ci
4353a5a1b3Sopenharmony_ci  for (j = 0; j < NLMS_LEN; j += 2) {
4453a5a1b3Sopenharmony_ci    // optimize: partial loop unrolling
4553a5a1b3Sopenharmony_ci    sum0 += a[j] * b[j];
4653a5a1b3Sopenharmony_ci    sum1 += a[j + 1] * b[j + 1];
4753a5a1b3Sopenharmony_ci  }
4853a5a1b3Sopenharmony_ci  return sum0 + sum1;
4953a5a1b3Sopenharmony_ci}
5053a5a1b3Sopenharmony_ci
5153a5a1b3Sopenharmony_cistatic REAL dotp_sse(REAL a[], REAL b[])
5253a5a1b3Sopenharmony_ci{
5353a5a1b3Sopenharmony_ci#ifdef __SSE__
5453a5a1b3Sopenharmony_ci  /* This is taken from speex's inner product implementation */
5553a5a1b3Sopenharmony_ci  int j;
5653a5a1b3Sopenharmony_ci  REAL sum;
5753a5a1b3Sopenharmony_ci  __m128 acc = _mm_setzero_ps();
5853a5a1b3Sopenharmony_ci
5953a5a1b3Sopenharmony_ci  for (j=0;j<NLMS_LEN;j+=8)
6053a5a1b3Sopenharmony_ci  {
6153a5a1b3Sopenharmony_ci    acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
6253a5a1b3Sopenharmony_ci    acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
6353a5a1b3Sopenharmony_ci  }
6453a5a1b3Sopenharmony_ci  acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
6553a5a1b3Sopenharmony_ci  acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
6653a5a1b3Sopenharmony_ci  _mm_store_ss(&sum, acc);
6753a5a1b3Sopenharmony_ci
6853a5a1b3Sopenharmony_ci  return sum;
6953a5a1b3Sopenharmony_ci#else
7053a5a1b3Sopenharmony_ci  return dotp(a, b);
7153a5a1b3Sopenharmony_ci#endif
7253a5a1b3Sopenharmony_ci}
7353a5a1b3Sopenharmony_ci
7453a5a1b3Sopenharmony_ci
7553a5a1b3Sopenharmony_ciAEC* AEC_init(int RATE, int have_vector)
7653a5a1b3Sopenharmony_ci{
7753a5a1b3Sopenharmony_ci  AEC *a = pa_xnew0(AEC, 1);
7853a5a1b3Sopenharmony_ci  a->j = NLMS_EXT;
7953a5a1b3Sopenharmony_ci  AEC_setambient(a, NoiseFloor);
8053a5a1b3Sopenharmony_ci  a->dfast = a->dslow = M75dB_PCM;
8153a5a1b3Sopenharmony_ci  a->xfast = a->xslow = M80dB_PCM;
8253a5a1b3Sopenharmony_ci  a->gain = 1.0f;
8353a5a1b3Sopenharmony_ci  a->Fx = IIR1_init(2000.0f/RATE);
8453a5a1b3Sopenharmony_ci  a->Fe = IIR1_init(2000.0f/RATE);
8553a5a1b3Sopenharmony_ci  a->cutoff = FIR_HP_300Hz_init();
8653a5a1b3Sopenharmony_ci  a->acMic = IIR_HP_init();
8753a5a1b3Sopenharmony_ci  a->acSpk = IIR_HP_init();
8853a5a1b3Sopenharmony_ci
8953a5a1b3Sopenharmony_ci  a->aes_y2 = M0dB;
9053a5a1b3Sopenharmony_ci
9153a5a1b3Sopenharmony_ci  a->fdwdisplay = -1;
9253a5a1b3Sopenharmony_ci
9353a5a1b3Sopenharmony_ci  if (have_vector) {
9453a5a1b3Sopenharmony_ci      /* Get a 16-byte aligned location */
9553a5a1b3Sopenharmony_ci      a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
9653a5a1b3Sopenharmony_ci      a->dotp = dotp_sse;
9753a5a1b3Sopenharmony_ci  } else {
9853a5a1b3Sopenharmony_ci      /* We don't care about alignment, just use the array as-is */
9953a5a1b3Sopenharmony_ci      a->w = a->w_arr;
10053a5a1b3Sopenharmony_ci      a->dotp = dotp;
10153a5a1b3Sopenharmony_ci  }
10253a5a1b3Sopenharmony_ci
10353a5a1b3Sopenharmony_ci  return a;
10453a5a1b3Sopenharmony_ci}
10553a5a1b3Sopenharmony_ci
10653a5a1b3Sopenharmony_civoid AEC_done(AEC *a) {
10753a5a1b3Sopenharmony_ci    pa_assert(a);
10853a5a1b3Sopenharmony_ci
10953a5a1b3Sopenharmony_ci    pa_xfree(a->Fx);
11053a5a1b3Sopenharmony_ci    pa_xfree(a->Fe);
11153a5a1b3Sopenharmony_ci    pa_xfree(a->acMic);
11253a5a1b3Sopenharmony_ci    pa_xfree(a->acSpk);
11353a5a1b3Sopenharmony_ci    pa_xfree(a->cutoff);
11453a5a1b3Sopenharmony_ci    pa_xfree(a);
11553a5a1b3Sopenharmony_ci}
11653a5a1b3Sopenharmony_ci
11753a5a1b3Sopenharmony_ci// Adrian soft decision DTD
11853a5a1b3Sopenharmony_ci// (Dual Average Near-End to Far-End signal Ratio DTD)
11953a5a1b3Sopenharmony_ci// This algorithm uses exponential smoothing with different
12053a5a1b3Sopenharmony_ci// ageing parameters to get fast and slow near-end and far-end
12153a5a1b3Sopenharmony_ci// signal averages. The ratio of NFRs term
12253a5a1b3Sopenharmony_ci// (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
12353a5a1b3Sopenharmony_ci// A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
12453a5a1b3Sopenharmony_ci// mapped to 1.0 with a limited linear function.
12553a5a1b3Sopenharmony_cistatic float AEC_dtd(AEC *a, REAL d, REAL x)
12653a5a1b3Sopenharmony_ci{
12753a5a1b3Sopenharmony_ci  float ratio, stepsize;
12853a5a1b3Sopenharmony_ci
12953a5a1b3Sopenharmony_ci  // fast near-end and far-end average
13053a5a1b3Sopenharmony_ci  a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
13153a5a1b3Sopenharmony_ci  a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
13253a5a1b3Sopenharmony_ci
13353a5a1b3Sopenharmony_ci  // slow near-end and far-end average
13453a5a1b3Sopenharmony_ci  a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
13553a5a1b3Sopenharmony_ci  a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
13653a5a1b3Sopenharmony_ci
13753a5a1b3Sopenharmony_ci  if (a->xfast < M70dB_PCM) {
13853a5a1b3Sopenharmony_ci    return 0.0f;   // no Spk signal
13953a5a1b3Sopenharmony_ci  }
14053a5a1b3Sopenharmony_ci
14153a5a1b3Sopenharmony_ci  if (a->dfast < M70dB_PCM) {
14253a5a1b3Sopenharmony_ci    return 0.0f;   // no Mic signal
14353a5a1b3Sopenharmony_ci  }
14453a5a1b3Sopenharmony_ci
14553a5a1b3Sopenharmony_ci  // ratio of NFRs
14653a5a1b3Sopenharmony_ci  ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
14753a5a1b3Sopenharmony_ci
14853a5a1b3Sopenharmony_ci  // Linear interpolation with clamping at the limits
14953a5a1b3Sopenharmony_ci  if (ratio < STEPX1)
15053a5a1b3Sopenharmony_ci    stepsize = STEPY1;
15153a5a1b3Sopenharmony_ci  else if (ratio > STEPX2)
15253a5a1b3Sopenharmony_ci    stepsize = STEPY2;
15353a5a1b3Sopenharmony_ci  else
15453a5a1b3Sopenharmony_ci    stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1);
15553a5a1b3Sopenharmony_ci
15653a5a1b3Sopenharmony_ci  return stepsize;
15753a5a1b3Sopenharmony_ci}
15853a5a1b3Sopenharmony_ci
15953a5a1b3Sopenharmony_ci
16053a5a1b3Sopenharmony_cistatic void AEC_leaky(AEC *a)
16153a5a1b3Sopenharmony_ci// The xfast signal is used to charge the hangover timer to Thold.
16253a5a1b3Sopenharmony_ci// When hangover expires (no Spk signal for some time) the vector w
16353a5a1b3Sopenharmony_ci// is erased. This is my implementation of Leaky NLMS.
16453a5a1b3Sopenharmony_ci{
16553a5a1b3Sopenharmony_ci  if (a->xfast >= M70dB_PCM) {
16653a5a1b3Sopenharmony_ci    // vector w is valid for hangover Thold time
16753a5a1b3Sopenharmony_ci    a->hangover = Thold;
16853a5a1b3Sopenharmony_ci  } else {
16953a5a1b3Sopenharmony_ci    if (a->hangover > 1) {
17053a5a1b3Sopenharmony_ci      --(a->hangover);
17153a5a1b3Sopenharmony_ci    } else if (1 == a->hangover) {
17253a5a1b3Sopenharmony_ci      --(a->hangover);
17353a5a1b3Sopenharmony_ci      // My Leaky NLMS is to erase vector w when hangover expires
17453a5a1b3Sopenharmony_ci      memset(a->w_arr, 0, sizeof(a->w_arr));
17553a5a1b3Sopenharmony_ci    }
17653a5a1b3Sopenharmony_ci  }
17753a5a1b3Sopenharmony_ci}
17853a5a1b3Sopenharmony_ci
17953a5a1b3Sopenharmony_ci
18053a5a1b3Sopenharmony_ci#if 0
18153a5a1b3Sopenharmony_civoid AEC::openwdisplay() {
18253a5a1b3Sopenharmony_ci  // open TCP connection to program wdisplay.tcl
18353a5a1b3Sopenharmony_ci  fdwdisplay = socket_async("127.0.0.1", 50999);
18453a5a1b3Sopenharmony_ci};
18553a5a1b3Sopenharmony_ci#endif
18653a5a1b3Sopenharmony_ci
18753a5a1b3Sopenharmony_ci
18853a5a1b3Sopenharmony_cistatic REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
18953a5a1b3Sopenharmony_ci{
19053a5a1b3Sopenharmony_ci  REAL e;
19153a5a1b3Sopenharmony_ci  REAL ef;
19253a5a1b3Sopenharmony_ci  a->x[a->j] = x_;
19353a5a1b3Sopenharmony_ci  a->xf[a->j] = IIR1_highpass(a->Fx, x_);     // pre-whitening of x
19453a5a1b3Sopenharmony_ci
19553a5a1b3Sopenharmony_ci  // calculate error value
19653a5a1b3Sopenharmony_ci  // (mic signal - estimated mic signal from spk signal)
19753a5a1b3Sopenharmony_ci  e = d;
19853a5a1b3Sopenharmony_ci  if (a->hangover > 0) {
19953a5a1b3Sopenharmony_ci    e -= a->dotp(a->w, a->x + a->j);
20053a5a1b3Sopenharmony_ci  }
20153a5a1b3Sopenharmony_ci  ef = IIR1_highpass(a->Fe, e);     // pre-whitening of e
20253a5a1b3Sopenharmony_ci
20353a5a1b3Sopenharmony_ci  // optimize: iterative dotp(xf, xf)
20453a5a1b3Sopenharmony_ci  a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
20553a5a1b3Sopenharmony_ci
20653a5a1b3Sopenharmony_ci  if (stepsize > 0.0f) {
20753a5a1b3Sopenharmony_ci    // calculate variable step size
20853a5a1b3Sopenharmony_ci    REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
20953a5a1b3Sopenharmony_ci
21053a5a1b3Sopenharmony_ci#ifdef DISABLE_ORC
21153a5a1b3Sopenharmony_ci    // update tap weights (filter learning)
21253a5a1b3Sopenharmony_ci    int i;
21353a5a1b3Sopenharmony_ci    for (i = 0; i < NLMS_LEN; i += 2) {
21453a5a1b3Sopenharmony_ci      // optimize: partial loop unrolling
21553a5a1b3Sopenharmony_ci      a->w[i] += mikro_ef * a->xf[i + a->j];
21653a5a1b3Sopenharmony_ci      a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
21753a5a1b3Sopenharmony_ci    }
21853a5a1b3Sopenharmony_ci#else
21953a5a1b3Sopenharmony_ci    update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
22053a5a1b3Sopenharmony_ci#endif
22153a5a1b3Sopenharmony_ci  }
22253a5a1b3Sopenharmony_ci
22353a5a1b3Sopenharmony_ci  if (--(a->j) < 0) {
22453a5a1b3Sopenharmony_ci    // optimize: decrease number of memory copies
22553a5a1b3Sopenharmony_ci    a->j = NLMS_EXT;
22653a5a1b3Sopenharmony_ci    memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
22753a5a1b3Sopenharmony_ci    memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
22853a5a1b3Sopenharmony_ci  }
22953a5a1b3Sopenharmony_ci
23053a5a1b3Sopenharmony_ci  // Saturation
23153a5a1b3Sopenharmony_ci  if (e > MAXPCM) {
23253a5a1b3Sopenharmony_ci    return MAXPCM;
23353a5a1b3Sopenharmony_ci  } else if (e < -MAXPCM) {
23453a5a1b3Sopenharmony_ci    return -MAXPCM;
23553a5a1b3Sopenharmony_ci  } else {
23653a5a1b3Sopenharmony_ci    return e;
23753a5a1b3Sopenharmony_ci  }
23853a5a1b3Sopenharmony_ci}
23953a5a1b3Sopenharmony_ci
24053a5a1b3Sopenharmony_ci
24153a5a1b3Sopenharmony_ciint AEC_doAEC(AEC *a, int d_, int x_)
24253a5a1b3Sopenharmony_ci{
24353a5a1b3Sopenharmony_ci  REAL d = (REAL) d_;
24453a5a1b3Sopenharmony_ci  REAL x = (REAL) x_;
24553a5a1b3Sopenharmony_ci
24653a5a1b3Sopenharmony_ci  // Mic Highpass Filter - to remove DC
24753a5a1b3Sopenharmony_ci  d = IIR_HP_highpass(a->acMic, d);
24853a5a1b3Sopenharmony_ci
24953a5a1b3Sopenharmony_ci  // Mic Highpass Filter - cut-off below 300Hz
25053a5a1b3Sopenharmony_ci  d = FIR_HP_300Hz_highpass(a->cutoff, d);
25153a5a1b3Sopenharmony_ci
25253a5a1b3Sopenharmony_ci  // Amplify, for e.g. Soundcards with -6dB max. volume
25353a5a1b3Sopenharmony_ci  d *= a->gain;
25453a5a1b3Sopenharmony_ci
25553a5a1b3Sopenharmony_ci  // Spk Highpass Filter - to remove DC
25653a5a1b3Sopenharmony_ci  x = IIR_HP_highpass(a->acSpk, x);
25753a5a1b3Sopenharmony_ci
25853a5a1b3Sopenharmony_ci  // Double Talk Detector
25953a5a1b3Sopenharmony_ci  a->stepsize = AEC_dtd(a, d, x);
26053a5a1b3Sopenharmony_ci
26153a5a1b3Sopenharmony_ci  // Leaky (ageing of vector w)
26253a5a1b3Sopenharmony_ci  AEC_leaky(a);
26353a5a1b3Sopenharmony_ci
26453a5a1b3Sopenharmony_ci  // Acoustic Echo Cancellation
26553a5a1b3Sopenharmony_ci  d = AEC_nlms_pw(a, d, x, a->stepsize);
26653a5a1b3Sopenharmony_ci
26753a5a1b3Sopenharmony_ci#if 0
26853a5a1b3Sopenharmony_ci  if (fdwdisplay >= 0) {
26953a5a1b3Sopenharmony_ci    if (++dumpcnt >= (WIDEB*RATE/10)) {
27053a5a1b3Sopenharmony_ci      // wdisplay creates 10 dumps per seconds = large CPU load!
27153a5a1b3Sopenharmony_ci      dumpcnt = 0;
27253a5a1b3Sopenharmony_ci      write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
27353a5a1b3Sopenharmony_ci      // we don't check return value. This is not production quality!!!
27453a5a1b3Sopenharmony_ci      memset(ws, 0, sizeof(ws));
27553a5a1b3Sopenharmony_ci    } else {
27653a5a1b3Sopenharmony_ci      int i;
27753a5a1b3Sopenharmony_ci      for (i = 0; i < DUMP_LEN; i += 2) {
27853a5a1b3Sopenharmony_ci        // optimize: partial loop unrolling
27953a5a1b3Sopenharmony_ci        ws[i] += w[i];
28053a5a1b3Sopenharmony_ci        ws[i + 1] += w[i + 1];
28153a5a1b3Sopenharmony_ci      }
28253a5a1b3Sopenharmony_ci    }
28353a5a1b3Sopenharmony_ci  }
28453a5a1b3Sopenharmony_ci#endif
28553a5a1b3Sopenharmony_ci
28653a5a1b3Sopenharmony_ci  return (int) d;
28753a5a1b3Sopenharmony_ci}
288