1/***
2    This file is part of PulseAudio.
3
4    Copyright 2011 Collabora Ltd.
5              2015 Aldebaran SoftBank Group
6
7    Contributor: Arun Raghavan <mail@arunraghavan.net>
8
9    PulseAudio is free software; you can redistribute it and/or modify
10    it under the terms of the GNU Lesser General Public License as published
11    by the Free Software Foundation; either version 2.1 of the License,
12    or (at your option) any later version.
13
14    PulseAudio is distributed in the hope that it will be useful, but
15    WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17    General Public License for more details.
18
19    You should have received a copy of the GNU Lesser General Public License
20    along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
21***/
22
23#ifdef HAVE_CONFIG_H
24#include <config.h>
25#endif
26
27#include <pulse/cdecl.h>
28
29PA_C_DECL_BEGIN
30#include <pulsecore/core-util.h>
31#include <pulsecore/modargs.h>
32
33#include <pulse/timeval.h>
34#include "echo-cancel.h"
35PA_C_DECL_END
36
37#include <webrtc/modules/audio_processing/include/audio_processing.h>
38#include <webrtc/modules/interface/module_common_types.h>
39#include <webrtc/system_wrappers/include/trace.h>
40
41#define BLOCK_SIZE_US 10000
42
43#define DEFAULT_HIGH_PASS_FILTER true
44#define DEFAULT_NOISE_SUPPRESSION true
45#define DEFAULT_ANALOG_GAIN_CONTROL true
46#define DEFAULT_DIGITAL_GAIN_CONTROL false
47#define DEFAULT_MOBILE false
48#define DEFAULT_ROUTING_MODE "speakerphone"
49#define DEFAULT_COMFORT_NOISE true
50#define DEFAULT_DRIFT_COMPENSATION false
51#define DEFAULT_VAD true
52#define DEFAULT_EXTENDED_FILTER false
53#define DEFAULT_INTELLIGIBILITY_ENHANCER false
54#define DEFAULT_EXPERIMENTAL_AGC false
55#define DEFAULT_AGC_START_VOLUME 85
56#define DEFAULT_BEAMFORMING false
57#define DEFAULT_TRACE false
58
59#define WEBRTC_AGC_MAX_VOLUME 255
60
/* Submodule arguments accepted by the WebRTC canceller; each one is parsed
 * and validated in pa_webrtc_ec_init() below — keep the two in sync. */
static const char* const valid_modargs[] = {
    "high_pass_filter",
    "noise_suppression",
    "analog_gain_control",
    "digital_gain_control",
    "mobile",
    "routing_mode",        /* only valid with mobile=true */
    "comfort_noise",       /* only valid with mobile=true */
    "drift_compensation",
    "voice_detection",
    "extended_filter",
    "intelligibility_enhancer",
    "experimental_agc",
    "agc_start_volume",
    "beamforming",
    "mic_geometry", /* documented in parse_mic_geometry() */
    "target_direction", /* documented in parse_mic_geometry() */
    "trace",
    NULL
};
81
82static int routing_mode_from_string(const char *rmode) {
83    if (pa_streq(rmode, "quiet-earpiece-or-headset"))
84        return webrtc::EchoControlMobile::kQuietEarpieceOrHeadset;
85    else if (pa_streq(rmode, "earpiece"))
86        return webrtc::EchoControlMobile::kEarpiece;
87    else if (pa_streq(rmode, "loud-earpiece"))
88        return webrtc::EchoControlMobile::kLoudEarpiece;
89    else if (pa_streq(rmode, "speakerphone"))
90        return webrtc::EchoControlMobile::kSpeakerphone;
91    else if (pa_streq(rmode, "loud-speakerphone"))
92        return webrtc::EchoControlMobile::kLoudSpeakerphone;
93    else
94        return -1;
95}
96
/* Forwards WebRTC trace output into the PulseAudio log, mapping each WebRTC
 * trace level to the closest PulseAudio log level. Registered via
 * webrtc::Trace::SetTraceCallback() when the "trace" modarg is enabled. */
class PaWebrtcTraceCallback : public webrtc::TraceCallback {
    /* level is a bitmask; the most severe bits are tested first so a
     * message is logged at its highest applicable severity. */
    void Print(webrtc::TraceLevel level, const char *message, int length)
    {
        if (level & webrtc::kTraceError || level & webrtc::kTraceCritical)
            pa_log("%s", message);
        else if (level & webrtc::kTraceWarning)
            pa_log_warn("%s", message);
        else if (level & webrtc::kTraceInfo)
            pa_log_info("%s", message);
        else
            pa_log_debug("%s", message);
    }
};
110
111static int webrtc_volume_from_pa(pa_volume_t v)
112{
113    return (v * WEBRTC_AGC_MAX_VOLUME) / PA_VOLUME_NORM;
114}
115
116static pa_volume_t webrtc_volume_to_pa(int v)
117{
118    return (v * PA_VOLUME_NORM) / WEBRTC_AGC_MAX_VOLUME;
119}
120
121static void webrtc_ec_fixate_spec(pa_sample_spec *rec_ss, pa_channel_map *rec_map,
122                                  pa_sample_spec *play_ss, pa_channel_map *play_map,
123                                  pa_sample_spec *out_ss, pa_channel_map *out_map,
124                                  bool beamforming)
125{
126    rec_ss->format = PA_SAMPLE_FLOAT32NE;
127    play_ss->format = PA_SAMPLE_FLOAT32NE;
128
129    /* AudioProcessing expects one of the following rates */
130    if (rec_ss->rate >= 48000)
131        rec_ss->rate = 48000;
132    else if (rec_ss->rate >= 32000)
133        rec_ss->rate = 32000;
134    else if (rec_ss->rate >= 16000)
135        rec_ss->rate = 16000;
136    else
137        rec_ss->rate = 8000;
138
139    *out_ss = *rec_ss;
140    *out_map = *rec_map;
141
142    if (beamforming) {
143        /* The beamformer gives us a single channel */
144        out_ss->channels = 1;
145        pa_channel_map_init_mono(out_map);
146    }
147
148    /* Playback stream rate needs to be the same as capture */
149    play_ss->rate = rec_ss->rate;
150}
151
/* Parse a comma-separated triple of floats ("x,y,z") from *point into f.
 * On success, advances *point past the consumed characters and returns
 * true; on a malformed triple, returns false with *point untouched. */
static bool parse_point(const char **point, float (&f)[3]) {
    int matched, consumed;

    matched = sscanf(*point, "%g,%g,%g%n", &f[0], &f[1], &f[2], &consumed);
    if (matched != 3)
        return false;

    /* Skip past the part of the string we just parsed */
    *point += consumed;

    return true;
}
164
165static bool parse_mic_geometry(const char **mic_geometry, std::vector<webrtc::Point>& geometry) {
166    /* The microphone geometry is expressed as cartesian point form:
167     *   x1,y1,z1,x2,y2,z2,...
168     *
169     * Where x1,y1,z1 is the position of the first microphone with regards to
170     * the array's "center", x2,y2,z2 the position of the second, and so on.
171     *
172     * 'x' is the horizontal coordinate, with positive values being to the
173     * right from the mic array's perspective.
174     *
175     * 'y' is the depth coordinate, with positive values being in front of the
176     * array.
177     *
178     * 'z' is the vertical coordinate, with positive values being above the
179     * array.
180     *
181     * All distances are in meters.
182     */
183
184    /* The target direction is expected to be in spherical point form:
185     *   a,e,r
186     *
187     * Where 'a' is the azimuth of the target point relative to the center of
188     * the array, 'e' its elevation, and 'r' the radius.
189     *
190     * 0 radians azimuth is to the right of the array, and positive angles
191     * move in a counter-clockwise direction.
192     *
193     * 0 radians elevation is horizontal w.r.t. the array, and positive
194     * angles go upwards.
195     *
196     * radius is distance from the array center in meters.
197     */
198
199    long unsigned int i;
200    float f[3];
201
202    for (i = 0; i < geometry.size(); i++) {
203        if (!parse_point(mic_geometry, f)) {
204            pa_log("Failed to parse channel %lu in mic_geometry", i);
205            return false;
206        }
207
208        /* Except for the last point, we should have a trailing comma */
209        if (i != geometry.size() - 1) {
210            if (**mic_geometry != ',') {
211                pa_log("Failed to parse channel %lu in mic_geometry", i);
212                return false;
213            }
214
215            (*mic_geometry)++;
216        }
217
218        pa_log_debug("Got mic #%lu position: (%g, %g, %g)", i, f[0], f[1], f[2]);
219
220        geometry[i].c[0] = f[0];
221        geometry[i].c[1] = f[1];
222        geometry[i].c[2] = f[2];
223    }
224
225    if (**mic_geometry != '\0') {
226        pa_log("Failed to parse mic_geometry value: more parameters than expected");
227        return false;
228    }
229
230    return true;
231}
232
/* Initialise the WebRTC-based echo canceller.
 *
 * Parses the submodule arguments in 'args', fixes up the requested
 * capture/playback sample specs and channel maps to something
 * webrtc::AudioProcessing supports, creates and configures the
 * AudioProcessing instance, and allocates the per-channel float buffers
 * used for (de)interleaving.
 *
 * On success, fills in *out_ss/*out_map (the post-processing output
 * format), *nframes (the block size in frames), stores all state in
 * ec->params.webrtc, and returns true. On failure, releases anything
 * already acquired and returns false. */
bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec,
                       pa_sample_spec *rec_ss, pa_channel_map *rec_map,
                       pa_sample_spec *play_ss, pa_channel_map *play_map,
                       pa_sample_spec *out_ss, pa_channel_map *out_map,
                       uint32_t *nframes, const char *args) {
    webrtc::AudioProcessing *apm = NULL;
    webrtc::ProcessingConfig pconfig;
    webrtc::Config config;
    bool hpf, ns, agc, dgc, mobile, cn, vad, ext_filter, intelligibility, experimental_agc, beamforming;
    int rm = -1, i;
    uint32_t agc_start_volume;
    pa_modargs *ma;
    bool trace = false;

    if (!(ma = pa_modargs_new(args, valid_modargs))) {
        pa_log("Failed to parse submodule arguments.");
        goto fail;
    }

    /* Simple boolean feature toggles */
    hpf = DEFAULT_HIGH_PASS_FILTER;
    if (pa_modargs_get_value_boolean(ma, "high_pass_filter", &hpf) < 0) {
        pa_log("Failed to parse high_pass_filter value");
        goto fail;
    }

    ns = DEFAULT_NOISE_SUPPRESSION;
    if (pa_modargs_get_value_boolean(ma, "noise_suppression", &ns) < 0) {
        pa_log("Failed to parse noise_suppression value");
        goto fail;
    }

    agc = DEFAULT_ANALOG_GAIN_CONTROL;
    if (pa_modargs_get_value_boolean(ma, "analog_gain_control", &agc) < 0) {
        pa_log("Failed to parse analog_gain_control value");
        goto fail;
    }

    /* Digital gain control defaults off whenever analog AGC is on, since
     * the two are mutually exclusive (checked just below) */
    dgc = agc ? false : DEFAULT_DIGITAL_GAIN_CONTROL;
    if (pa_modargs_get_value_boolean(ma, "digital_gain_control", &dgc) < 0) {
        pa_log("Failed to parse digital_gain_control value");
        goto fail;
    }

    if (agc && dgc) {
        pa_log("You must pick only one between analog and digital gain control");
        goto fail;
    }

    mobile = DEFAULT_MOBILE;
    if (pa_modargs_get_value_boolean(ma, "mobile", &mobile) < 0) {
        pa_log("Failed to parse mobile value");
        goto fail;
    }

    ec->params.drift_compensation = DEFAULT_DRIFT_COMPENSATION;
    if (pa_modargs_get_value_boolean(ma, "drift_compensation", &ec->params.drift_compensation) < 0) {
        pa_log("Failed to parse drift_compensation value");
        goto fail;
    }

    /* The mobile (AECM) path excludes drift compensation and is the only
     * one where routing_mode and comfort_noise apply. Note that 'cn' is
     * only initialised here, and only read later on the mobile path. */
    if (mobile) {
        if (ec->params.drift_compensation) {
            pa_log("Can't use drift_compensation in mobile mode");
            goto fail;
        }

        if ((rm = routing_mode_from_string(pa_modargs_get_value(ma, "routing_mode", DEFAULT_ROUTING_MODE))) < 0) {
            pa_log("Failed to parse routing_mode value");
            goto fail;
        }

        cn = DEFAULT_COMFORT_NOISE;
        if (pa_modargs_get_value_boolean(ma, "comfort_noise", &cn) < 0) {
            pa_log("Failed to parse cn value");
            goto fail;
        }
    } else {
        if (pa_modargs_get_value(ma, "comfort_noise", NULL) || pa_modargs_get_value(ma, "routing_mode", NULL)) {
            pa_log("The routing_mode and comfort_noise options are only valid with mobile=true");
            goto fail;
        }
    }

    vad = DEFAULT_VAD;
    if (pa_modargs_get_value_boolean(ma, "voice_detection", &vad) < 0) {
        pa_log("Failed to parse voice_detection value");
        goto fail;
    }

    ext_filter = DEFAULT_EXTENDED_FILTER;
    if (pa_modargs_get_value_boolean(ma, "extended_filter", &ext_filter) < 0) {
        pa_log("Failed to parse extended_filter value");
        goto fail;
    }

    intelligibility = DEFAULT_INTELLIGIBILITY_ENHANCER;
    if (pa_modargs_get_value_boolean(ma, "intelligibility_enhancer", &intelligibility) < 0) {
        pa_log("Failed to parse intelligibility_enhancer value");
        goto fail;
    }

    experimental_agc = DEFAULT_EXPERIMENTAL_AGC;
    if (pa_modargs_get_value_boolean(ma, "experimental_agc", &experimental_agc) < 0) {
        pa_log("Failed to parse experimental_agc value");
        goto fail;
    }

    /* The AGC start volume is in WebRTC's 0..WEBRTC_AGC_MAX_VOLUME range,
     * not a PulseAudio volume */
    agc_start_volume = DEFAULT_AGC_START_VOLUME;
    if (pa_modargs_get_value_u32(ma, "agc_start_volume", &agc_start_volume) < 0) {
        pa_log("Failed to parse agc_start_volume value");
        goto fail;
    }
    if (agc_start_volume > WEBRTC_AGC_MAX_VOLUME) {
        pa_log("AGC start volume must not exceed %u", WEBRTC_AGC_MAX_VOLUME);
        goto fail;
    }
    ec->params.webrtc.agc_start_volume = agc_start_volume;

    beamforming = DEFAULT_BEAMFORMING;
    if (pa_modargs_get_value_boolean(ma, "beamforming", &beamforming) < 0) {
        pa_log("Failed to parse beamforming value");
        goto fail;
    }

    /* Feed the option-style settings into the webrtc::Config that the
     * AudioProcessing instance will be created with */
    if (ext_filter)
        config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
    if (intelligibility)
        pa_log_warn("The intelligibility enhancer is not currently supported");
    if (experimental_agc)
        config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, ec->params.webrtc.agc_start_volume));

    trace = DEFAULT_TRACE;
    if (pa_modargs_get_value_boolean(ma, "trace", &trace) < 0) {
        pa_log("Failed to parse trace value");
        goto fail;
    }

    /* Optional verbose tracing of WebRTC internals into the PA log; the
     * callback object is owned by us and freed in pa_webrtc_ec_done() */
    if (trace) {
        webrtc::Trace::CreateTrace();
        webrtc::Trace::set_level_filter(webrtc::kTraceAll);
        ec->params.webrtc.trace_callback = new PaWebrtcTraceCallback();
        webrtc::Trace::SetTraceCallback((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
    }

    webrtc_ec_fixate_spec(rec_ss, rec_map, play_ss, play_map, out_ss, out_map, beamforming);

    /* We do this after fixate because we need the capture channel count */
    if (beamforming) {
        std::vector<webrtc::Point> geometry(rec_ss->channels);
        webrtc::SphericalPointf direction(0.0f, 0.0f, 0.0f);
        const char *mic_geometry, *target_direction;

        if (!(mic_geometry = pa_modargs_get_value(ma, "mic_geometry", NULL))) {
            pa_log("mic_geometry must be set if beamforming is enabled");
            goto fail;
        }

        if (!parse_mic_geometry(&mic_geometry, geometry)) {
            pa_log("Failed to parse mic_geometry value");
            goto fail;
        }

        if ((target_direction = pa_modargs_get_value(ma, "target_direction", NULL))) {
            float f[3];

            if (!parse_point(&target_direction, f)) {
                pa_log("Failed to parse target_direction value");
                goto fail;
            }

            if (*target_direction != '\0') {
                pa_log("Failed to parse target_direction value: more parameters than expected");
                goto fail;
            }

/* Approximate float-zero test for the elevation/radius components */
#define IS_ZERO(f) ((f) < 0.000001 && (f) > -0.000001)

            if (!IS_ZERO(f[1]) || !IS_ZERO(f[2])) {
                pa_log("The beamformer currently only supports targeting along the azimuth");
                goto fail;
            }

            direction.s[0] = f[0];
            direction.s[1] = f[1];
            direction.s[2] = f[2];
        }

        if (!target_direction)
            config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
        else
            config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry, direction));
    }

    apm = webrtc::AudioProcessing::Create(config);

    pconfig = {
        webrtc::StreamConfig(rec_ss->rate, rec_ss->channels, false), /* input stream */
        webrtc::StreamConfig(out_ss->rate, out_ss->channels, false), /* output stream */
        webrtc::StreamConfig(play_ss->rate, play_ss->channels, false), /* reverse input stream */
        webrtc::StreamConfig(play_ss->rate, play_ss->channels, false), /* reverse output stream */
    };
    if (apm->Initialize(pconfig) != webrtc::AudioProcessing::kNoError) {
        pa_log("Error initialising audio processing module");
        goto fail;
    }

    /* Enable the individual processing components on the APM */
    if (hpf)
        apm->high_pass_filter()->Enable(true);

    if (!mobile) {
        apm->echo_cancellation()->enable_drift_compensation(ec->params.drift_compensation);
        apm->echo_cancellation()->Enable(true);
    } else {
        apm->echo_control_mobile()->set_routing_mode(static_cast<webrtc::EchoControlMobile::RoutingMode>(rm));
        apm->echo_control_mobile()->enable_comfort_noise(cn);
        apm->echo_control_mobile()->Enable(true);
    }

    if (ns) {
        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
        apm->noise_suppression()->Enable(true);
    }

    /* Pick the gain control mode; ec->params.webrtc.agc records whether we
     * must shuttle the analog capture volume in/out on every block (see
     * pa_webrtc_ec_record()) */
    if (agc || dgc) {
        if (mobile && rm <= webrtc::EchoControlMobile::kEarpiece) {
            /* Maybe this should be a knob, but we've got a lot of knobs already */
            apm->gain_control()->set_mode(webrtc::GainControl::kFixedDigital);
            ec->params.webrtc.agc = false;
        } else if (dgc) {
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveDigital);
            ec->params.webrtc.agc = false;
        } else {
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
            if (apm->gain_control()->set_analog_level_limits(0, WEBRTC_AGC_MAX_VOLUME) !=
                    webrtc::AudioProcessing::kNoError) {
                pa_log("Failed to initialise AGC");
                goto fail;
            }
            ec->params.webrtc.agc = true;
        }

        apm->gain_control()->Enable(true);
    }

    if (vad)
        apm->voice_detection()->Enable(true);

    /* Stash everything the processing callbacks will need */
    ec->params.webrtc.apm = apm;
    ec->params.webrtc.rec_ss = *rec_ss;
    ec->params.webrtc.play_ss = *play_ss;
    ec->params.webrtc.out_ss = *out_ss;
    /* Block size in frames for a BLOCK_SIZE_US chunk at the output rate;
     * widened to 64 bits to avoid overflow in the intermediate product */
    ec->params.webrtc.blocksize = (uint64_t) out_ss->rate * BLOCK_SIZE_US / PA_USEC_PER_SEC;
    *nframes = ec->params.webrtc.blocksize;
    ec->params.webrtc.first = true;

    /* Per-channel float buffers used by pa_deinterleave()/pa_interleave()
     * in the processing callbacks; freed in pa_webrtc_ec_done() */
    for (i = 0; i < rec_ss->channels; i++)
        ec->params.webrtc.rec_buffer[i] = pa_xnew(float, *nframes);
    for (i = 0; i < play_ss->channels; i++)
        ec->params.webrtc.play_buffer[i] = pa_xnew(float, *nframes);

    pa_modargs_free(ma);
    return true;

fail:
    /* Release anything acquired so far; assumes ec->params was zeroed by
     * the caller so the trace_callback check is safe before assignment */
    if (ma)
        pa_modargs_free(ma);
    if (ec->params.webrtc.trace_callback) {
        webrtc::Trace::ReturnTrace();
        delete ((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
    } if (apm)
        delete apm;

    return false;
}
507
508void pa_webrtc_ec_play(pa_echo_canceller *ec, const uint8_t *play) {
509    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;
510    const pa_sample_spec *ss = &ec->params.webrtc.play_ss;
511    int n = ec->params.webrtc.blocksize;
512    float **buf = ec->params.webrtc.play_buffer;
513    webrtc::StreamConfig config(ss->rate, ss->channels, false);
514
515    pa_deinterleave(play, (void **) buf, ss->channels, pa_sample_size(ss), n);
516
517    pa_assert_se(apm->ProcessReverseStream(buf, config, config, buf) == webrtc::AudioProcessing::kNoError);
518
519    /* FIXME: If ProcessReverseStream() makes any changes to the audio, such as
520     * applying intelligibility enhancement, those changes don't have any
521     * effect. This function is called at the source side, but the processing
522     * would have to be done in the sink to be able to feed the processed audio
523     * to speakers. */
524}
525
/* Process one block of capture (near-end) audio: deinterleave it, run it
 * through the WebRTC AudioProcessing pipeline (echo cancellation, NS, AGC,
 * ...), and interleave the result into 'out'. When the adaptive analog AGC
 * is active, the capture volume is fed in before processing and any volume
 * change recommended by the AGC is applied afterwards. */
void pa_webrtc_ec_record(pa_echo_canceller *ec, const uint8_t *rec, uint8_t *out) {
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;
    const pa_sample_spec *rec_ss = &ec->params.webrtc.rec_ss;
    const pa_sample_spec *out_ss = &ec->params.webrtc.out_ss;
    float **buf = ec->params.webrtc.rec_buffer;
    int n = ec->params.webrtc.blocksize;
    int old_volume, new_volume;
    webrtc::StreamConfig rec_config(rec_ss->rate, rec_ss->channels, false);
    webrtc::StreamConfig out_config(out_ss->rate, out_ss->channels, false);

    pa_deinterleave(rec, (void **) buf, rec_ss->channels, pa_sample_size(rec_ss), n);

    /* Tell the AGC the current analog capture level (in WebRTC's 0..255
     * range) before processing; it reports a recommended level afterwards */
    if (ec->params.webrtc.agc) {
        pa_volume_t v = pa_echo_canceller_get_capture_volume(ec);
        old_volume = webrtc_volume_from_pa(v);
        apm->gain_control()->set_stream_analog_level(old_volume);
    }

    apm->set_stream_delay_ms(0);
    pa_assert_se(apm->ProcessStream(buf, rec_config, out_config, buf) == webrtc::AudioProcessing::kNoError);

    if (ec->params.webrtc.agc) {
        if (PA_UNLIKELY(ec->params.webrtc.first)) {
            /* We start at a sane default volume (taken from the Chromium
             * condition on the experimental AGC in audio_processing.h). This is
             * needed to make sure that there's enough energy in the capture
             * signal for the AGC to work */
            ec->params.webrtc.first = false;
            new_volume = ec->params.webrtc.agc_start_volume;
        } else {
            new_volume = apm->gain_control()->stream_analog_level();
        }

        /* Only touch the capture volume if the AGC actually wants a change */
        if (old_volume != new_volume)
            pa_echo_canceller_set_capture_volume(ec, webrtc_volume_to_pa(new_volume));
    }

    /* Note: out_ss may have fewer channels than rec_ss (beamforming) */
    pa_interleave((const void **) buf, out_ss->channels, out, pa_sample_size(out_ss), n);
}
565
566void pa_webrtc_ec_set_drift(pa_echo_canceller *ec, float drift) {
567    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;
568
569    apm->echo_cancellation()->set_stream_drift_samples(drift * ec->params.webrtc.blocksize);
570}
571
/* Convenience entry point: feed one playback block into the canceller and
 * then process the matching capture block, in the required order. */
void pa_webrtc_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    pa_webrtc_ec_play(ec, play);
    pa_webrtc_ec_record(ec, rec, out);
}
576
577void pa_webrtc_ec_done(pa_echo_canceller *ec) {
578    int i;
579
580    if (ec->params.webrtc.trace_callback) {
581        webrtc::Trace::ReturnTrace();
582        delete ((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
583    }
584
585    if (ec->params.webrtc.apm) {
586        delete (webrtc::AudioProcessing*)ec->params.webrtc.apm;
587        ec->params.webrtc.apm = NULL;
588    }
589
590    for (i = 0; i < ec->params.webrtc.rec_ss.channels; i++)
591        pa_xfree(ec->params.webrtc.rec_buffer[i]);
592    for (i = 0; i < ec->params.webrtc.play_ss.channels; i++)
593        pa_xfree(ec->params.webrtc.play_buffer[i]);
594}
595