/***
    This file is part of PulseAudio.

    Copyright 2011 Collabora Ltd.
              2015 Aldebaran SoftBank Group

    Contributor: Arun Raghavan <mail@arunraghavan.net>

    PulseAudio is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published
    by the Free Software Foundation; either version 2.1 of the License,
    or (at your option) any later version.

    PulseAudio is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
***/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <pulse/cdecl.h>

/* The PulseAudio headers are C; wrap them so they link correctly from this
 * C++ translation unit. */
PA_C_DECL_BEGIN
#include <pulsecore/core-util.h>
#include <pulsecore/modargs.h>

#include <pulse/timeval.h>
#include "echo-cancel.h"
PA_C_DECL_END

#include <webrtc/modules/audio_processing/include/audio_processing.h>
#include <webrtc/modules/interface/module_common_types.h>
#include <webrtc/system_wrappers/include/trace.h>

/* Audio is processed in chunks of this duration; the per-chunk frame count
 * (ec->params.webrtc.blocksize) is derived from this and the output rate. */
#define BLOCK_SIZE_US 10000

/* Defaults for the submodule arguments parsed in pa_webrtc_ec_init(). */
#define DEFAULT_HIGH_PASS_FILTER true
#define DEFAULT_NOISE_SUPPRESSION true
#define DEFAULT_ANALOG_GAIN_CONTROL true
#define DEFAULT_DIGITAL_GAIN_CONTROL false
#define DEFAULT_MOBILE false
#define DEFAULT_ROUTING_MODE "speakerphone"
#define DEFAULT_COMFORT_NOISE true
#define DEFAULT_DRIFT_COMPENSATION false
#define DEFAULT_VAD true
#define DEFAULT_EXTENDED_FILTER false
#define DEFAULT_INTELLIGIBILITY_ENHANCER false
#define DEFAULT_EXPERIMENTAL_AGC false
#define DEFAULT_AGC_START_VOLUME 85
#define DEFAULT_BEAMFORMING false
#define DEFAULT_TRACE false

/* The WebRTC analog AGC works on a 0..255 level scale; PA volumes are mapped
 * onto this range by webrtc_volume_from_pa()/webrtc_volume_to_pa(). */
#define WEBRTC_AGC_MAX_VOLUME 255

/* Module arguments accepted by this canceller implementation. */
static const char* const valid_modargs[] = {
    "high_pass_filter",
    "noise_suppression",
    "analog_gain_control",
    "digital_gain_control",
    "mobile",
    "routing_mode",
    "comfort_noise",
    "drift_compensation",
    "voice_detection",
    "extended_filter",
    "intelligibility_enhancer",
    "experimental_agc",
    "agc_start_volume",
    "beamforming",
    "mic_geometry", /* documented in parse_mic_geometry() */
    "target_direction", /* documented in parse_mic_geometry() */
    "trace",
    NULL
};

/* Map the "routing_mode" modarg string to a
 * webrtc::EchoControlMobile::RoutingMode value. Returns -1 for an
 * unrecognised string. */
static int routing_mode_from_string(const char *rmode) {
    if (pa_streq(rmode, "quiet-earpiece-or-headset"))
        return webrtc::EchoControlMobile::kQuietEarpieceOrHeadset;
    else if (pa_streq(rmode, "earpiece"))
        return webrtc::EchoControlMobile::kEarpiece;
    else if (pa_streq(rmode, "loud-earpiece"))
        return webrtc::EchoControlMobile::kLoudEarpiece;
    else if (pa_streq(rmode, "speakerphone"))
        return webrtc::EchoControlMobile::kSpeakerphone;
    else if (pa_streq(rmode, "loud-speakerphone"))
        return webrtc::EchoControlMobile::kLoudSpeakerphone;
    else
        return -1;
}

/* Forwards WebRTC trace output into the PulseAudio log, translating the
 * WebRTC trace level to the closest PA log level. Installed only when the
 * "trace" modarg is enabled. */
class PaWebrtcTraceCallback : public webrtc::TraceCallback {
    void Print(webrtc::TraceLevel level, const char *message, int length)
    {
        if (level & webrtc::kTraceError || level & webrtc::kTraceCritical)
            pa_log("%s", message);
        else if (level & webrtc::kTraceWarning)
            pa_log_warn("%s", message);
        else if (level & webrtc::kTraceInfo)
            pa_log_info("%s", message);
        else
            pa_log_debug("%s", message);
    }
};

/* Scale a PA volume (PA_VOLUME_NORM == 100%) to the WebRTC AGC's
 * 0..WEBRTC_AGC_MAX_VOLUME analog level. Integer math truncates, so the
 * round-trip through webrtc_volume_to_pa() is lossy by up to one AGC step. */
static int webrtc_volume_from_pa(pa_volume_t v)
{
    return (v * WEBRTC_AGC_MAX_VOLUME) / PA_VOLUME_NORM;
}

/* Inverse of webrtc_volume_from_pa(): scale a 0..255 AGC level back to a PA
 * volume. */
static pa_volume_t webrtc_volume_to_pa(int v)
{
    return (v * PA_VOLUME_NORM) / WEBRTC_AGC_MAX_VOLUME;
}

/* Constrain the negotiated sample specs/channel maps to what the WebRTC
 * AudioProcessing module can handle: float samples, one of its supported
 * rates, playback at the capture rate, and (with beamforming) a mono output.
 * The out_* spec is derived from the fixed-up capture spec. */
static void webrtc_ec_fixate_spec(pa_sample_spec *rec_ss, pa_channel_map *rec_map,
                                  pa_sample_spec *play_ss, pa_channel_map *play_map,
                                  pa_sample_spec *out_ss, pa_channel_map *out_map,
                                  bool beamforming)
{
    rec_ss->format = PA_SAMPLE_FLOAT32NE;
    play_ss->format = PA_SAMPLE_FLOAT32NE;

    /* AudioProcessing expects one of the following rates */
    if (rec_ss->rate >= 48000)
        rec_ss->rate = 48000;
    else if (rec_ss->rate >= 32000)
        rec_ss->rate = 32000;
    else if (rec_ss->rate >= 16000)
        rec_ss->rate = 16000;
    else
        rec_ss->rate = 8000;

    *out_ss = *rec_ss;
    *out_map = *rec_map;

    if (beamforming) {
        /* The beamformer gives us a single channel */
        out_ss->channels = 1;
        pa_channel_map_init_mono(out_map);
    }

    /* Playback stream rate needs to be the same as capture */
    play_ss->rate = rec_ss->rate;
}

/* Parse a comma-separated triplet of floats ("x,y,z") from *point into f,
 * advancing *point past the consumed characters. Returns false if fewer than
 * three values could be parsed; *point is left unchanged in that case. */
static bool parse_point(const char **point, float (&f)[3]) {
    int ret, length;

    /* %n records how many bytes were consumed so the caller's cursor can be
     * advanced without re-scanning. */
    ret = sscanf(*point, "%g,%g,%g%n", &f[0], &f[1], &f[2], &length);
    if (ret != 3)
        return false;

    /* Consume the bytes we've read so far */
    *point += length;

    return true;
}

/* Parse the "mic_geometry" modarg into one webrtc::Point per capture channel.
 * geometry must already be sized to the expected number of microphones; the
 * string must contain exactly that many points and nothing else. */
static bool parse_mic_geometry(const char **mic_geometry, std::vector<webrtc::Point>& geometry) {
    /* The microphone geometry is expressed as cartesian point form:
     *   x1,y1,z1,x2,y2,z2,...
     *
     * Where x1,y1,z1 is the position of the first microphone with regards to
     * the array's "center", x2,y2,z2 the position of the second, and so on.
     *
     * 'x' is the horizontal coordinate, with positive values being to the
     * right from the mic array's perspective.
     *
     * 'y' is the depth coordinate, with positive values being in front of the
     * array.
     *
     * 'z' is the vertical coordinate, with positive values being above the
     * array.
     *
     * All distances are in meters.
     */

    /* The target direction is expected to be in spherical point form:
     *   a,e,r
     *
     * Where 'a' is the azimuth of the target point relative to the center of
     * the array, 'e' its elevation, and 'r' the radius.
     *
     * 0 radians azimuth is to the right of the array, and positive angles
     * move in a counter-clockwise direction.
     *
     * 0 radians elevation is horizontal w.r.t. the array, and positive
     * angles go upwards.
     *
     * radius is distance from the array center in meters.
     */

    long unsigned int i;
    float f[3];

    for (i = 0; i < geometry.size(); i++) {
        if (!parse_point(mic_geometry, f)) {
            pa_log("Failed to parse channel %lu in mic_geometry", i);
            return false;
        }

        /* Except for the last point, we should have a trailing comma */
        if (i != geometry.size() - 1) {
            if (**mic_geometry != ',') {
                pa_log("Failed to parse channel %lu in mic_geometry", i);
                return false;
            }

            (*mic_geometry)++;
        }

        pa_log_debug("Got mic #%lu position: (%g, %g, %g)", i, f[0], f[1], f[2]);

        geometry[i].c[0] = f[0];
        geometry[i].c[1] = f[1];
        geometry[i].c[2] = f[2];
    }

    /* Reject trailing garbage / extra points. */
    if (**mic_geometry != '\0') {
        pa_log("Failed to parse mic_geometry value: more parameters than expected");
        return false;
    }

    return true;
}

/* Initialise the WebRTC echo canceller backend.
 *
 * Parses the submodule arguments, fixes up the capture/playback/output
 * sample specs via webrtc_ec_fixate_spec(), creates and configures the
 * webrtc::AudioProcessing instance, and allocates the per-channel
 * deinterleave buffers used by pa_webrtc_ec_play()/_record().
 *
 * On success *nframes is set to the number of frames per processing block
 * and true is returned. On any failure everything allocated so far is torn
 * down at the fail: label and false is returned. */
bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec,
                       pa_sample_spec *rec_ss, pa_channel_map *rec_map,
                       pa_sample_spec *play_ss, pa_channel_map *play_map,
                       pa_sample_spec *out_ss, pa_channel_map *out_map,
                       uint32_t *nframes, const char *args) {
    webrtc::AudioProcessing *apm = NULL;
    webrtc::ProcessingConfig pconfig;
    webrtc::Config config;
    bool hpf, ns, agc, dgc, mobile, cn, vad, ext_filter, intelligibility, experimental_agc, beamforming;
    int rm = -1, i;
    uint32_t agc_start_volume;
    pa_modargs *ma;
    bool trace = false;

    if (!(ma = pa_modargs_new(args, valid_modargs))) {
        pa_log("Failed to parse submodule arguments.");
        goto fail;
    }

    hpf = DEFAULT_HIGH_PASS_FILTER;
    if (pa_modargs_get_value_boolean(ma, "high_pass_filter", &hpf) < 0) {
        pa_log("Failed to parse high_pass_filter value");
        goto fail;
    }

    ns = DEFAULT_NOISE_SUPPRESSION;
    if (pa_modargs_get_value_boolean(ma, "noise_suppression", &ns) < 0) {
        pa_log("Failed to parse noise_suppression value");
        goto fail;
    }

    agc = DEFAULT_ANALOG_GAIN_CONTROL;
    if (pa_modargs_get_value_boolean(ma, "analog_gain_control", &agc) < 0) {
        pa_log("Failed to parse analog_gain_control value");
        goto fail;
    }

    /* Digital gain control defaults off whenever analog gain control is on,
     * so the two are mutually exclusive unless explicitly misconfigured
     * (which is caught just below). */
    dgc = agc ? false : DEFAULT_DIGITAL_GAIN_CONTROL;
    if (pa_modargs_get_value_boolean(ma, "digital_gain_control", &dgc) < 0) {
        pa_log("Failed to parse digital_gain_control value");
        goto fail;
    }

    if (agc && dgc) {
        pa_log("You must pick only one between analog and digital gain control");
        goto fail;
    }

    mobile = DEFAULT_MOBILE;
    if (pa_modargs_get_value_boolean(ma, "mobile", &mobile) < 0) {
        pa_log("Failed to parse mobile value");
        goto fail;
    }

    ec->params.drift_compensation = DEFAULT_DRIFT_COMPENSATION;
    if (pa_modargs_get_value_boolean(ma, "drift_compensation", &ec->params.drift_compensation) < 0) {
        pa_log("Failed to parse drift_compensation value");
        goto fail;
    }

    if (mobile) {
        /* The mobile (AECM) path supports routing_mode/comfort_noise but not
         * drift compensation; the desktop path is the reverse. */
        if (ec->params.drift_compensation) {
            pa_log("Can't use drift_compensation in mobile mode");
            goto fail;
        }

        if ((rm = routing_mode_from_string(pa_modargs_get_value(ma, "routing_mode", DEFAULT_ROUTING_MODE))) < 0) {
            pa_log("Failed to parse routing_mode value");
            goto fail;
        }

        cn = DEFAULT_COMFORT_NOISE;
        if (pa_modargs_get_value_boolean(ma, "comfort_noise", &cn) < 0) {
            pa_log("Failed to parse cn value");
            goto fail;
        }
    } else {
        if (pa_modargs_get_value(ma, "comfort_noise", NULL) || pa_modargs_get_value(ma, "routing_mode", NULL)) {
            pa_log("The routing_mode and comfort_noise options are only valid with mobile=true");
            goto fail;
        }
    }

    vad = DEFAULT_VAD;
    if (pa_modargs_get_value_boolean(ma, "voice_detection", &vad) < 0) {
        pa_log("Failed to parse voice_detection value");
        goto fail;
    }

    ext_filter = DEFAULT_EXTENDED_FILTER;
    if (pa_modargs_get_value_boolean(ma, "extended_filter", &ext_filter) < 0) {
        pa_log("Failed to parse extended_filter value");
        goto fail;
    }

    intelligibility = DEFAULT_INTELLIGIBILITY_ENHANCER;
    if (pa_modargs_get_value_boolean(ma, "intelligibility_enhancer", &intelligibility) < 0) {
        pa_log("Failed to parse intelligibility_enhancer value");
        goto fail;
    }

    experimental_agc = DEFAULT_EXPERIMENTAL_AGC;
    if (pa_modargs_get_value_boolean(ma, "experimental_agc", &experimental_agc) < 0) {
        pa_log("Failed to parse experimental_agc value");
        goto fail;
    }

    agc_start_volume = DEFAULT_AGC_START_VOLUME;
    if (pa_modargs_get_value_u32(ma, "agc_start_volume", &agc_start_volume) < 0) {
        pa_log("Failed to parse agc_start_volume value");
        goto fail;
    }
    if (agc_start_volume > WEBRTC_AGC_MAX_VOLUME) {
        pa_log("AGC start volume must not exceed %u", WEBRTC_AGC_MAX_VOLUME);
        goto fail;
    }
    ec->params.webrtc.agc_start_volume = agc_start_volume;

    beamforming = DEFAULT_BEAMFORMING;
    if (pa_modargs_get_value_boolean(ma, "beamforming", &beamforming) < 0) {
        pa_log("Failed to parse beamforming value");
        goto fail;
    }

    if (ext_filter)
        config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
    if (intelligibility)
        pa_log_warn("The intelligibility enhancer is not currently supported");
    if (experimental_agc)
        config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, ec->params.webrtc.agc_start_volume));

    trace = DEFAULT_TRACE;
    if (pa_modargs_get_value_boolean(ma, "trace", &trace) < 0) {
        pa_log("Failed to parse trace value");
        goto fail;
    }

    if (trace) {
        /* The callback is owned via ec->params.webrtc.trace_callback and
         * released in pa_webrtc_ec_done() (or at fail: below). */
        webrtc::Trace::CreateTrace();
        webrtc::Trace::set_level_filter(webrtc::kTraceAll);
        ec->params.webrtc.trace_callback = new PaWebrtcTraceCallback();
        webrtc::Trace::SetTraceCallback((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
    }

    webrtc_ec_fixate_spec(rec_ss, rec_map, play_ss, play_map, out_ss, out_map, beamforming);

    /* We do this after fixate because we need the capture channel count */
    if (beamforming) {
        std::vector<webrtc::Point> geometry(rec_ss->channels);
        webrtc::SphericalPointf direction(0.0f, 0.0f, 0.0f);
        const char *mic_geometry, *target_direction;

        if (!(mic_geometry = pa_modargs_get_value(ma, "mic_geometry", NULL))) {
            pa_log("mic_geometry must be set if beamforming is enabled");
            goto fail;
        }

        if (!parse_mic_geometry(&mic_geometry, geometry)) {
            pa_log("Failed to parse mic_geometry value");
            goto fail;
        }

        if ((target_direction = pa_modargs_get_value(ma, "target_direction", NULL))) {
            float f[3];

            if (!parse_point(&target_direction, f)) {
                pa_log("Failed to parse target_direction value");
                goto fail;
            }

            if (*target_direction != '\0') {
                pa_log("Failed to parse target_direction value: more parameters than expected");
                goto fail;
            }

/* Tolerance for treating a float coordinate as zero. */
#define IS_ZERO(f) ((f) < 0.000001 && (f) > -0.000001)

            /* Only azimuth steering is supported, so elevation and radius
             * must be (approximately) zero. */
            if (!IS_ZERO(f[1]) || !IS_ZERO(f[2])) {
                pa_log("The beamformer currently only supports targeting along the azimuth");
                goto fail;
            }

            direction.s[0] = f[0];
            direction.s[1] = f[1];
            direction.s[2] = f[2];
        }

        if (!target_direction)
            config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
        else
            config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry, direction));
    }

    apm = webrtc::AudioProcessing::Create(config);

    pconfig = {
        webrtc::StreamConfig(rec_ss->rate, rec_ss->channels, false), /* input stream */
        webrtc::StreamConfig(out_ss->rate, out_ss->channels, false), /* output stream */
        webrtc::StreamConfig(play_ss->rate, play_ss->channels, false), /* reverse input stream */
        webrtc::StreamConfig(play_ss->rate, play_ss->channels, false), /* reverse output stream */
    };
    if (apm->Initialize(pconfig) != webrtc::AudioProcessing::kNoError) {
        pa_log("Error initialising audio processing module");
        goto fail;
    }

    if (hpf)
        apm->high_pass_filter()->Enable(true);

    if (!mobile) {
        apm->echo_cancellation()->enable_drift_compensation(ec->params.drift_compensation);
        apm->echo_cancellation()->Enable(true);
    } else {
        apm->echo_control_mobile()->set_routing_mode(static_cast<webrtc::EchoControlMobile::RoutingMode>(rm));
        apm->echo_control_mobile()->enable_comfort_noise(cn);
        apm->echo_control_mobile()->Enable(true);
    }

    if (ns) {
        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
        apm->noise_suppression()->Enable(true);
    }

    if (agc || dgc) {
        if (mobile && rm <= webrtc::EchoControlMobile::kEarpiece) {
            /* Maybe this should be a knob, but we've got a lot of knobs already */
            apm->gain_control()->set_mode(webrtc::GainControl::kFixedDigital);
            ec->params.webrtc.agc = false;
        } else if (dgc) {
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveDigital);
            ec->params.webrtc.agc = false;
        } else {
            /* Analog mode: we feed the current capture volume to the APM and
             * apply its suggested level in pa_webrtc_ec_record(). */
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
            if (apm->gain_control()->set_analog_level_limits(0, WEBRTC_AGC_MAX_VOLUME) !=
                    webrtc::AudioProcessing::kNoError) {
                pa_log("Failed to initialise AGC");
                goto fail;
            }
            ec->params.webrtc.agc = true;
        }

        apm->gain_control()->Enable(true);
    }

    if (vad)
        apm->voice_detection()->Enable(true);

    ec->params.webrtc.apm = apm;
    ec->params.webrtc.rec_ss = *rec_ss;
    ec->params.webrtc.play_ss = *play_ss;
    ec->params.webrtc.out_ss = *out_ss;
    /* Frames per BLOCK_SIZE_US-long processing chunk. */
    ec->params.webrtc.blocksize = (uint64_t) out_ss->rate * BLOCK_SIZE_US / PA_USEC_PER_SEC;
    *nframes = ec->params.webrtc.blocksize;
    ec->params.webrtc.first = true;

    /* Per-channel planar buffers used to deinterleave one block at a time. */
    for (i = 0; i < rec_ss->channels; i++)
        ec->params.webrtc.rec_buffer[i] = pa_xnew(float, *nframes);
    for (i = 0; i < play_ss->channels; i++)
        ec->params.webrtc.play_buffer[i] = pa_xnew(float, *nframes);

    pa_modargs_free(ma);
    return true;

fail:
    if (ma)
        pa_modargs_free(ma);
    if (ec->params.webrtc.trace_callback) {
        webrtc::Trace::ReturnTrace();
        delete ((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
    }
    if (apm)
        delete apm;

    return false;
}

/* Feed one block of playback ("reverse stream") audio to the canceller.
 * play points at blocksize interleaved frames in the fixed-up playback
 * sample spec. */
void pa_webrtc_ec_play(pa_echo_canceller *ec, const uint8_t *play) {
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;
    const pa_sample_spec *ss = &ec->params.webrtc.play_ss;
    int n = ec->params.webrtc.blocksize;
    float **buf = ec->params.webrtc.play_buffer;
    webrtc::StreamConfig config(ss->rate, ss->channels, false);

    /* Split the interleaved input into the planar per-channel buffers the
     * APM consumes. */
    pa_deinterleave(play, (void **) buf, ss->channels, pa_sample_size(ss), n);

    pa_assert_se(apm->ProcessReverseStream(buf, config, config, buf) == webrtc::AudioProcessing::kNoError);

    /* FIXME: If ProcessReverseStream() makes any changes to the audio, such as
     * applying intelligibility enhancement, those changes don't have any
     * effect. This function is called at the source side, but the processing
     * would have to be done in the sink to be able to feed the processed audio
     * to speakers. */
}

/* Process one block of capture audio in place: deinterleave rec, run the
 * APM (echo cancellation plus whatever features were enabled at init), apply
 * any analog AGC volume suggestion, and write the interleaved result to out
 * in the output sample spec. */
void pa_webrtc_ec_record(pa_echo_canceller *ec, const uint8_t *rec, uint8_t *out) {
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;
    const pa_sample_spec *rec_ss = &ec->params.webrtc.rec_ss;
    const pa_sample_spec *out_ss = &ec->params.webrtc.out_ss;
    float **buf = ec->params.webrtc.rec_buffer;
    int n = ec->params.webrtc.blocksize;
    int old_volume, new_volume;
    webrtc::StreamConfig rec_config(rec_ss->rate, rec_ss->channels, false);
    webrtc::StreamConfig out_config(out_ss->rate, out_ss->channels, false);

    pa_deinterleave(rec, (void **) buf, rec_ss->channels, pa_sample_size(rec_ss), n);

    if (ec->params.webrtc.agc) {
        /* Tell the APM what the current (scaled) capture volume is so it can
         * suggest an adjustment. */
        pa_volume_t v = pa_echo_canceller_get_capture_volume(ec);
        old_volume = webrtc_volume_from_pa(v);
        apm->gain_control()->set_stream_analog_level(old_volume);
    }

    /* A zero delay is reported; presumably the capture/playback alignment is
     * handled by the caller — NOTE(review): confirm against module-echo-cancel. */
    apm->set_stream_delay_ms(0);
    pa_assert_se(apm->ProcessStream(buf, rec_config, out_config, buf) == webrtc::AudioProcessing::kNoError);

    if (ec->params.webrtc.agc) {
        if (PA_UNLIKELY(ec->params.webrtc.first)) {
            /* We start at a sane default volume (taken from the Chromium
             * condition on the experimental AGC in audio_processing.h). This is
             * needed to make sure that there's enough energy in the capture
             * signal for the AGC to work */
            ec->params.webrtc.first = false;
            new_volume = ec->params.webrtc.agc_start_volume;
        } else {
            new_volume = apm->gain_control()->stream_analog_level();
        }

        if (old_volume != new_volume)
            pa_echo_canceller_set_capture_volume(ec, webrtc_volume_to_pa(new_volume));
    }

    pa_interleave((const void **) buf, out_ss->channels, out, pa_sample_size(out_ss), n);
}

/* Report clock drift between playback and capture to the canceller. drift is
 * a per-block fraction; it is converted to samples-per-block for the APM. */
void pa_webrtc_ec_set_drift(pa_echo_canceller *ec, float drift) {
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.webrtc.apm;

    apm->echo_cancellation()->set_stream_drift_samples(drift * ec->params.webrtc.blocksize);
}

/* Convenience wrapper: feed one playback block, then process the matching
 * capture block. */
void pa_webrtc_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    pa_webrtc_ec_play(ec, play);
    pa_webrtc_ec_record(ec, rec, out);
}

/* Tear down everything allocated in pa_webrtc_ec_init(): the trace callback
 * (if any), the AudioProcessing instance, and the per-channel buffers. */
void pa_webrtc_ec_done(pa_echo_canceller *ec) {
    int i;

    if (ec->params.webrtc.trace_callback) {
        webrtc::Trace::ReturnTrace();
        delete ((PaWebrtcTraceCallback *) ec->params.webrtc.trace_callback);
    }

    if (ec->params.webrtc.apm) {
        delete (webrtc::AudioProcessing*)ec->params.webrtc.apm;
        ec->params.webrtc.apm = NULL;
    }

    for (i = 0; i < ec->params.webrtc.rec_ss.channels; i++)
        pa_xfree(ec->params.webrtc.rec_buffer[i]);
    for (i = 0; i < ec->params.webrtc.play_ss.channels; i++)
        pa_xfree(ec->params.webrtc.play_buffer[i]);
}