From 07873ea598d75688b09fcf34e87c9f4f7345ab14 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Fri, 10 Apr 2026 16:06:56 +0400 Subject: [PATCH] fix(linux-aec): fall back to 0.3 crate + apt lib (2.x bundled is broken) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch the webrtc-audio-processing dep from the 2.x git source (bundled mode) back to crates.io 0.3, and link against Debian's apt package libwebrtc-audio-processing-dev (0.3-1+b1 on Bookworm). The 2.x path fails because both the crates.io tarball and the upstream git main branch of webrtc-audio-processing-sys 2.0.3 have a build.rs bug where \`meson setup --reconfigure\` is passed unconditionally, panicking on first-run empty build dirs with "Directory does not contain a valid build tree". The 0.x line sidesteps bundled mode entirely by linking the apt-provided library. Trade-off: we get AEC2 (the older generation) instead of AEC3, but it's the same algorithm family and is what PulseAudio's module-echo-cancel and PipeWire's filter-chain use on current Debian-family distros. Fine for shipping — we can revisit AEC3 once the 2.x bundled build is fixed upstream. API changes: - 0.3's Processor::process_capture_frame and process_render_frame take &mut self, so wrap the module-level processor in a Mutex. Capture and playback threads each lock briefly (sub-ms per 10 ms frame); contention is minimal. - Import NUM_SAMPLES_PER_FRAME from the crate directly instead of hardcoding 480, so the code tracks whatever sample rate the upstream C++ lib exposes (currently 48 kHz hardcoded -> 480). - Helper fns drain_frames_through_apm / tee_render_samples / etc. take &Mutex instead of &Processor. - Use explicit EchoCancellationSuppressionLevel and NoiseSuppressionLevel imports rather than fully-qualified paths. Dockerfile: - Drop meson / ninja-build / python3 (only needed for bundled build). - Add libwebrtc-audio-processing-dev for the system link path. - Keep clang (may be needed by the bindgen step in some versions). Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/wzp-client/Cargo.toml | 28 ++++---- crates/wzp-client/src/audio_linux_aec.rs | 81 ++++++++++++++++-------- scripts/Dockerfile.linux-desktop-builder | 27 ++++---- 3 files changed, 85 insertions(+), 51 deletions(-) diff --git a/crates/wzp-client/Cargo.toml b/crates/wzp-client/Cargo.toml index 9dca1e1..2911ae9 100644 --- a/crates/wzp-client/Cargo.toml +++ b/crates/wzp-client/Cargo.toml @@ -47,20 +47,24 @@ windows = { version = "0.58", optional = true, features = [ "Win32_System_Variant", ] } -# Linux-only: WebRTC AEC3 (Audio Processing Module) bindings for the -# `linux-aec` feature. The `bundled` sub-feature statically compiles the -# vendored PulseAudio webrtc-audio-processing C++ sources via meson+ninja -# at cargo build time, avoiding Debian Bookworm's stale system -# libwebrtc-audio-processing-dev 0.3 package (which predates AEC3). +# Linux-only: WebRTC AEC (Audio Processing Module) bindings for the +# `linux-aec` feature. This is the 0.3.x line of the `tonarino/ +# webrtc-audio-processing` crate, which links against Debian's +# `libwebrtc-audio-processing-dev` apt package (0.3-1+b1 on Bookworm). # -# We pull from git (not crates.io) because the crates.io tarball of -# webrtc-audio-processing-sys 2.0.3 does NOT include the vendored C++ -# submodule contents ("Directory does not contain a valid build tree"), -# and cargo clones git deps with submodules auto-initialized since ~1.27, -# so the git path gives us a complete source tree that bundled-mode can -# build. Pinned to tag v2.0.3 for reproducibility. +# Note: we attempted the 2.x line with its `bundled` sub-feature first +# (which would give us AEC3 instead of AEC2), but both the crates.io +# tarball AND the upstream git `main` branch of webrtc-audio-processing-sys +# 2.0.3 hit a `meson setup --reconfigure` bug where the build.rs passes +# --reconfigure unconditionally even on first-run empty build dirs, +# causing the bundled build to fail with "Directory does not contain a +# valid build tree". The 0.x line doesn't use bundled mode and sidesteps +# this entirely by linking the apt-provided library. AEC2 is older than +# AEC3 but still the same algorithm family — this is what PulseAudio's +# module-echo-cancel and PipeWire's filter-chain use by default on +# current Debian-family distros. [target.'cfg(target_os = "linux")'.dependencies] -webrtc-audio-processing = { git = "https://github.com/tonarino/webrtc-audio-processing", branch = "main", optional = true, features = ["bundled"] } +webrtc-audio-processing = { version = "0.3", optional = true } [features] default = [] diff --git a/crates/wzp-client/src/audio_linux_aec.rs b/crates/wzp-client/src/audio_linux_aec.rs index bf8dbcc..5833765 100644 --- a/crates/wzp-client/src/audio_linux_aec.rs +++ b/crates/wzp-client/src/audio_linux_aec.rs @@ -8,8 +8,9 @@ //! //! ## Architecture //! -//! A single module-level `Arc` is shared between the capture and -//! playback paths. On each 20 ms frame (960 samples @ 48 kHz mono): +//! A single module-level `Arc>` is shared between the +//! capture and playback paths. On each 20 ms frame (960 samples @ 48 kHz +//! mono): //! //! - **Playback path**: `LinuxAecPlayback::start` spawns the usual CPAL //! output thread, but wraps each chunk in a call to @@ -44,25 +45,32 @@ //! //! ## Thread safety //! -//! `webrtc_audio_processing::Processor` is `Send + Sync` with `&self` -//! methods. Capture and playback threads both hold an `Arc` and -//! call APM concurrently — the underlying C++ code serializes internally. +//! The 0.3.x line of `webrtc-audio-processing` takes `&mut self` on both +//! `process_capture_frame` and `process_render_frame`, so the `Processor` +//! needs a `Mutex` around it for cross-thread sharing. The capture and +//! playback threads each acquire the lock briefly (sub-millisecond per +//! 10 ms frame) so contention is minimal at our frame rates. use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, OnceLock}; +use std::sync::{Arc, Mutex, OnceLock}; use anyhow::{anyhow, Context}; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use cpal::{SampleFormat, SampleRate, StreamConfig}; use tracing::{info, warn}; -use webrtc_audio_processing::{Config, EchoCancellation, InitializationConfig, NoiseSuppression, Processor}; +use webrtc_audio_processing::{ + Config, EchoCancellation, EchoCancellationSuppressionLevel, InitializationConfig, + NoiseSuppression, NoiseSuppressionLevel, Processor, NUM_SAMPLES_PER_FRAME, +}; use crate::audio_ring::AudioRing; /// 20 ms at 48 kHz, mono — matches the rest of the pipeline and the codec. pub const FRAME_SAMPLES: usize = 960; /// APM requires strict 10 ms frames at 48 kHz = 480 samples per call. -const APM_FRAME_SAMPLES: usize = 480; +/// Imported from the webrtc-audio-processing crate so we can't drift out +/// of sync with whatever sample rate / frame length the C++ lib is using. +const APM_FRAME_SAMPLES: usize = NUM_SAMPLES_PER_FRAME as usize; const APM_NUM_CHANNELS: usize = 1; /// Round-trip delay hint passed to APM; the estimator refines from here. /// 60 ms is a reasonable default for CPAL on ALSA / PulseAudio / PipeWire. @@ -76,9 +84,11 @@ const STREAM_DELAY_MS: i32 = 60; /// Module-level lazily-initialized APM. Shared between capture and playback /// so they operate on the same echo-cancellation state — the render frames /// pushed by playback are what the capture path subtracts from the mic input. -static PROCESSOR: OnceLock> = OnceLock::new(); +/// Wrapped in a Mutex because the 0.3.x Processor takes `&mut self` on both +/// process_capture_frame and process_render_frame. +static PROCESSOR: OnceLock>> = OnceLock::new(); -fn get_or_init_processor() -> anyhow::Result> { +fn get_or_init_processor() -> anyhow::Result>> { if let Some(p) = PROCESSOR.get() { return Ok(p.clone()); } @@ -92,14 +102,13 @@ fn get_or_init_processor() -> anyhow::Result> { let config = Config { echo_cancellation: Some(EchoCancellation { - suppression_level: webrtc_audio_processing::EchoCancellationSuppressionLevel::High, + suppression_level: EchoCancellationSuppressionLevel::High, stream_delay_ms: Some(STREAM_DELAY_MS), enable_delay_agnostic: true, enable_extended_filter: true, }), noise_suppression: Some(NoiseSuppression { - suppression_level: - webrtc_audio_processing::NoiseSuppressionLevel::High, + suppression_level: NoiseSuppressionLevel::High, }), enable_high_pass_filter: true, // AGC left off for now — it can fight the Opus encoder's own gain @@ -109,11 +118,11 @@ fn get_or_init_processor() -> anyhow::Result> { }; processor.set_config(config); - let arc = Arc::new(processor); + let arc = Arc::new(Mutex::new(processor)); let _ = PROCESSOR.set(arc.clone()); info!( stream_delay_ms = STREAM_DELAY_MS, - "webrtc APM initialized (AEC3 High + NS High + HPF, AGC off)" + "webrtc APM initialized (AEC High + NS High + HPF, AGC off)" ); Ok(arc) } @@ -134,33 +143,49 @@ fn f32_to_i16(s: f32) -> i16 { /// Feed a 20 ms (960-sample) playback frame to APM as the render reference. /// Splits into two 10 ms halves because APM is strict about frame size. -fn push_render_frame_20ms(apm: &Processor, pcm: &[i16]) { +/// Takes the Mutex-wrapped Processor and locks briefly around each call. +fn push_render_frame_20ms(apm: &Mutex, pcm: &[i16]) { debug_assert_eq!(pcm.len(), FRAME_SAMPLES); let mut buf = [0f32; APM_FRAME_SAMPLES]; for half in pcm.chunks_exact(APM_FRAME_SAMPLES) { for (i, &s) in half.iter().enumerate() { buf[i] = i16_to_f32(s); } - // process_render_frame mutates in place. For render we only care - // about feeding APM the reference — we discard the output. - if let Err(e) = apm.process_render_frame(&mut buf) { - warn!("webrtc APM process_render_frame failed: {e:?}"); + match apm.lock() { + Ok(mut p) => { + if let Err(e) = p.process_render_frame(&mut buf) { + warn!("webrtc APM process_render_frame failed: {e:?}"); + } + } + Err(_) => { + warn!("webrtc APM mutex poisoned in render path"); + return; + } } } } /// Run a 20 ms (960-sample) capture frame through APM's echo cancellation /// in place. Splits into two 10 ms halves, runs APM on each, stitches -/// results back into the caller's buffer. -fn process_capture_frame_20ms(apm: &Processor, pcm: &mut [i16]) { +/// results back into the caller's buffer. Briefly holds the Mutex once +/// per 10 ms half. +fn process_capture_frame_20ms(apm: &Mutex, pcm: &mut [i16]) { debug_assert_eq!(pcm.len(), FRAME_SAMPLES); let mut buf = [0f32; APM_FRAME_SAMPLES]; for half in pcm.chunks_exact_mut(APM_FRAME_SAMPLES) { for (i, &s) in half.iter().enumerate() { buf[i] = i16_to_f32(s); } - if let Err(e) = apm.process_capture_frame(&mut buf) { - warn!("webrtc APM process_capture_frame failed: {e:?}"); + match apm.lock() { + Ok(mut p) => { + if let Err(e) = p.process_capture_frame(&mut buf) { + warn!("webrtc APM process_capture_frame failed: {e:?}"); + } + } + Err(_) => { + warn!("webrtc APM mutex poisoned in capture path"); + return; + } } for (i, d) in half.iter_mut().enumerate() { *d = f32_to_i16(buf[i]); @@ -303,7 +328,7 @@ impl Drop for LinuxAecCapture { /// Pull whole 960-sample frames out of the leftover buffer, run them through /// APM's capture-side processing, and push to the ring. Leaves any partial /// sub-960 remainder in `leftover` for the next callback. -fn drain_frames_through_apm(leftover: &mut Vec, apm: &Processor, ring: &AudioRing) { +fn drain_frames_through_apm(leftover: &mut Vec, apm: &Mutex, ring: &AudioRing) { let mut frame = [0i16; FRAME_SAMPLES]; while leftover.len() >= FRAME_SAMPLES { frame.copy_from_slice(&leftover[..FRAME_SAMPLES]); @@ -434,7 +459,7 @@ impl Drop for LinuxAecPlayback { fn fill_output_and_tee_i16( data: &mut [i16], ring: &AudioRing, - apm: &Processor, + apm: &Mutex, carry: &std::sync::Mutex>, ) { let read = ring.read(data); @@ -447,7 +472,7 @@ fn fill_output_and_tee_i16( fn fill_output_and_tee_f32( data: &mut [f32], ring: &AudioRing, - apm: &Processor, + apm: &Mutex, carry: &std::sync::Mutex>, ) { let mut tmp = vec![0i16; data.len()]; @@ -463,7 +488,7 @@ fn fill_output_and_tee_f32( /// Push CPAL-bound samples into APM's render-side input for echo cancellation. /// Uses a carry buffer to batch into exact 960-sample (20 ms) frames. -fn tee_render_samples(samples: &[i16], apm: &Processor, carry: &std::sync::Mutex>) { +fn tee_render_samples(samples: &[i16], apm: &Mutex, carry: &std::sync::Mutex>) { let mut lv = carry.lock().unwrap(); lv.extend_from_slice(samples); while lv.len() >= FRAME_SAMPLES { diff --git a/scripts/Dockerfile.linux-desktop-builder b/scripts/Dockerfile.linux-desktop-builder index 9dd742c..0cfc480 100644 --- a/scripts/Dockerfile.linux-desktop-builder +++ b/scripts/Dockerfile.linux-desktop-builder @@ -33,22 +33,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ librsvg2-dev \ libglib2.0-dev \ patchelf \ - meson \ - ninja-build \ - python3 \ + libwebrtc-audio-processing-dev \ clang \ && rm -rf /var/lib/apt/lists/* # ── webrtc-audio-processing build requirements ────────────────────────────── -# The `webrtc-audio-processing` Rust crate with the `bundled` feature vendors -# the PulseAudio webrtc-audio-processing C++ library and builds it via meson -# + ninja at `cargo build` time. That avoids Debian Bookworm's stale -# libwebrtc-audio-processing-dev 0.3-1 package (which predates AEC3) and gives -# us a self-contained static link — no runtime .so dependency, same algorithm -# on every Linux distro regardless of what apt ships. +# The `webrtc-audio-processing` Rust crate (0.3.x line) links against Debian +# Bookworm's `libwebrtc-audio-processing-dev` apt package (0.3-1+b1), which +# provides the PulseAudio fork of the WebRTC audio processing module. This is +# the library that Pulse's module-echo-cancel and PipeWire's filter-chain +# use for their AEC modes — same algorithm family, runtime-linked via +# pkg-config at cargo build time. # -# apt deps for the bundled build: meson, ninja-build, python3, clang, -# build-essential (already present via android-builder base). +# An attempt was made to use the 2.x line with the `bundled` sub-feature +# (which would give AEC3 instead of AEC2) but both the crates.io tarball +# and the upstream git `main` branch hit a `meson setup --reconfigure` bug +# that panics on first-run empty build dirs. The 0.3 line avoids the +# bundled build path entirely and is what we ship for now. +# +# `clang` is listed explicitly because the Rust crate's bindgen may need +# it at compile time depending on the version of the underlying +# webrtc-audio-processing-sys build script. USER builder WORKDIR /build/source