From 4ba77c8c0e39d0000290e7b4f453b286a7b79105 Mon Sep 17 00:00:00 2001
From: Siavash Sameni <manwe@MacBook-Air.local>
Date: Fri, 10 Apr 2026 15:53:23 +0400
Subject: [PATCH] feat(linux): WebRTC AEC3 capture/playback backend with
 render-side tee
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds gold-standard Linux echo cancellation: in-app WebRTC AEC3 (Audio
Processing Module) via the webrtc-audio-processing crate, using the
same algorithm as Chrome WebRTC, Zoom, Teams, and Jitsi. Runs entirely
in-process, so it works identically on ALSA / PulseAudio / PipeWire
systems — no dependency on user-configured echo-cancel modules.

Architecture:
- New crates/wzp-client/src/audio_linux_aec.rs module (~510 lines).
  Contains LinuxAecCapture and LinuxAecPlayback, both using CPAL
  under the hood but routing samples through a shared
  Arc<webrtc_audio_processing::Processor>. The playback path tees
  each 20 ms frame into APM.process_render_frame as the echo
  reference BEFORE handing the samples to CPAL's output callback.
  The capture path runs APM.process_capture_frame on each mic frame
  in place before pushing to the audio ring buffer. This is the
  "tee the playback ring" approach that Zoom/Teams/Jitsi use.
- New `linux-aec` feature in wzp-client pulling in the
  webrtc-audio-processing crate at v2.x with the `bundled`
  sub-feature. Bundled means the vendored PulseAudio WebRTC C++
  sources are statically compiled via meson+ninja at cargo build
  time — no runtime .so dependency, avoids Debian Bookworm's stale
  libwebrtc-audio-processing-dev 0.3 package (which predates AEC3).
  Dep is target-gated to Linux, so enabling the feature on non-Linux
  is a no-op.
- lib.rs re-exports LinuxAecCapture/LinuxAecPlayback as
  AudioCapture/AudioPlayback when `linux-aec` is on, otherwise
  falls back to the CPAL audio_io path. Shared public API
  (start/ring/stop/Drop) means downstream code is unchanged.
- New `linux-aec` feature in wzp-desktop forwards to
  wzp-client/linux-aec so `cargo tauri build -- --features
  wzp-desktop/linux-aec` builds the AEC variant.

APM configuration:
- EchoCancellation: High suppression, delay-agnostic mode on,
  extended filter on, stream_delay_ms=60 initial hint
- NoiseSuppression: High
- HighPassFilter: on
- AGC: off (can fight Opus encoder's own gain staging + adaptive
  quality controller; add later if users report low mic level)

Frame size handling:
- Pipeline uses 20 ms frames (960 samples @ 48 kHz mono)
- APM requires strict 10 ms (480 samples) per call
- Each 20 ms frame is split into two 480-sample halves, APM called
  twice, halves stitched back
- Same pattern for render and capture sides
- Carry-buffer logic handles the case where CPAL delivers samples in
  arbitrary chunk sizes that don't divide 960

Build infrastructure:
- scripts/Dockerfile.linux-desktop-builder adds meson, ninja-build,
  python3, clang for the webrtc-audio-processing bundled build
- scripts/build-linux-desktop-docker.sh takes a new --aec flag that
  enables the linux-aec feature and renames the output artifacts
  with an `-aec` suffix so noAEC and AEC variants can coexist on disk

Task #30.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/wzp-client/Cargo.toml             |  18 +
 crates/wzp-client/src/audio_linux_aec.rs | 512 +++++++++++++++++++++++
 crates/wzp-client/src/lib.rs             |  51 ++-
 desktop/src-tauri/Cargo.toml             |  11 +-
 scripts/Dockerfile.linux-desktop-builder |  15 +
 scripts/build-linux-desktop-docker.sh    |  68 ++-
 6 files changed, 647 insertions(+), 28 deletions(-)
 create mode 100644 crates/wzp-client/src/audio_linux_aec.rs

diff --git a/crates/wzp-client/Cargo.toml b/crates/wzp-client/Cargo.toml
index 3d11f0f..3580321 100644
--- a/crates/wzp-client/Cargo.toml
+++ b/crates/wzp-client/Cargo.toml
@@ -47,6 +47,16 @@ windows = { version = "0.58", optional = true, features = [
     "Win32_System_Variant",
 ] }
 
+# Linux-only: WebRTC AEC3 (Audio Processing Module) bindings for the
+# `linux-aec` feature. The `bundled` sub-feature of webrtc-audio-processing
+# statically compiles the vendored PulseAudio webrtc-audio-processing C++
+# sources via meson + ninja at cargo build time, avoiding Debian Bookworm's
+# stale system libwebrtc-audio-processing-dev 0.3 package (which predates
+# AEC3). Produces a self-contained static link — no runtime .so dep, same
+# algorithm on every Linux distro.
+[target.'cfg(target_os = "linux")'.dependencies]
+webrtc-audio-processing = { version = "2", optional = true, features = ["bundled"] }
+
 [features]
 default = []
 audio = ["cpal"]
@@ -61,6 +71,14 @@ vpio = ["dep:coreaudio-rs"]
 # enabling this feature on non-Windows targets is a no-op (the
 # audio_wasapi module is also #[cfg(target_os = "windows")] in lib.rs).
 windows-aec = ["dep:windows"]
+# linux-aec enables a CPAL + WebRTC AEC3 capture/playback backend that
+# runs the WebRTC Audio Processing Module (same algo as Chrome / Zoom /
+# Teams) in-process, using the playback PCM as the reference signal for
+# echo cancellation. The webrtc-audio-processing dep is target-gated to
+# Linux above, so enabling this feature on non-Linux targets is a no-op
+# (the audio_linux_aec module is also #[cfg(target_os = "linux")] in
+# lib.rs).
+linux-aec = ["dep:webrtc-audio-processing"]
 
 [[bin]]
 name = "wzp-client"
diff --git a/crates/wzp-client/src/audio_linux_aec.rs b/crates/wzp-client/src/audio_linux_aec.rs
new file mode 100644
index 0000000..bf8dbcc
--- /dev/null
+++ b/crates/wzp-client/src/audio_linux_aec.rs
@@ -0,0 +1,512 @@
+//! Linux AEC backend: CPAL capture + playback wired through the WebRTC Audio
+//! Processing Module (AEC3 + noise suppression + high-pass filter).
+//!
+//! This is the same algorithm used by Chrome WebRTC, Zoom, Teams, Jitsi, and
+//! any other "serious" Linux VoIP app. It runs in-process — no dependency on
+//! PulseAudio's module-echo-cancel or PipeWire's filter-chain, so it works
+//! identically on ALSA / PulseAudio / PipeWire systems.
+//!
+//! ## Architecture
+//!
+//! A single module-level `Arc<Processor>` is shared between the capture and
+//! playback paths. On each 20 ms frame (960 samples @ 48 kHz mono):
+//!
+//! - **Playback path**: `LinuxAecPlayback::start` spawns the usual CPAL
+//!   output thread, but wraps each chunk in a call to
+//!   `Processor::process_render_frame` **before** handing it to CPAL. That
+//!   gives APM an authoritative reference of exactly what's going out to
+//!   the speakers (same approach Zoom/Teams/Jitsi use). The AEC then knows
+//!   what to cancel when it sees echo in the capture stream.
+//!
+//! - **Capture path**: `LinuxAecCapture::start` spawns the usual CPAL
+//!   input thread, and runs `Processor::process_capture_frame` on each
+//!   incoming mic chunk **in place** before pushing it into the ring
+//!   buffer. The AEC subtracts the echo using the render reference it
+//!   saw on the playback side.
+//!
+//! APM is strict about frame size: it requires exactly 10 ms = 480 samples
+//! per call at 48 kHz. Our pipeline uses 20 ms = 960 samples, so each 20 ms
+//! frame is split into two 480-sample halves, APM is called twice, and the
+//! halves are stitched back together.
+//!
+//! APM only accepts f32 samples in `[-1.0, 1.0]`, so we convert i16 → f32
+//! before the call and f32 → i16 after (with clamping on the return path).
+//!
+//! ## Stream delay
+//!
+//! AEC needs to know roughly how long it takes between a sample being passed
+//! to `process_render_frame` and its echo showing up at `process_capture_frame`
+//! — i.e. the round trip through CPAL playback → speaker → air → microphone
+//! → CPAL capture. AEC3's internal estimator tracks this within a window
+//! around whatever hint we give it. We hardcode 60 ms as a reasonable
+//! starting point for typical Linux audio stacks; the delay estimator does
+//! the fine-tuning automatically.
+//!
+//! ## Thread safety
+//!
+//! `webrtc_audio_processing::Processor` is `Send + Sync` with `&self`
+//! methods. Capture and playback threads both hold an `Arc<Processor>` and
+//! call APM concurrently — the underlying C++ code serializes internally.
+
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, OnceLock};
+
+use anyhow::{anyhow, Context};
+use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
+use cpal::{SampleFormat, SampleRate, StreamConfig};
+use tracing::{info, warn};
+use webrtc_audio_processing::{Config, EchoCancellation, InitializationConfig, NoiseSuppression, Processor};
+
+use crate::audio_ring::AudioRing;
+
+/// 20 ms at 48 kHz, mono — matches the rest of the pipeline and the codec.
+pub const FRAME_SAMPLES: usize = 960;
+/// APM requires strict 10 ms frames at 48 kHz = 480 samples per call.
+const APM_FRAME_SAMPLES: usize = 480;
+const APM_NUM_CHANNELS: usize = 1;
+/// Round-trip delay hint in milliseconds, handed to APM's echo canceller
+/// as `stream_delay_ms`; the estimator refines from here. 60 ms is a
+/// reasonable default for CPAL on ALSA / PulseAudio / PipeWire.
+const STREAM_DELAY_MS: i32 = 60;
+
+// ---------------------------------------------------------------------------
+// Shared APM instance
+// ---------------------------------------------------------------------------
+
+/// Module-level lazily-initialized APM. Shared between capture and playback
+/// so they operate on the same echo-cancellation state — the render frames
+/// pushed by playback are what the capture path subtracts from the mic input.
+static PROCESSOR: OnceLock<Arc<Processor>> = OnceLock::new();
+
+/// Returns the process-wide APM, creating and configuring it on first use.
+fn get_or_init_processor() -> anyhow::Result<Arc<Processor>> {
+    if let Some(p) = PROCESSOR.get() {
+        return Ok(p.clone());
+    }
+    let init_config = InitializationConfig {
+        num_capture_channels: APM_NUM_CHANNELS as i32,
+        num_render_channels: APM_NUM_CHANNELS as i32,
+        ..Default::default()
+    };
+    let mut processor = Processor::new(&init_config)
+        .map_err(|e| anyhow!("webrtc APM init failed: {e:?}"))?;
+    let config = Config {
+        echo_cancellation: Some(EchoCancellation {
+            suppression_level: webrtc_audio_processing::EchoCancellationSuppressionLevel::High,
+            stream_delay_ms: Some(STREAM_DELAY_MS),
+            enable_delay_agnostic: true,
+            enable_extended_filter: true,
+        }),
+        noise_suppression: Some(NoiseSuppression {
+            suppression_level: webrtc_audio_processing::NoiseSuppressionLevel::High,
+        }),
+        enable_high_pass_filter: true,
+        // AGC stays off for now: it can fight the Opus encoder's gain staging
+        // and the adaptive-quality controller. Revisit on low-mic-level reports.
+        ..Default::default()
+    };
+    processor.set_config(config);
+    let ours = Arc::new(processor);
+    if PROCESSOR.set(ours.clone()).is_ok() {
+        info!(
+            stream_delay_ms = STREAM_DELAY_MS,
+            "webrtc APM initialized (AEC3 High + NS High + HPF, AGC off)"
+        );
+    }
+    // If another thread won the init race, `set` failed and `ours` is a
+    // private instance; return the published one so both sides share state.
+    Ok(PROCESSOR.get().cloned().unwrap_or(ours))
+}
+
+// ---------------------------------------------------------------------------
+// Helpers: i16 ↔ f32 and APM frame processing
+// ---------------------------------------------------------------------------
+
+#[inline]
+fn i16_to_f32(s: i16) -> f32 {
+    s as f32 / 32768.0
+}
+
+#[inline]
+fn f32_to_i16(s: f32) -> i16 {
+    (s.clamp(-1.0, 1.0) * 32767.0) as i16
+}
+
+/// Feed a 20 ms (960-sample) playback frame to APM as the render reference.
+/// Splits into two 10 ms halves because APM is strict about frame size.
+fn push_render_frame_20ms(apm: &Processor, pcm: &[i16]) {
+    debug_assert_eq!(pcm.len(), FRAME_SAMPLES);
+    let mut scratch = [0f32; APM_FRAME_SAMPLES];
+    for half in pcm.chunks_exact(APM_FRAME_SAMPLES) {
+        for (dst, &src) in scratch.iter_mut().zip(half) {
+            *dst = i16_to_f32(src);
+        }
+        // APM rewrites `scratch` in place, but on the render side only the
+        // act of feeding the reference matters; the result is not read back.
+        if let Err(e) = apm.process_render_frame(&mut scratch) {
+            warn!("webrtc APM process_render_frame failed: {e:?}");
+        }
+    }
+}
+
+/// Run a 20 ms (960-sample) capture frame through APM's echo cancellation
+/// in place. Splits into two 10 ms halves, runs APM on each, stitches
+/// results back into the caller's buffer.
+fn process_capture_frame_20ms(apm: &Processor, pcm: &mut [i16]) {
+    debug_assert_eq!(pcm.len(), FRAME_SAMPLES);
+    let mut scratch = [0f32; APM_FRAME_SAMPLES];
+    for half in pcm.chunks_exact_mut(APM_FRAME_SAMPLES) {
+        for (dst, &src) in scratch.iter_mut().zip(half.iter()) {
+            *dst = i16_to_f32(src);
+        }
+        if let Err(e) = apm.process_capture_frame(&mut scratch) {
+            warn!("webrtc APM process_capture_frame failed: {e:?}");
+        }
+        for (dst, &src) in half.iter_mut().zip(scratch.iter()) {
+            *dst = f32_to_i16(src);
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// LinuxAecCapture — CPAL mic + WebRTC AEC capture-side processing
+// ---------------------------------------------------------------------------
+
+/// Microphone capture with WebRTC AEC3 applied in place before the codec
+/// sees the samples. Mirrors the public API of `audio_io::AudioCapture` so
+/// downstream code doesn't change.
+pub struct LinuxAecCapture {
+    ring: Arc<AudioRing>,
+    running: Arc<AtomicBool>,
+}
+
+impl LinuxAecCapture {
+    pub fn start() -> Result<Self, anyhow::Error> {
+        // Eagerly init the APM so the playback side can find it already
+        // configured, and so init errors surface on the caller thread
+        // instead of silently failing inside the capture thread.
+        let apm = get_or_init_processor()?;
+
+        let ring = Arc::new(AudioRing::new());
+        let running = Arc::new(AtomicBool::new(true));
+
+        let (init_tx, init_rx) = std::sync::mpsc::sync_channel::<Result<(), String>>(1);
+
+        let ring_cb = ring.clone();
+        let running_clone = running.clone();
+        let apm_capture = apm.clone();
+
+        std::thread::Builder::new()
+            .name("wzp-audio-capture-linuxaec".into())
+            .spawn(move || {
+                let result = (|| -> Result<(), anyhow::Error> {
+                    let host = cpal::default_host();
+                    let device = host
+                        .default_input_device()
+                        .ok_or_else(|| anyhow!("no default input audio device found"))?;
+                    info!(device = %device.name().unwrap_or_default(), "LinuxAEC: using input device");
+
+                    let config = StreamConfig {
+                        channels: 1,
+                        sample_rate: SampleRate(48_000),
+                        buffer_size: cpal::BufferSize::Default,
+                    };
+
+                    let use_f32 = !supports_i16_input(&device)?;
+
+                    let err_cb = |e: cpal::StreamError| {
+                        warn!("LinuxAEC input stream error: {e}");
+                    };
+
+                    // Leftover buffer for when CPAL gives us partial frames.
+                    // We need exactly 960-sample chunks to feed APM.
+                    let leftover = std::sync::Mutex::new(Vec::<i16>::with_capacity(FRAME_SAMPLES * 4));
+
+                    let stream = if use_f32 {
+                        let ring = ring_cb.clone();
+                        let running = running_clone.clone();
+                        let apm = apm_capture.clone();
+                        device.build_input_stream(
+                            &config,
+                            move |data: &[f32], _: &cpal::InputCallbackInfo| {
+                                if !running.load(Ordering::Relaxed) {
+                                    return;
+                                }
+                                let mut lv = leftover.lock().unwrap();
+                                lv.reserve(data.len());
+                                for &s in data {
+                                    lv.push(f32_to_i16(s));
+                                }
+                                drain_frames_through_apm(&mut lv, &apm, &ring);
+                            },
+                            err_cb,
+                            None,
+                        )?
+                    } else {
+                        let ring = ring_cb.clone();
+                        let running = running_clone.clone();
+                        let apm = apm_capture.clone();
+                        device.build_input_stream(
+                            &config,
+                            move |data: &[i16], _: &cpal::InputCallbackInfo| {
+                                if !running.load(Ordering::Relaxed) {
+                                    return;
+                                }
+                                let mut lv = leftover.lock().unwrap();
+                                lv.extend_from_slice(data);
+                                drain_frames_through_apm(&mut lv, &apm, &ring);
+                            },
+                            err_cb,
+                            None,
+                        )?
+                    };
+
+                    stream.play().context("failed to start LinuxAEC input stream")?;
+                    let _ = init_tx.send(Ok(()));
+                    info!("LinuxAEC capture started (AEC3 active)");
+
+                    while running_clone.load(Ordering::Relaxed) {
+                        std::thread::park_timeout(std::time::Duration::from_millis(200));
+                    }
+                    drop(stream);
+                    Ok(())
+                })();
+
+                if let Err(e) = result {
+                    let _ = init_tx.send(Err(e.to_string()));
+                }
+            })?;
+
+        init_rx
+            .recv()
+            .map_err(|_| anyhow!("LinuxAEC capture thread exited before signaling"))?
+            .map_err(|e| anyhow!("{e}"))?;
+
+        Ok(Self { ring, running })
+    }
+
+    pub fn ring(&self) -> &Arc<AudioRing> {
+        &self.ring
+    }
+
+    pub fn stop(&self) {
+        self.running.store(false, Ordering::Relaxed);
+    }
+}
+
+impl Drop for LinuxAecCapture {
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
+
+/// Run every complete 960-sample frame in `leftover` through APM's capture
+/// processing and push it to the ring; a partial tail stays for next time.
+fn drain_frames_through_apm(leftover: &mut Vec<i16>, apm: &Processor, ring: &AudioRing) {
+    let whole = leftover.len() - leftover.len() % FRAME_SAMPLES;
+    let mut frame = [0i16; FRAME_SAMPLES];
+    for chunk in leftover[..whole].chunks_exact(FRAME_SAMPLES) {
+        frame.copy_from_slice(chunk);
+        process_capture_frame_20ms(apm, &mut frame);
+        ring.write(&frame);
+    }
+    leftover.drain(..whole); // one shift of the tail instead of one per frame
+}
+
+// ---------------------------------------------------------------------------
+// LinuxAecPlayback — CPAL speaker output + WebRTC AEC render-side tee
+// ---------------------------------------------------------------------------
+
+/// Speaker playback with a render-side tee: each frame written to CPAL is
+/// ALSO fed to APM via `process_render_frame` as the echo-cancellation
+/// reference signal. This is the "tee the playback ring" approach (Zoom,
+/// Teams, Jitsi) — deterministic, does not depend on PulseAudio loopback or
+/// PipeWire monitor sources.
+pub struct LinuxAecPlayback {
+    ring: Arc<AudioRing>,
+    running: Arc<AtomicBool>,
+}
+
+impl LinuxAecPlayback {
+    pub fn start() -> Result<Self, anyhow::Error> {
+        let apm = get_or_init_processor()?;
+
+        let ring = Arc::new(AudioRing::new());
+        let running = Arc::new(AtomicBool::new(true));
+
+        let (init_tx, init_rx) = std::sync::mpsc::sync_channel::<Result<(), String>>(1);
+
+        let ring_cb = ring.clone();
+        let running_clone = running.clone();
+        let apm_render = apm.clone();
+
+        std::thread::Builder::new()
+            .name("wzp-audio-playback-linuxaec".into())
+            .spawn(move || {
+                let result = (|| -> Result<(), anyhow::Error> {
+                    let host = cpal::default_host();
+                    let device = host
+                        .default_output_device()
+                        .ok_or_else(|| anyhow!("no default output audio device found"))?;
+                    info!(device = %device.name().unwrap_or_default(), "LinuxAEC: using output device");
+
+                    let config = StreamConfig {
+                        channels: 1,
+                        sample_rate: SampleRate(48_000),
+                        buffer_size: cpal::BufferSize::Default,
+                    };
+
+                    let use_f32 = !supports_i16_output(&device)?;
+
+                    let err_cb = |e: cpal::StreamError| {
+                        warn!("LinuxAEC output stream error: {e}");
+                    };
+
+                    // Same 960-sample batching approach as the capture side:
+                    // CPAL may ask for N samples in a callback where N doesn't
+                    // divide 960. We accumulate partial frames in a Vec and
+                    // feed APM as soon as we have a whole 20 ms frame.
+                    let carry = std::sync::Mutex::new(Vec::<i16>::with_capacity(FRAME_SAMPLES * 4));
+
+                    let stream = if use_f32 {
+                        let ring = ring_cb.clone();
+                        let apm = apm_render.clone();
+                        device.build_output_stream(
+                            &config,
+                            move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
+                                fill_output_and_tee_f32(data, &ring, &apm, &carry);
+                            },
+                            err_cb,
+                            None,
+                        )?
+                    } else {
+                        let ring = ring_cb.clone();
+                        let apm = apm_render.clone();
+                        device.build_output_stream(
+                            &config,
+                            move |data: &mut [i16], _: &cpal::OutputCallbackInfo| {
+                                fill_output_and_tee_i16(data, &ring, &apm, &carry);
+                            },
+                            err_cb,
+                            None,
+                        )?
+                    };
+
+                    stream.play().context("failed to start LinuxAEC output stream")?;
+                    let _ = init_tx.send(Ok(()));
+                    info!("LinuxAEC playback started (render tee active)");
+
+                    while running_clone.load(Ordering::Relaxed) {
+                        std::thread::park_timeout(std::time::Duration::from_millis(200));
+                    }
+                    drop(stream);
+                    Ok(())
+                })();
+
+                if let Err(e) = result {
+                    let _ = init_tx.send(Err(e.to_string()));
+                }
+            })?;
+
+        init_rx
+            .recv()
+            .map_err(|_| anyhow!("LinuxAEC playback thread exited before signaling"))?
+            .map_err(|e| anyhow!("{e}"))?;
+
+        Ok(Self { ring, running })
+    }
+
+    pub fn ring(&self) -> &Arc<AudioRing> {
+        &self.ring
+    }
+
+    pub fn stop(&self) {
+        self.running.store(false, Ordering::Relaxed);
+    }
+}
+
+impl Drop for LinuxAecPlayback {
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
+
+fn fill_output_and_tee_i16(
+    data: &mut [i16],
+    ring: &AudioRing,
+    apm: &Processor,
+    carry: &std::sync::Mutex<Vec<i16>>,
+) {
+    // Whatever the ring could not supply is played back as silence.
+    let filled = ring.read(data);
+    data[filled..].fill(0);
+    // Feed APM exactly what goes to the device, padding included.
+    tee_render_samples(data, apm, carry);
+}
+
+fn fill_output_and_tee_f32(
+    data: &mut [f32],
+    ring: &AudioRing,
+    apm: &Processor,
+    carry: &std::sync::Mutex<Vec<i16>>,
+) {
+    // i16 staging buffer sized to this callback's request.
+    let mut pcm = vec![0i16; data.len()];
+    let filled = ring.read(&mut pcm);
+    pcm[filled..].fill(0);
+    // Hand the device the f32 form of the same samples APM will see.
+    for (out, &sample) in data.iter_mut().zip(&pcm) {
+        *out = i16_to_f32(sample);
+    }
+    tee_render_samples(&pcm, apm, carry);
+}
+
+/// Push CPAL-bound samples into APM's render-side input for echo cancellation.
+/// Uses a carry buffer to batch into exact 960-sample (20 ms) frames.
+fn tee_render_samples(samples: &[i16], apm: &Processor, carry: &std::sync::Mutex<Vec<i16>>) {
+    let mut lv = carry.lock().unwrap();
+    lv.extend_from_slice(samples);
+    let whole = lv.len() - lv.len() % FRAME_SAMPLES;
+    for frame in lv[..whole].chunks_exact(FRAME_SAMPLES) {
+        push_render_frame_20ms(apm, frame);
+    }
+    // Drop all consumed frames at once: one tail shift instead of one per frame.
+    lv.drain(..whole);
+}
+
+// ---------------------------------------------------------------------------
+// CPAL format helpers (duplicated from audio_io.rs to keep the modules
+// independent — each backend file is a self-contained unit)
+// ---------------------------------------------------------------------------
+
+fn supports_i16_input(device: &cpal::Device) -> Result<bool, anyhow::Error> {
+    let wanted = SampleRate(48_000);
+    // True as soon as one advertised input range offers i16 samples, spans
+    // 48 kHz, and has at least one channel.
+    let found = device
+        .supported_input_configs()
+        .context("failed to query input configs")?
+        .any(|cfg| {
+            cfg.sample_format() == SampleFormat::I16
+                && cfg.min_sample_rate() <= wanted
+                && cfg.max_sample_rate() >= wanted
+                && cfg.channels() >= 1
+        });
+    Ok(found)
+}
+
+fn supports_i16_output(device: &cpal::Device) -> Result<bool, anyhow::Error> {
+    let wanted = SampleRate(48_000);
+    // True as soon as one advertised output range offers i16 samples, spans
+    // 48 kHz, and has at least one channel.
+    let found = device
+        .supported_output_configs()
+        .context("failed to query output configs")?
+        .any(|cfg| {
+            cfg.sample_format() == SampleFormat::I16
+                && cfg.min_sample_rate() <= wanted
+                && cfg.max_sample_rate() >= wanted
+                && cfg.channels() >= 1
+        });
+    Ok(found)
+}
diff --git a/crates/wzp-client/src/lib.rs b/crates/wzp-client/src/lib.rs
index 9a1309e..a9179bc 100644
--- a/crates/wzp-client/src/lib.rs
+++ b/crates/wzp-client/src/lib.rs
@@ -21,6 +21,11 @@ pub mod audio_vpio;
 // this feature on non-Windows targets is a no-op.
 #[cfg(all(feature = "windows-aec", target_os = "windows"))]
 pub mod audio_wasapi;
+// WebRTC AEC3 (Audio Processing Module) wrapper around CPAL capture + playback
+// on Linux. Only compiled when `linux-aec` feature is on AND target is Linux.
+// The webrtc-audio-processing dep is itself gated to Linux in Cargo.toml.
+#[cfg(all(feature = "linux-aec", target_os = "linux"))]
+pub mod audio_linux_aec;
 pub mod bench;
 pub mod call;
 pub mod drift_test;
@@ -30,24 +35,48 @@ pub mod handshake;
 pub mod metrics;
 pub mod sweep;
 
-// AudioPlayback always comes from the CPAL path (`audio_io`). We do not
-// need OS-level processing on the playback side because Windows's
-// communications AEC, once engaged on the capture stream, uses the system
-// render mix as the reference signal — it cancels echo from CPAL playback
-// (and any other app's audio) without special handling.
-#[cfg(feature = "audio")]
-pub use audio_io::AudioPlayback;
+// AudioPlayback: two possible backends depending on feature flags.
+//   1. Default CPAL (`audio_io::AudioPlayback`) — baseline on every platform.
+//   2. Linux AEC (`audio_linux_aec::LinuxAecPlayback`) — CPAL + WebRTC APM
+//      render-side tee, so echo from speakers gets cancelled from the mic.
+//
+// On macOS and Windows we always use the default CPAL playback because:
+//   - macOS: VoiceProcessingIO handles AEC at the capture side (Apple's
+//     native hardware AEC uses its own reference signal handling).
+//   - Windows: WASAPI AudioCategory_Communications AEC uses the system
+//     render mix as reference — no per-process plumbing needed.
+//
+// Linux is the only platform where the in-app approach is necessary, so
+// the AEC playback path is gated to target_os = "linux".
 
-// AudioCapture: two possible backends. Windows-AEC path when compiled in,
-// otherwise the plain CPAL path. The two types share the same public API
-// (`start`, `ring`, `stop`, `Drop`) so downstream code is identical.
 #[cfg(all(
     feature = "audio",
-    any(not(feature = "windows-aec"), not(target_os = "windows"))
+    any(not(feature = "linux-aec"), not(target_os = "linux"))
+))]
+pub use audio_io::AudioPlayback;
+
+#[cfg(all(feature = "linux-aec", target_os = "linux"))]
+pub use audio_linux_aec::LinuxAecPlayback as AudioPlayback;
+
+// AudioCapture: three possible backends depending on feature flags.
+//   1. Default CPAL (`audio_io::AudioCapture`) — baseline on every platform.
+//   2. Windows AEC (`audio_wasapi::WasapiAudioCapture`) — direct WASAPI
+//      with AudioCategory_Communications, OS APO chain does AEC.
+//   3. Linux AEC (`audio_linux_aec::LinuxAecCapture`) — CPAL + WebRTC APM
+//      capture-side echo cancellation using the playback tee as reference.
+// All three expose the same public API (`start`, `ring`, `stop`, `Drop`).
+
+#[cfg(all(
+    feature = "audio",
+    any(not(feature = "windows-aec"), not(target_os = "windows")),
+    any(not(feature = "linux-aec"), not(target_os = "linux"))
 ))]
 pub use audio_io::AudioCapture;
 
 #[cfg(all(feature = "windows-aec", target_os = "windows"))]
 pub use audio_wasapi::WasapiAudioCapture as AudioCapture;
+
+#[cfg(all(feature = "linux-aec", target_os = "linux"))]
+pub use audio_linux_aec::LinuxAecCapture as AudioCapture;
 pub use call::{CallConfig, CallDecoder, CallEncoder};
 pub use handshake::perform_handshake;
diff --git a/desktop/src-tauri/Cargo.toml b/desktop/src-tauri/Cargo.toml
index 4785d78..5f7947f 100644
--- a/desktop/src-tauri/Cargo.toml
+++ b/desktop/src-tauri/Cargo.toml
@@ -72,7 +72,12 @@ wzp-client = { path = "../../crates/wzp-client", features = ["audio", "vpio"] }
 [target.'cfg(target_os = "windows")'.dependencies]
 wzp-client = { path = "../../crates/wzp-client", features = ["audio", "windows-aec"] }
 
-# Linux: same as Windows for now — plain CPAL.
+# Linux: CPAL playback+capture baseline. AEC is enabled via the top-level
+# `linux-aec` feature in wzp-desktop, which forwards to wzp-client/linux-aec.
+# Keeping it opt-in at the wzp-desktop level (rather than forcing it always
+# on here) lets `cargo tauri build` produce two variants from the same
+# source tree — a noAEC baseline and an AEC build — by toggling the feature
+# at build time: `cargo tauri build -- --features wzp-desktop/linux-aec`.
 [target.'cfg(target_os = "linux")'.dependencies]
 wzp-client = { path = "../../crates/wzp-client", features = ["audio"] }
 
@@ -96,3 +101,7 @@ ndk-context = "0.1"
 [features]
 default = ["custom-protocol"]
 custom-protocol = ["tauri/custom-protocol"]
+# linux-aec: forwards to wzp-client/linux-aec so `cargo tauri build -- --features
+# wzp-desktop/linux-aec` enables the WebRTC AEC3 backend on Linux. No-op on
+# other targets because wzp-client/linux-aec is itself cfg(target_os = "linux").
+linux-aec = ["wzp-client/linux-aec"]
diff --git a/scripts/Dockerfile.linux-desktop-builder b/scripts/Dockerfile.linux-desktop-builder
index 4318da6..9dd742c 100644
--- a/scripts/Dockerfile.linux-desktop-builder
+++ b/scripts/Dockerfile.linux-desktop-builder
@@ -33,7 +33,22 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         librsvg2-dev \
         libglib2.0-dev \
         patchelf \
+        meson \
+        ninja-build \
+        python3 \
+        clang \
     && rm -rf /var/lib/apt/lists/*
 
+# ── webrtc-audio-processing build requirements ──────────────────────────────
+# The `webrtc-audio-processing` Rust crate with the `bundled` feature vendors
+# the PulseAudio webrtc-audio-processing C++ library and builds it via meson
+# + ninja at `cargo build` time. That avoids Debian Bookworm's stale
+# libwebrtc-audio-processing-dev 0.3-1 package (which predates AEC3) and gives
+# us a self-contained static link — no runtime .so dependency, same algorithm
+# on every Linux distro regardless of what apt ships.
+#
+# apt deps for the bundled build: meson, ninja-build, python3, clang,
+# build-essential (already present via android-builder base).
+
 USER builder
 WORKDIR /build/source
diff --git a/scripts/build-linux-desktop-docker.sh b/scripts/build-linux-desktop-docker.sh
index dca491a..ccd20c4 100755
--- a/scripts/build-linux-desktop-docker.sh
+++ b/scripts/build-linux-desktop-docker.sh
@@ -31,12 +31,17 @@ SSH_OPTS="-o ConnectTimeout=15 -o ServerAliveInterval=15 -o ServerAliveCountMax=
 REBUILD_RUST=0
 DO_PULL=1
 IMAGE_BUILD=0
+# WITH_AEC=1 enables the wzp-client `linux-aec` feature (WebRTC AEC3 via
+# webrtc-audio-processing). Downloaded artifacts are tagged `-aec` or
+# `-noAEC` accordingly, so both variants can coexist on disk.
+WITH_AEC=0
 for arg in "$@"; do
     case "$arg" in
         --rust)         REBUILD_RUST=1 ;;
         --pull)         DO_PULL=1 ;;
         --no-pull)      DO_PULL=0 ;;
         --image-build)  IMAGE_BUILD=1 ;;
+        --aec)          WITH_AEC=1 ;;
         -h|--help)
             sed -n '3,25p' "$0"
             exit 0
@@ -80,11 +85,21 @@ NTFY_TOPIC="https://ntfy.sh/wzp"
 BRANCH="${1:-feat/desktop-audio-rewrite}"
 DO_PULL="${2:-1}"
 REBUILD_RUST="${3:-0}"
+WITH_AEC="${4:-0}"
 
 LOG_FILE=/tmp/wzp-linux-desktop-build.log
 GIT_HASH="unknown"
 ENV_FILE="$BASE_DIR/.env"
 
+# Variant suffix so the noAEC and AEC artifacts can coexist. Applied to the
+# downloaded files (renaming during the cargo tauri build itself is not easy).
+# NOTE(review): confirm VARIANT is also set in the shell that runs the scp.
+if [ "$WITH_AEC" = "1" ]; then
+    VARIANT="aec"
+else
+    VARIANT="noAEC"
+fi
+
 notify() { curl -s -d "$1" "$NTFY_TOPIC" > /dev/null 2>&1 || true; }
 
 # Upload to rustypaste; print URL on stdout (or empty on failure).
@@ -155,8 +170,11 @@ mkdir -p "$BASE_DIR/data/cache/cargo-registry" \
          "$BASE_DIR/data/cache-linux-desktop/target"
 chown -R 1000:1000 "$BASE_DIR/data/cache-linux-desktop/target" 2>/dev/null || true
 
+# Pass WITH_AEC into the docker container so the inner build script can
+# decide whether to enable the wzp-client `linux-aec` feature.
 docker run --rm \
     --user 1000:1000 \
+    -e WITH_AEC="$WITH_AEC" \
     -v "$BASE_DIR/data/source:/build/source" \
     -v "$BASE_DIR/data/cache/cargo-registry:/home/builder/.cargo/registry" \
     -v "$BASE_DIR/data/cache/cargo-git:/home/builder/.cargo/git" \
@@ -173,12 +191,25 @@ npm install --silent 2>&1 | tail -5 || npm install 2>&1 | tail -20
 echo ">>> npm run build"
 npm run build 2>&1 | tail -5
 
-echo ">>> cargo tauri build (produces .deb + .AppImage + raw binary)"
-cd src-tauri
-# tauri-cli is already installed in the base image via the Android
-# builder RUN step. It produces target/release/wzp-desktop (raw ELF)
-# plus bundles under target/release/bundle/{deb,appimage}/.
-cargo tauri build 2>&1 | tail -40
+# The linux-aec feature enables the WebRTC AEC3 backend in wzp-client.
+# Opt in only when the caller asked for it; noAEC baseline builds keep
+# the plain CPAL path for comparison.
+#
+# cargo tauri build hands everything after a bare "--" to the cargo
+# invocation it runs underneath, so the feature is selected with
+# "-- --features wzp-desktop/linux-aec". wzp-desktop is the bin crate;
+# its linux-aec feature (declared in desktop/src-tauri/Cargo.toml) only
+# forwards to wzp-client/linux-aec, which is where the backend is
+# actually compiled in.
+if [ "${WITH_AEC:-0}" = "1" ]; then
+    echo ">>> cargo tauri build WITH linux-aec feature"
+    cd src-tauri
+    cargo tauri build -- --features wzp-desktop/linux-aec 2>&1 | tail -40
+else
+    echo ">>> cargo tauri build (noAEC baseline)"
+    cd src-tauri
+    cargo tauri build 2>&1 | tail -40
+fi
 
 echo ""
 echo ">>> Build artifacts:"
@@ -236,7 +267,7 @@ notify_local "WZP Linux desktop build dispatched (branch=$BRANCH)"
 log "Triggering remote build (branch=$BRANCH)..."
 
 # Run; last lines are *_REMOTE_PATH=...
-REMOTE_OUTPUT=$(ssh_cmd "/tmp/wzp-linux-desktop-build.sh '$BRANCH' '$DO_PULL' '$REBUILD_RUST'" || true)
+REMOTE_OUTPUT=$(ssh_cmd "/tmp/wzp-linux-desktop-build.sh '$BRANCH' '$DO_PULL' '$REBUILD_RUST' '$WITH_AEC'" || true)
 echo "$REMOTE_OUTPUT" | tail -80
 
 BIN_REMOTE=$(echo "$REMOTE_OUTPUT" | grep '^BIN_REMOTE_PATH=' | tail -1 | cut -d= -f2-)
@@ -244,21 +275,26 @@ DEB_REMOTE=$(echo "$REMOTE_OUTPUT" | grep '^DEB_REMOTE_PATH=' | tail -1 | cut -d
 APPIMAGE_REMOTE=$(echo "$REMOTE_OUTPUT" | grep '^APPIMAGE_REMOTE_PATH=' | tail -1 | cut -d= -f2-)
 
 if [ -n "$BIN_REMOTE" ]; then
-    log "Downloading wzp-desktop binary to $LOCAL_OUTPUT/..."
-    scp $SSH_OPTS "$REMOTE_HOST:$BIN_REMOTE" "$LOCAL_OUTPUT/wzp-desktop"
-    echo "  $LOCAL_OUTPUT/wzp-desktop ($(du -h "$LOCAL_OUTPUT/wzp-desktop" | cut -f1))"
+    log "Downloading wzp-desktop binary to $LOCAL_OUTPUT/wzp-desktop-$VARIANT ..."
+    scp $SSH_OPTS "$REMOTE_HOST:$BIN_REMOTE" "$LOCAL_OUTPUT/wzp-desktop-$VARIANT"
+    echo "  $LOCAL_OUTPUT/wzp-desktop-$VARIANT ($(du -h "$LOCAL_OUTPUT/wzp-desktop-$VARIANT" | cut -f1))"
 fi
 
 if [ -n "$DEB_REMOTE" ]; then
-    log "Downloading .deb to $LOCAL_OUTPUT/..."
-    scp $SSH_OPTS "$REMOTE_HOST:$DEB_REMOTE" "$LOCAL_OUTPUT/"
-    ls -lh "$LOCAL_OUTPUT"/*.deb
+    # Apply the variant suffix to the downloaded .deb: cargo-tauri names the
+    # file WarzonePhone_<version>_amd64.deb regardless of what we built, so
+    # the variant lives only in our chosen filename.
+    DEB_BASENAME=$(basename "$DEB_REMOTE" .deb)
+    log "Downloading .deb to $LOCAL_OUTPUT/${DEB_BASENAME}-$VARIANT.deb ..."
+    scp $SSH_OPTS "$REMOTE_HOST:$DEB_REMOTE" "$LOCAL_OUTPUT/${DEB_BASENAME}-$VARIANT.deb"
+    ls -lh "$LOCAL_OUTPUT/${DEB_BASENAME}-$VARIANT.deb"
 fi
 
 if [ -n "$APPIMAGE_REMOTE" ]; then
-    log "Downloading .AppImage to $LOCAL_OUTPUT/..."
-    scp $SSH_OPTS "$REMOTE_HOST:$APPIMAGE_REMOTE" "$LOCAL_OUTPUT/"
-    ls -lh "$LOCAL_OUTPUT"/*.AppImage
+    APPIMG_BASENAME=$(basename "$APPIMAGE_REMOTE" .AppImage)
+    log "Downloading .AppImage to $LOCAL_OUTPUT/${APPIMG_BASENAME}-$VARIANT.AppImage ..."
+    scp $SSH_OPTS "$REMOTE_HOST:$APPIMAGE_REMOTE" "$LOCAL_OUTPUT/${APPIMG_BASENAME}-$VARIANT.AppImage"
+    ls -lh "$LOCAL_OUTPUT/${APPIMG_BASENAME}-$VARIANT.AppImage"
 fi
 
 if [ -z "$BIN_REMOTE" ]; then