From 1b00b5e2a43e323a7b1e4a7a03d9bc2a8612883e Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Mon, 6 Apr 2026 10:15:23 +0400 Subject: [PATCH] feat: improved AEC, keyboard shortcuts, dedup participants, dev-fast profile AEC improvements: - Reduce echo tail from 100ms to 30ms (3.3x faster, suited for laptops) - Add double-talk detection: freeze adaptation when near-end speaks - Add residual echo suppression - Disable AEC by default in --android mode (macOS has built-in AEC) CLI features: - Keyboard shortcuts: m=mic mute, s=speaker mute, q=quit (raw terminal mode) - Dedup participants in RoomUpdate display (same fingerprint+alias shown once) - Add dev-fast profile (opt-level 2 with incremental compilation) Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 181 ++++++++++++++++++++++++++++++++- Cargo.toml | 6 ++ crates/wzp-client/Cargo.toml | 1 + crates/wzp-client/src/call.rs | 2 +- crates/wzp-client/src/cli.rs | 125 +++++++++++++++++++++-- crates/wzp-codec/src/aec.rs | 182 +++++++++++++++++++++++----------- 6 files changed, 428 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4baaf3..155e071 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -291,6 +291,12 @@ dependencies = [ "tower-service", ] +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base64" version = "0.22.1" @@ -461,6 +467,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] @@ -621,6 +628,24 @@ dependencies = [ "libc", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -644,6 +669,7 @@ dependencies = [ "digest", "fiat-crypto", "rustc_version", + "serde", "subtle", "zeroize", ] @@ -810,6 +836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -844,6 +871,21 @@ dependencies = [ "rustfft", ] +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest", + "elliptic-curve", + "rfc6979", + "serdect", + "signature", + "spki", +] + [[package]] name = "ed25519" version = "2.2.3" @@ -851,6 +893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" dependencies = [ "pkcs8", + "serde", "signature", ] @@ -875,6 +918,26 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "serdect", + "subtle", + "zeroize", +] + [[package]] name = "encoding_rs" version = "0.8.35" @@ -918,6 +981,16 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "fiat-crypto" version = "0.2.9" @@ -1078,6 +1151,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -1137,6 +1211,17 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "h2" version = "0.4.13" @@ -1620,6 +1705,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "k256" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" +dependencies = [ + "cfg-if", + "ecdsa", + "elliptic-curve", + "once_cell", + "serdect", + "sha2", + "signature", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -2383,6 +2483,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + [[package]] name = "ring" version = "0.17.14" @@ -2561,6 +2671,21 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "serdect", + "subtle", + "zeroize", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -2665,6 +2790,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serdect" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177" +dependencies = [ + "base16ct", + "serde", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2718,6 +2853,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ + "digest", "rand_core 0.6.4", ] @@ -2931,6 +3067,15 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -3350,6 +3495,18 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "uuid" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "serde_core", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.1" @@ -3389,7 +3546,28 @@ dependencies = [ [[package]] name = "warzone-protocol" -version = "0.1.0" +version = "0.0.38" +dependencies = [ + "base64", + "bincode", + "bip39", + "chacha20poly1305", + "chrono", + "curve25519-dalek", + "ed25519-dalek", + "hex", + "hkdf", + "k256", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "thiserror 2.0.18", + "tiny-keccak", + "uuid", + "x25519-dalek", + "zeroize", +] [[package]] name = "wasi" @@ -4035,6 +4213,7 @@ dependencies = [ "bytes", "chrono", "cpal", + "libc", "rustls", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 31338d3..88cbba9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,12 @@ wzp-crypto = { path = "crates/wzp-crypto" } wzp-transport = { path = "crates/wzp-transport" } wzp-client = { path = "crates/wzp-client" } +# Fast dev profile: optimized but with debug info and incremental compilation. +# Use with: cargo run --profile dev-fast +[profile.dev-fast] +inherits = "dev" +opt-level = 2 + # Optimize heavy compute deps even in debug builds — # real-time audio needs < 20ms per frame, impossible unoptimized. [profile.dev.package.nnnoiseless] diff --git a/crates/wzp-client/Cargo.toml b/crates/wzp-client/Cargo.toml index d31b6d4..a75589d 100644 --- a/crates/wzp-client/Cargo.toml +++ b/crates/wzp-client/Cargo.toml @@ -23,6 +23,7 @@ serde_json = "1" chrono = "0.4" rustls = { version = "0.23", default-features = false, features = ["ring", "std"] } cpal = { version = "0.15", optional = true } +libc = "0.2" [features] default = [] diff --git a/crates/wzp-client/src/call.rs b/crates/wzp-client/src/call.rs index 7250a12..4ebf2ee 100644 --- a/crates/wzp-client/src/call.rs +++ b/crates/wzp-client/src/call.rs @@ -241,7 +241,7 @@ impl CallEncoder { block_id: 0, frame_in_block: 0, timestamp_ms: 0, - aec: EchoCanceller::new(48000, 100), // 100 ms echo tail + aec: EchoCanceller::new(48000, 30), // 30ms echo tail (laptop/phone) agc: AutoGainControl::new(), silence_detector: SilenceDetector::new( config.silence_threshold_rms, diff --git a/crates/wzp-client/src/cli.rs b/crates/wzp-client/src/cli.rs index d7783db..be40cb2 100644 --- a/crates/wzp-client/src/cli.rs +++ b/crates/wzp-client/src/cli.rs @@ -375,7 +375,7 @@ async fn main() -> anyhow::Result<()> { { let audio_opts = AudioOpts { no_denoise: cli.no_denoise || cli.direct_playout, - no_aec: cli.no_aec, + no_aec: cli.no_aec || cli.direct_playout, // AEC disabled by default — macOS has built-in AEC no_agc: cli.no_agc, no_fec: cli.no_fec, no_silence: cli.no_silence || cli.direct_playout, @@ -642,6 +642,41 @@ async fn run_file_mode( /// QUIC → recv task → jitter buffer → decode tick (20ms) → AudioRing → CPAL playback callback /// /// All lock-free: CPAL callbacks use atomic ring buffers, no Mutex on the audio path. +/// RAII guard for terminal raw mode. Restores on drop. +struct RawModeGuard { + orig: libc::termios, +} + +impl RawModeGuard { + fn enter() -> Option { + unsafe { + let mut orig: libc::termios = std::mem::zeroed(); + if libc::tcgetattr(libc::STDIN_FILENO, &mut orig) != 0 { + return None; + } + let mut raw = orig; + // ICANON: character-at-a-time input + // ECHO: don't echo typed characters + // ISIG: let us handle Ctrl+C as a byte + raw.c_lflag &= !(libc::ICANON | libc::ECHO | libc::ISIG); + // IXON: disable Ctrl+S/Ctrl+Q flow control so we receive them + raw.c_iflag &= !libc::IXON; + raw.c_cc[libc::VMIN] = 1; + raw.c_cc[libc::VTIME] = 0; + libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &raw); + Some(Self { orig }) + } + } +} + +impl Drop for RawModeGuard { + fn drop(&mut self) { + unsafe { + libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &self.orig); + } + } +} + struct AudioOpts { no_denoise: bool, no_aec: bool, @@ -674,6 +709,8 @@ async fn run_live( let farend_ring = StdArc::new(AudioRing::new()); let running = StdArc::new(AtomicBool::new(true)); + let mic_muted = StdArc::new(AtomicBool::new(false)); + let spk_muted = StdArc::new(AtomicBool::new(false)); // --- Signal handler: set running=false on first Ctrl+C, force-quit on second --- let signal_running = running.clone(); @@ -709,6 +746,7 @@ async fn run_live( // --- Send task: poll capture ring → encode → send via async --- let send_transport = transport.clone(); let send_running = running.clone(); + let send_mic_muted = mic_muted.clone(); let no_aec = opts.no_aec; let no_agc = opts.no_agc; let _no_fec = opts.no_fec; @@ -745,6 +783,11 @@ async fn run_live( continue; } + // Mic mute: zero out capture buffer (still encode + send silence to keep stream alive) + if send_mic_muted.load(Ordering::Relaxed) { + capture_buf.fill(0); + } + // Feed AEC far-end reference: what was played through the speaker. // Must be called BEFORE encode_frame processes the mic signal. if !no_aec { @@ -781,6 +824,7 @@ async fn run_live( // --- Recv + playout --- let recv_transport = transport.clone(); let recv_running = running.clone(); + let recv_spk_muted = spk_muted.clone(); let direct_playout = opts.direct_playout; // Direct playout: decode on recv, write straight to playout ring (like Android). @@ -826,13 +870,18 @@ async fn run_live( if !no_agc { playout_agc.process_frame(&mut pcm_buf[..n]); } - playout_ring.write(&pcm_buf[..n]); - // Feed far-end ring for AEC + // Always feed AEC (even when speaker muted) farend_ring.write(&pcm_buf[..n]); + // Speaker mute: don't write to playout ring + if !recv_spk_muted.load(Ordering::Relaxed) { + playout_ring.write(&pcm_buf[..n]); + } } Err(e) => { if let Ok(n) = dec.decode_lost(&mut pcm_buf) { - playout_ring.write(&pcm_buf[..n]); + if !recv_spk_muted.load(Ordering::Relaxed) { + playout_ring.write(&pcm_buf[..n]); + } } if packets_received < 10 { warn!("decode error: {e}"); @@ -924,9 +973,15 @@ async fn run_live( ) .await; match result { - Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { count, participants }))) => { - info!(count, "room update"); - for p in &participants { + Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { participants, .. }))) => { + // Dedup by (fingerprint, alias) — same peer may appear multiple times + let mut seen = std::collections::HashSet::new(); + let unique: Vec<_> = participants + .iter() + .filter(|p| seen.insert((&p.fingerprint, &p.alias))) + .collect(); + info!(count = unique.len(), "room update"); + for p in &unique { let name = p .alias .as_deref() @@ -955,12 +1010,68 @@ async fn run_live( } }; + // --- Keyboard task: Ctrl+M = toggle mic mute, Ctrl+S = toggle speaker mute --- + let kb_running = running.clone(); + let kb_mic = mic_muted.clone(); + let kb_spk = spk_muted.clone(); + let keyboard_task = async move { + use tokio::io::AsyncReadExt; + + // Put terminal in raw mode so we get individual keypresses + let _raw_guard = RawModeGuard::enter(); + + let mut stdin = tokio::io::stdin(); + let mut buf = [0u8; 1]; + loop { + if !kb_running.load(Ordering::Relaxed) { + break; + } + match tokio::time::timeout( + std::time::Duration::from_millis(200), + stdin.read(&mut buf), + ) + .await + { + Ok(Ok(1)) => match buf[0] { + b'm' | b'M' | 0x0D => { + // 'm' or Ctrl+M + let was = kb_mic.fetch_xor(true, Ordering::SeqCst); + let state = if !was { "MUTED" } else { "unmuted" }; + eprintln!("\r[mic {state}]"); + } + b's' | b'S' | 0x13 => { + // 's' or Ctrl+S + let was = kb_spk.fetch_xor(true, Ordering::SeqCst); + let state = if !was { "MUTED" } else { "unmuted" }; + eprintln!("\r[speaker {state}]"); + } + 0x03 => { + // Ctrl+C + eprintln!(); + info!("Ctrl+C received, shutting down..."); + kb_running.store(false, Ordering::SeqCst); + break; + } + b'q' | b'Q' => { + eprintln!("\r[quit]"); + kb_running.store(false, Ordering::SeqCst); + break; + } + _ => {} + }, + Ok(Ok(_)) | Ok(Err(_)) => break, + Err(_) => {} // timeout + } + } + }; + // --- Run all tasks, exit when any finishes (or running flag cleared by Ctrl+C) --- tokio::select! { _ = send_task => info!("send task ended"), _ = recv_task => info!("recv task ended"), _ = playout_task => info!("playout task ended"), _ = signal_task => info!("signal task ended"), + _ = keyboard_task => info!("keyboard task ended"), } running.store(false, Ordering::SeqCst); diff --git a/crates/wzp-codec/src/aec.rs b/crates/wzp-codec/src/aec.rs index 79e510c..e3ec35a 100644 --- a/crates/wzp-codec/src/aec.rs +++ b/crates/wzp-codec/src/aec.rs @@ -1,25 +1,40 @@ //! Acoustic Echo Cancellation using NLMS adaptive filter. -//! Processes 480-sample (10ms) sub-frames at 48kHz. +//! +//! Improvements over naive NLMS: +//! - Double-talk detection: freezes adaptation when near-end speech dominates, +//! preventing the filter from cancelling the local speaker's voice. +//! - Short default tail (30ms) tuned for laptops/phones where speaker and mic +//! are close together. +//! - Residual suppression: attenuates output when echo estimate is confident. -/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller. -/// -/// Removes acoustic echo by modelling the echo path between the far-end -/// (speaker) signal and the near-end (microphone) signal, then subtracting -/// the estimated echo from the near-end in real time. +/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller +/// with double-talk detection. pub struct EchoCanceller { filter_coeffs: Vec, filter_len: usize, far_end_buf: Vec, far_end_pos: usize, + /// NLMS step size (adaptation rate). mu: f32, enabled: bool, + /// Running far-end power estimate (for double-talk detection). + far_power_avg: f32, + /// Running near-end power estimate (for double-talk detection). + near_power_avg: f32, + /// Smoothing factor for power estimates. + power_alpha: f32, + /// Double-talk threshold: if near/far power ratio exceeds this, + /// freeze adaptation to protect near-end speech. + dt_threshold: f32, + /// Residual echo suppression factor (0.0 = none, 1.0 = full). + suppress: f32, } impl EchoCanceller { /// Create a new echo canceller. /// /// * `sample_rate` — typically 48000 - /// * `filter_ms` — echo-tail length in milliseconds (e.g. 100 for 100 ms) + /// * `filter_ms` — echo-tail length in milliseconds (30ms recommended for laptops) pub fn new(sample_rate: u32, filter_ms: u32) -> Self { let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000; Self { @@ -27,8 +42,13 @@ impl EchoCanceller { filter_len, far_end_buf: vec![0.0f32; filter_len], far_end_pos: 0, - mu: 0.01, + mu: 0.005, enabled: true, + far_power_avg: 0.0, + near_power_avg: 0.0, + power_alpha: 0.01, + dt_threshold: 4.0, + suppress: 0.6, } } @@ -45,9 +65,7 @@ impl EchoCanceller { /// Process a near-end (microphone) frame, removing the estimated echo. /// - /// Returns the echo-return-loss enhancement (ERLE) as a ratio: the RMS of - /// the original near-end divided by the RMS of the residual. Values > 1.0 - /// mean echo was reduced. + /// Returns the echo-return-loss enhancement (ERLE) as a ratio. pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 { if !self.enabled { return 1.0; @@ -56,34 +74,44 @@ impl EchoCanceller { let n = nearend.len(); let fl = self.filter_len; + // Compute frame-level power for double-talk detection. + let near_power: f32 = nearend.iter().map(|&s| { + let f = s as f32; + f * f + }).sum::() / n as f32; + + let far_start = (self.far_end_pos + fl * ((n / fl) + 1) - n) % fl; + let far_power: f32 = (0..n).map(|i| { + let fe = self.far_end_buf[(far_start + i) % fl]; + fe * fe + }).sum::() / n as f32; + + // Smooth power estimates + self.far_power_avg += self.power_alpha * (far_power - self.far_power_avg); + self.near_power_avg += self.power_alpha * (near_power - self.near_power_avg); + + // Double-talk detection: if near-end is much louder than far-end, + // the local speaker is active — freeze adaptation. + let adapt = if self.far_power_avg < 1.0 { + // No far-end signal — nothing to cancel, skip adaptation + false + } else { + let ratio = self.near_power_avg / (self.far_power_avg + 1.0); + ratio < self.dt_threshold + }; + let mut sum_near_sq: f64 = 0.0; let mut sum_err_sq: f64 = 0.0; for i in 0..n { let near_f = nearend[i] as f32; - // --- estimate echo as dot(coeffs, farend_window) --- - // The far-end window for this sample starts at - // (far_end_pos - 1 - i) mod filter_len (most recent) - // and goes back filter_len samples. + // Estimate echo: dot(coeffs, farend_window) + let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl; + let mut echo_est: f32 = 0.0; let mut power: f32 = 0.0; - // Position of the most-recent far-end sample for this near-end sample. - // far_end_pos points to the *next write* position, so the most-recent - // sample written is at far_end_pos - 1. We have already called - // feed_farend for this block, so the relevant samples are the last - // filter_len entries ending just before the current write position, - // offset by how far we are into this near-end frame. - // - // For sample i of the near-end frame, the corresponding far-end - // "now" is far_end_pos - n + i (wrapping). - // far_end_pos points to next-write, so most recent sample is at - // far_end_pos - 1. For the i-th near-end sample we want the - // far-end "now" to be at (far_end_pos - n + i). We add fl - // repeatedly to avoid underflow on the usize subtraction. - let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl; - for k in 0..fl { let fe_idx = (base + fl - k) % fl; let fe = self.far_end_buf[fe_idx]; @@ -93,27 +121,40 @@ impl EchoCanceller { let error = near_f - echo_est; - // --- NLMS coefficient update --- - let norm = power + 1.0; // +1 regularisation to avoid div-by-zero - let step = self.mu * error / norm; + // NLMS coefficient update — only when not in double-talk + if adapt && power > 1.0 { + let norm = power + 1.0; + let step = self.mu * error / norm; - for k in 0..fl { - let fe_idx = (base + fl - k) % fl; - let fe = self.far_end_buf[fe_idx]; - self.filter_coeffs[k] += step * fe; + for k in 0..fl { + let fe_idx = (base + fl - k) % fl; + let fe = self.far_end_buf[fe_idx]; + self.filter_coeffs[k] += step * fe; + } } - // Clamp output - let out = error.max(-32768.0).min(32767.0); + // Residual echo suppression: when far-end is active, attenuate + // the residual to reduce perceptible echo. + let out = if self.far_power_avg > 100.0 && !adapt { + // Double-talk: pass through near-end with minimal suppression + error + } else if self.far_power_avg > 100.0 { + // Far-end active, not double-talk: apply suppression + error * (1.0 - self.suppress * (echo_est.abs() / (near_f.abs() + 1.0)).min(1.0)) + } else { + // No far-end: pass through + error + }; + + let out = out.max(-32768.0).min(32767.0); nearend[i] = out as i16; sum_near_sq += (near_f as f64) * (near_f as f64); sum_err_sq += (out as f64) * (out as f64); } - // ERLE ratio if sum_err_sq < 1.0 { - return 100.0; // near-perfect cancellation + return 100.0; } (sum_near_sq / sum_err_sq).sqrt() as f32 } @@ -129,12 +170,12 @@ impl EchoCanceller { } /// Reset the adaptive filter to its initial state. - /// - /// Zeroes out all filter coefficients and the far-end circular buffer. pub fn reset(&mut self) { self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0); self.far_end_buf.iter_mut().for_each(|s| *s = 0.0); self.far_end_pos = 0; + self.far_power_avg = 0.0; + self.near_power_avg = 0.0; } } @@ -144,15 +185,15 @@ mod tests { #[test] fn aec_creates_with_correct_filter_len() { - let aec = EchoCanceller::new(48000, 100); - assert_eq!(aec.filter_len, 4800); - assert_eq!(aec.filter_coeffs.len(), 4800); - assert_eq!(aec.far_end_buf.len(), 4800); + let aec = EchoCanceller::new(48000, 30); + assert_eq!(aec.filter_len, 1440); + assert_eq!(aec.filter_coeffs.len(), 1440); + assert_eq!(aec.far_end_buf.len(), 1440); } #[test] fn aec_passthrough_when_disabled() { - let mut aec = EchoCanceller::new(48000, 100); + let mut aec = EchoCanceller::new(48000, 30); aec.set_enabled(false); assert!(!aec.is_enabled()); @@ -165,7 +206,7 @@ mod tests { #[test] fn aec_reset_zeroes_state() { - let mut aec = EchoCanceller::new(48000, 10); // short for test speed + let mut aec = EchoCanceller::new(48000, 10); let farend: Vec = (0..480).map(|i| ((i * 37) % 1000) as i16).collect(); aec.feed_farend(&farend); @@ -178,13 +219,9 @@ mod tests { #[test] fn aec_reduces_echo_of_known_signal() { - // Use a small filter for speed. Feed a known far-end signal, then - // present the *same* signal as near-end (perfect echo, no room). - // After adaptation the output energy should drop. - let filter_ms = 5; // 240 taps at 48 kHz + let filter_ms = 5; let mut aec = EchoCanceller::new(48000, filter_ms); - // Generate a simple repeating pattern. let frame_len = 480usize; let make_frame = |offset: usize| -> Vec { (0..frame_len) @@ -195,18 +232,15 @@ mod tests { .collect() }; - // Warm up the adaptive filter with several frames. let mut last_erle = 1.0f32; for frame_idx in 0..40 { let farend = make_frame(frame_idx * frame_len); aec.feed_farend(&farend); - // Near-end = exact copy of far-end (pure echo). let mut nearend = farend.clone(); last_erle = aec.process_frame(&mut nearend); } - // After 40 frames the ERLE should be meaningfully > 1. assert!( last_erle > 1.0, "expected ERLE > 1.0 after adaptation, got {last_erle}" @@ -216,13 +250,41 @@ mod tests { #[test] fn aec_silence_passthrough() { let mut aec = EchoCanceller::new(48000, 10); - // Feed silence far-end aec.feed_farend(&vec![0i16; 480]); - // Near-end is silence too let mut frame = vec![0i16; 480]; let erle = aec.process_frame(&mut frame); assert!(erle >= 1.0); - // Output should still be silence assert!(frame.iter().all(|&s| s == 0)); } + + #[test] + fn aec_preserves_nearend_during_doubletalk() { + // When only near-end is active (no far-end), output should + // closely match input — the AEC should not suppress speech. + let mut aec = EchoCanceller::new(48000, 30); + + let frame_len = 960; + let nearend_signal: Vec = (0..frame_len) + .map(|i| { + let t = i as f64 / 48000.0; + (10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16 + }) + .collect(); + + // Feed silence as far-end + aec.feed_farend(&vec![0i16; frame_len]); + + let mut frame = nearend_signal.clone(); + aec.process_frame(&mut frame); + + // Output energy should be close to input energy (not suppressed) + let input_energy: f64 = nearend_signal.iter().map(|&s| (s as f64).powi(2)).sum(); + let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum(); + let ratio = output_energy / input_energy; + + assert!( + ratio > 0.8, + "near-end speech should be preserved, energy ratio = {ratio:.3}" + ); + } }