feat: improved AEC, keyboard shortcuts, dedup participants, dev-fast profile
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m40s

AEC improvements:
- Reduce echo tail from 100ms to 30ms (3.3x faster, suited for laptops)
- Add double-talk detection: freeze adaptation when near-end speaks
- Add residual echo suppression
- Disable AEC by default in --android mode (macOS has built-in AEC)

CLI features:
- Keyboard shortcuts: m=mic mute, s=speaker mute, q=quit (raw terminal mode)
- Dedup participants in RoomUpdate display (same fingerprint+alias shown once)
- Add dev-fast profile (opt-level 2 with incremental compilation)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-06 10:15:23 +04:00
parent cfb48df1ef
commit 1b00b5e2a4
6 changed files with 428 additions and 69 deletions

181
Cargo.lock generated
View File

@@ -291,6 +291,12 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "base16ct"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
[[package]] [[package]]
name = "base64" name = "base64"
version = "0.22.1" version = "0.22.1"
@@ -461,6 +467,7 @@ dependencies = [
"iana-time-zone", "iana-time-zone",
"js-sys", "js-sys",
"num-traits", "num-traits",
"serde",
"wasm-bindgen", "wasm-bindgen",
"windows-link", "windows-link",
] ]
@@ -621,6 +628,24 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-bigint"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
dependencies = [
"generic-array",
"rand_core 0.6.4",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.7" version = "0.1.7"
@@ -644,6 +669,7 @@ dependencies = [
"digest", "digest",
"fiat-crypto", "fiat-crypto",
"rustc_version", "rustc_version",
"serde",
"subtle", "subtle",
"zeroize", "zeroize",
] ]
@@ -810,6 +836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [ dependencies = [
"block-buffer", "block-buffer",
"const-oid",
"crypto-common", "crypto-common",
"subtle", "subtle",
] ]
@@ -844,6 +871,21 @@ dependencies = [
"rustfft", "rustfft",
] ]
[[package]]
name = "ecdsa"
version = "0.16.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
dependencies = [
"der",
"digest",
"elliptic-curve",
"rfc6979",
"serdect",
"signature",
"spki",
]
[[package]] [[package]]
name = "ed25519" name = "ed25519"
version = "2.2.3" version = "2.2.3"
@@ -851,6 +893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [ dependencies = [
"pkcs8", "pkcs8",
"serde",
"signature", "signature",
] ]
@@ -875,6 +918,26 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "elliptic-curve"
version = "0.13.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct",
"crypto-bigint",
"digest",
"ff",
"generic-array",
"group",
"pkcs8",
"rand_core 0.6.4",
"sec1",
"serdect",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.35" version = "0.8.35"
@@ -918,6 +981,16 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "ff"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
dependencies = [
"rand_core 0.6.4",
"subtle",
]
[[package]] [[package]]
name = "fiat-crypto" name = "fiat-crypto"
version = "0.2.9" version = "0.2.9"
@@ -1078,6 +1151,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [ dependencies = [
"typenum", "typenum",
"version_check", "version_check",
"zeroize",
] ]
[[package]] [[package]]
@@ -1137,6 +1211,17 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "group"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
dependencies = [
"ff",
"rand_core 0.6.4",
"subtle",
]
[[package]] [[package]]
name = "h2" name = "h2"
version = "0.4.13" version = "0.4.13"
@@ -1620,6 +1705,21 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "k256"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
dependencies = [
"cfg-if",
"ecdsa",
"elliptic-curve",
"once_cell",
"serdect",
"sha2",
"signature",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.5.0" version = "1.5.0"
@@ -2383,6 +2483,16 @@ dependencies = [
"web-sys", "web-sys",
] ]
[[package]]
name = "rfc6979"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
dependencies = [
"hmac",
"subtle",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.14" version = "0.17.14"
@@ -2561,6 +2671,21 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sec1"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
dependencies = [
"base16ct",
"der",
"generic-array",
"pkcs8",
"serdect",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "3.7.0" version = "3.7.0"
@@ -2665,6 +2790,16 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serdect"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
dependencies = [
"base16ct",
"serde",
]
[[package]] [[package]]
name = "sha1" name = "sha1"
version = "0.10.6" version = "0.10.6"
@@ -2718,6 +2853,7 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [ dependencies = [
"digest",
"rand_core 0.6.4", "rand_core 0.6.4",
] ]
@@ -2931,6 +3067,15 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]] [[package]]
name = "tinystr" name = "tinystr"
version = "0.8.2" version = "0.8.2"
@@ -3350,6 +3495,18 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
dependencies = [
"getrandom 0.4.2",
"js-sys",
"serde_core",
"wasm-bindgen",
]
[[package]] [[package]]
name = "valuable" name = "valuable"
version = "0.1.1" version = "0.1.1"
@@ -3389,7 +3546,28 @@ dependencies = [
[[package]] [[package]]
name = "warzone-protocol" name = "warzone-protocol"
version = "0.1.0" version = "0.0.38"
dependencies = [
"base64",
"bincode",
"bip39",
"chacha20poly1305",
"chrono",
"curve25519-dalek",
"ed25519-dalek",
"hex",
"hkdf",
"k256",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
"thiserror 2.0.18",
"tiny-keccak",
"uuid",
"x25519-dalek",
"zeroize",
]
[[package]] [[package]]
name = "wasi" name = "wasi"
@@ -4035,6 +4213,7 @@ dependencies = [
"bytes", "bytes",
"chrono", "chrono",
"cpal", "cpal",
"libc",
"rustls", "rustls",
"serde", "serde",
"serde_json", "serde_json",

View File

@@ -54,6 +54,12 @@ wzp-crypto = { path = "crates/wzp-crypto" }
wzp-transport = { path = "crates/wzp-transport" } wzp-transport = { path = "crates/wzp-transport" }
wzp-client = { path = "crates/wzp-client" } wzp-client = { path = "crates/wzp-client" }
# Fast dev profile: optimized but with debug info and incremental compilation.
# Use with: cargo run --profile dev-fast
[profile.dev-fast]
inherits = "dev"
opt-level = 2
# Optimize heavy compute deps even in debug builds — # Optimize heavy compute deps even in debug builds —
# real-time audio needs < 20ms per frame, impossible unoptimized. # real-time audio needs < 20ms per frame, impossible unoptimized.
[profile.dev.package.nnnoiseless] [profile.dev.package.nnnoiseless]

View File

@@ -23,6 +23,7 @@ serde_json = "1"
chrono = "0.4" chrono = "0.4"
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] } rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
cpal = { version = "0.15", optional = true } cpal = { version = "0.15", optional = true }
libc = "0.2"
[features] [features]
default = [] default = []

View File

@@ -241,7 +241,7 @@ impl CallEncoder {
block_id: 0, block_id: 0,
frame_in_block: 0, frame_in_block: 0,
timestamp_ms: 0, timestamp_ms: 0,
aec: EchoCanceller::new(48000, 100), // 100 ms echo tail aec: EchoCanceller::new(48000, 30), // 30ms echo tail (laptop/phone)
agc: AutoGainControl::new(), agc: AutoGainControl::new(),
silence_detector: SilenceDetector::new( silence_detector: SilenceDetector::new(
config.silence_threshold_rms, config.silence_threshold_rms,

View File

@@ -375,7 +375,7 @@ async fn main() -> anyhow::Result<()> {
{ {
let audio_opts = AudioOpts { let audio_opts = AudioOpts {
no_denoise: cli.no_denoise || cli.direct_playout, no_denoise: cli.no_denoise || cli.direct_playout,
no_aec: cli.no_aec, no_aec: cli.no_aec || cli.direct_playout, // AEC disabled by default — macOS has built-in AEC
no_agc: cli.no_agc, no_agc: cli.no_agc,
no_fec: cli.no_fec, no_fec: cli.no_fec,
no_silence: cli.no_silence || cli.direct_playout, no_silence: cli.no_silence || cli.direct_playout,
@@ -642,6 +642,41 @@ async fn run_file_mode(
/// QUIC → recv task → jitter buffer → decode tick (20ms) → AudioRing → CPAL playback callback /// QUIC → recv task → jitter buffer → decode tick (20ms) → AudioRing → CPAL playback callback
/// ///
/// All lock-free: CPAL callbacks use atomic ring buffers, no Mutex on the audio path. /// All lock-free: CPAL callbacks use atomic ring buffers, no Mutex on the audio path.
/// RAII guard for terminal raw mode. Restores on drop.
struct RawModeGuard {
orig: libc::termios,
}
impl RawModeGuard {
fn enter() -> Option<Self> {
unsafe {
let mut orig: libc::termios = std::mem::zeroed();
if libc::tcgetattr(libc::STDIN_FILENO, &mut orig) != 0 {
return None;
}
let mut raw = orig;
// ICANON: character-at-a-time input
// ECHO: don't echo typed characters
// ISIG: let us handle Ctrl+C as a byte
raw.c_lflag &= !(libc::ICANON | libc::ECHO | libc::ISIG);
// IXON: disable Ctrl+S/Ctrl+Q flow control so we receive them
raw.c_iflag &= !libc::IXON;
raw.c_cc[libc::VMIN] = 1;
raw.c_cc[libc::VTIME] = 0;
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &raw);
Some(Self { orig })
}
}
}
impl Drop for RawModeGuard {
fn drop(&mut self) {
unsafe {
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &self.orig);
}
}
}
struct AudioOpts { struct AudioOpts {
no_denoise: bool, no_denoise: bool,
no_aec: bool, no_aec: bool,
@@ -674,6 +709,8 @@ async fn run_live(
let farend_ring = StdArc::new(AudioRing::new()); let farend_ring = StdArc::new(AudioRing::new());
let running = StdArc::new(AtomicBool::new(true)); let running = StdArc::new(AtomicBool::new(true));
let mic_muted = StdArc::new(AtomicBool::new(false));
let spk_muted = StdArc::new(AtomicBool::new(false));
// --- Signal handler: set running=false on first Ctrl+C, force-quit on second --- // --- Signal handler: set running=false on first Ctrl+C, force-quit on second ---
let signal_running = running.clone(); let signal_running = running.clone();
@@ -709,6 +746,7 @@ async fn run_live(
// --- Send task: poll capture ring → encode → send via async --- // --- Send task: poll capture ring → encode → send via async ---
let send_transport = transport.clone(); let send_transport = transport.clone();
let send_running = running.clone(); let send_running = running.clone();
let send_mic_muted = mic_muted.clone();
let no_aec = opts.no_aec; let no_aec = opts.no_aec;
let no_agc = opts.no_agc; let no_agc = opts.no_agc;
let _no_fec = opts.no_fec; let _no_fec = opts.no_fec;
@@ -745,6 +783,11 @@ async fn run_live(
continue; continue;
} }
// Mic mute: zero out capture buffer (still encode + send silence to keep stream alive)
if send_mic_muted.load(Ordering::Relaxed) {
capture_buf.fill(0);
}
// Feed AEC far-end reference: what was played through the speaker. // Feed AEC far-end reference: what was played through the speaker.
// Must be called BEFORE encode_frame processes the mic signal. // Must be called BEFORE encode_frame processes the mic signal.
if !no_aec { if !no_aec {
@@ -781,6 +824,7 @@ async fn run_live(
// --- Recv + playout --- // --- Recv + playout ---
let recv_transport = transport.clone(); let recv_transport = transport.clone();
let recv_running = running.clone(); let recv_running = running.clone();
let recv_spk_muted = spk_muted.clone();
let direct_playout = opts.direct_playout; let direct_playout = opts.direct_playout;
// Direct playout: decode on recv, write straight to playout ring (like Android). // Direct playout: decode on recv, write straight to playout ring (like Android).
@@ -826,14 +870,19 @@ async fn run_live(
if !no_agc { if !no_agc {
playout_agc.process_frame(&mut pcm_buf[..n]); playout_agc.process_frame(&mut pcm_buf[..n]);
} }
playout_ring.write(&pcm_buf[..n]); // Always feed AEC (even when speaker muted)
// Feed far-end ring for AEC
farend_ring.write(&pcm_buf[..n]); farend_ring.write(&pcm_buf[..n]);
// Speaker mute: don't write to playout ring
if !recv_spk_muted.load(Ordering::Relaxed) {
playout_ring.write(&pcm_buf[..n]);
}
} }
Err(e) => { Err(e) => {
if let Ok(n) = dec.decode_lost(&mut pcm_buf) { if let Ok(n) = dec.decode_lost(&mut pcm_buf) {
if !recv_spk_muted.load(Ordering::Relaxed) {
playout_ring.write(&pcm_buf[..n]); playout_ring.write(&pcm_buf[..n]);
} }
}
if packets_received < 10 { if packets_received < 10 {
warn!("decode error: {e}"); warn!("decode error: {e}");
} }
@@ -924,9 +973,15 @@ async fn run_live(
) )
.await; .await;
match result { match result {
Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { count, participants }))) => { Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { participants, .. }))) => {
info!(count, "room update"); // Dedup by (fingerprint, alias) — same peer may appear multiple times
for p in &participants { let mut seen = std::collections::HashSet::new();
let unique: Vec<_> = participants
.iter()
.filter(|p| seen.insert((&p.fingerprint, &p.alias)))
.collect();
info!(count = unique.len(), "room update");
for p in &unique {
let name = p let name = p
.alias .alias
.as_deref() .as_deref()
@@ -955,12 +1010,68 @@ async fn run_live(
} }
}; };
// --- Keyboard task: Ctrl+M = toggle mic mute, Ctrl+S = toggle speaker mute ---
let kb_running = running.clone();
let kb_mic = mic_muted.clone();
let kb_spk = spk_muted.clone();
let keyboard_task = async move {
use tokio::io::AsyncReadExt;
// Put terminal in raw mode so we get individual keypresses
let _raw_guard = RawModeGuard::enter();
let mut stdin = tokio::io::stdin();
let mut buf = [0u8; 1];
loop {
if !kb_running.load(Ordering::Relaxed) {
break;
}
match tokio::time::timeout(
std::time::Duration::from_millis(200),
stdin.read(&mut buf),
)
.await
{
Ok(Ok(1)) => match buf[0] {
b'm' | b'M' | 0x0D => {
// 'm' or Ctrl+M
let was = kb_mic.fetch_xor(true, Ordering::SeqCst);
let state = if !was { "MUTED" } else { "unmuted" };
eprintln!("\r[mic {state}]");
}
b's' | b'S' | 0x13 => {
// 's' or Ctrl+S
let was = kb_spk.fetch_xor(true, Ordering::SeqCst);
let state = if !was { "MUTED" } else { "unmuted" };
eprintln!("\r[speaker {state}]");
}
0x03 => {
// Ctrl+C
eprintln!();
info!("Ctrl+C received, shutting down...");
kb_running.store(false, Ordering::SeqCst);
break;
}
b'q' | b'Q' => {
eprintln!("\r[quit]");
kb_running.store(false, Ordering::SeqCst);
break;
}
_ => {}
},
Ok(Ok(_)) | Ok(Err(_)) => break,
Err(_) => {} // timeout
}
}
};
// --- Run all tasks, exit when any finishes (or running flag cleared by Ctrl+C) --- // --- Run all tasks, exit when any finishes (or running flag cleared by Ctrl+C) ---
tokio::select! { tokio::select! {
_ = send_task => info!("send task ended"), _ = send_task => info!("send task ended"),
_ = recv_task => info!("recv task ended"), _ = recv_task => info!("recv task ended"),
_ = playout_task => info!("playout task ended"), _ = playout_task => info!("playout task ended"),
_ = signal_task => info!("signal task ended"), _ = signal_task => info!("signal task ended"),
_ = keyboard_task => info!("keyboard task ended"),
} }
running.store(false, Ordering::SeqCst); running.store(false, Ordering::SeqCst);

View File

@@ -1,25 +1,40 @@
//! Acoustic Echo Cancellation using NLMS adaptive filter. //! Acoustic Echo Cancellation using NLMS adaptive filter.
//! Processes 480-sample (10ms) sub-frames at 48kHz. //!
//! Improvements over naive NLMS:
//! - Double-talk detection: freezes adaptation when near-end speech dominates,
//! preventing the filter from cancelling the local speaker's voice.
//! - Short default tail (30ms) tuned for laptops/phones where speaker and mic
//! are close together.
//! - Residual suppression: attenuates output when echo estimate is confident.
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller. /// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller
/// /// with double-talk detection.
/// Removes acoustic echo by modelling the echo path between the far-end
/// (speaker) signal and the near-end (microphone) signal, then subtracting
/// the estimated echo from the near-end in real time.
pub struct EchoCanceller { pub struct EchoCanceller {
filter_coeffs: Vec<f32>, filter_coeffs: Vec<f32>,
filter_len: usize, filter_len: usize,
far_end_buf: Vec<f32>, far_end_buf: Vec<f32>,
far_end_pos: usize, far_end_pos: usize,
/// NLMS step size (adaptation rate).
mu: f32, mu: f32,
enabled: bool, enabled: bool,
/// Running far-end power estimate (for double-talk detection).
far_power_avg: f32,
/// Running near-end power estimate (for double-talk detection).
near_power_avg: f32,
/// Smoothing factor for power estimates.
power_alpha: f32,
/// Double-talk threshold: if near/far power ratio exceeds this,
/// freeze adaptation to protect near-end speech.
dt_threshold: f32,
/// Residual echo suppression factor (0.0 = none, 1.0 = full).
suppress: f32,
} }
impl EchoCanceller { impl EchoCanceller {
/// Create a new echo canceller. /// Create a new echo canceller.
/// ///
/// * `sample_rate` — typically 48000 /// * `sample_rate` — typically 48000
/// * `filter_ms` — echo-tail length in milliseconds (e.g. 100 for 100 ms) /// * `filter_ms` — echo-tail length in milliseconds (30ms recommended for laptops)
pub fn new(sample_rate: u32, filter_ms: u32) -> Self { pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000; let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
Self { Self {
@@ -27,8 +42,13 @@ impl EchoCanceller {
filter_len, filter_len,
far_end_buf: vec![0.0f32; filter_len], far_end_buf: vec![0.0f32; filter_len],
far_end_pos: 0, far_end_pos: 0,
mu: 0.01, mu: 0.005,
enabled: true, enabled: true,
far_power_avg: 0.0,
near_power_avg: 0.0,
power_alpha: 0.01,
dt_threshold: 4.0,
suppress: 0.6,
} }
} }
@@ -45,9 +65,7 @@ impl EchoCanceller {
/// Process a near-end (microphone) frame, removing the estimated echo. /// Process a near-end (microphone) frame, removing the estimated echo.
/// ///
/// Returns the echo-return-loss enhancement (ERLE) as a ratio: the RMS of /// Returns the echo-return-loss enhancement (ERLE) as a ratio.
/// the original near-end divided by the RMS of the residual. Values > 1.0
/// mean echo was reduced.
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 { pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
if !self.enabled { if !self.enabled {
return 1.0; return 1.0;
@@ -56,34 +74,44 @@ impl EchoCanceller {
let n = nearend.len(); let n = nearend.len();
let fl = self.filter_len; let fl = self.filter_len;
// Compute frame-level power for double-talk detection.
let near_power: f32 = nearend.iter().map(|&s| {
let f = s as f32;
f * f
}).sum::<f32>() / n as f32;
let far_start = (self.far_end_pos + fl * ((n / fl) + 1) - n) % fl;
let far_power: f32 = (0..n).map(|i| {
let fe = self.far_end_buf[(far_start + i) % fl];
fe * fe
}).sum::<f32>() / n as f32;
// Smooth power estimates
self.far_power_avg += self.power_alpha * (far_power - self.far_power_avg);
self.near_power_avg += self.power_alpha * (near_power - self.near_power_avg);
// Double-talk detection: if near-end is much louder than far-end,
// the local speaker is active — freeze adaptation.
let adapt = if self.far_power_avg < 1.0 {
// No far-end signal — nothing to cancel, skip adaptation
false
} else {
let ratio = self.near_power_avg / (self.far_power_avg + 1.0);
ratio < self.dt_threshold
};
let mut sum_near_sq: f64 = 0.0; let mut sum_near_sq: f64 = 0.0;
let mut sum_err_sq: f64 = 0.0; let mut sum_err_sq: f64 = 0.0;
for i in 0..n { for i in 0..n {
let near_f = nearend[i] as f32; let near_f = nearend[i] as f32;
// --- estimate echo as dot(coeffs, farend_window) --- // Estimate echo: dot(coeffs, farend_window)
// The far-end window for this sample starts at let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
// (far_end_pos - 1 - i) mod filter_len (most recent)
// and goes back filter_len samples.
let mut echo_est: f32 = 0.0; let mut echo_est: f32 = 0.0;
let mut power: f32 = 0.0; let mut power: f32 = 0.0;
// Position of the most-recent far-end sample for this near-end sample.
// far_end_pos points to the *next write* position, so the most-recent
// sample written is at far_end_pos - 1. We have already called
// feed_farend for this block, so the relevant samples are the last
// filter_len entries ending just before the current write position,
// offset by how far we are into this near-end frame.
//
// For sample i of the near-end frame, the corresponding far-end
// "now" is far_end_pos - n + i (wrapping).
// far_end_pos points to next-write, so most recent sample is at
// far_end_pos - 1. For the i-th near-end sample we want the
// far-end "now" to be at (far_end_pos - n + i). We add fl
// repeatedly to avoid underflow on the usize subtraction.
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
for k in 0..fl { for k in 0..fl {
let fe_idx = (base + fl - k) % fl; let fe_idx = (base + fl - k) % fl;
let fe = self.far_end_buf[fe_idx]; let fe = self.far_end_buf[fe_idx];
@@ -93,8 +121,9 @@ impl EchoCanceller {
let error = near_f - echo_est; let error = near_f - echo_est;
// --- NLMS coefficient update --- // NLMS coefficient update — only when not in double-talk
let norm = power + 1.0; // +1 regularisation to avoid div-by-zero if adapt && power > 1.0 {
let norm = power + 1.0;
let step = self.mu * error / norm; let step = self.mu * error / norm;
for k in 0..fl { for k in 0..fl {
@@ -102,18 +131,30 @@ impl EchoCanceller {
let fe = self.far_end_buf[fe_idx]; let fe = self.far_end_buf[fe_idx];
self.filter_coeffs[k] += step * fe; self.filter_coeffs[k] += step * fe;
} }
}
// Clamp output // Residual echo suppression: when far-end is active, attenuate
let out = error.max(-32768.0).min(32767.0); // the residual to reduce perceptible echo.
let out = if self.far_power_avg > 100.0 && !adapt {
// Double-talk: pass through near-end with minimal suppression
error
} else if self.far_power_avg > 100.0 {
// Far-end active, not double-talk: apply suppression
error * (1.0 - self.suppress * (echo_est.abs() / (near_f.abs() + 1.0)).min(1.0))
} else {
// No far-end: pass through
error
};
let out = out.max(-32768.0).min(32767.0);
nearend[i] = out as i16; nearend[i] = out as i16;
sum_near_sq += (near_f as f64) * (near_f as f64); sum_near_sq += (near_f as f64) * (near_f as f64);
sum_err_sq += (out as f64) * (out as f64); sum_err_sq += (out as f64) * (out as f64);
} }
// ERLE ratio
if sum_err_sq < 1.0 { if sum_err_sq < 1.0 {
return 100.0; // near-perfect cancellation return 100.0;
} }
(sum_near_sq / sum_err_sq).sqrt() as f32 (sum_near_sq / sum_err_sq).sqrt() as f32
} }
@@ -129,12 +170,12 @@ impl EchoCanceller {
} }
/// Reset the adaptive filter to its initial state. /// Reset the adaptive filter to its initial state.
///
/// Zeroes out all filter coefficients and the far-end circular buffer.
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0); self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0);
self.far_end_buf.iter_mut().for_each(|s| *s = 0.0); self.far_end_buf.iter_mut().for_each(|s| *s = 0.0);
self.far_end_pos = 0; self.far_end_pos = 0;
self.far_power_avg = 0.0;
self.near_power_avg = 0.0;
} }
} }
@@ -144,15 +185,15 @@ mod tests {
#[test] #[test]
fn aec_creates_with_correct_filter_len() { fn aec_creates_with_correct_filter_len() {
let aec = EchoCanceller::new(48000, 100); let aec = EchoCanceller::new(48000, 30);
assert_eq!(aec.filter_len, 4800); assert_eq!(aec.filter_len, 1440);
assert_eq!(aec.filter_coeffs.len(), 4800); assert_eq!(aec.filter_coeffs.len(), 1440);
assert_eq!(aec.far_end_buf.len(), 4800); assert_eq!(aec.far_end_buf.len(), 1440);
} }
#[test] #[test]
fn aec_passthrough_when_disabled() { fn aec_passthrough_when_disabled() {
let mut aec = EchoCanceller::new(48000, 100); let mut aec = EchoCanceller::new(48000, 30);
aec.set_enabled(false); aec.set_enabled(false);
assert!(!aec.is_enabled()); assert!(!aec.is_enabled());
@@ -165,7 +206,7 @@ mod tests {
#[test] #[test]
fn aec_reset_zeroes_state() { fn aec_reset_zeroes_state() {
let mut aec = EchoCanceller::new(48000, 10); // short for test speed let mut aec = EchoCanceller::new(48000, 10);
let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect(); let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect();
aec.feed_farend(&farend); aec.feed_farend(&farend);
@@ -178,13 +219,9 @@ mod tests {
#[test] #[test]
fn aec_reduces_echo_of_known_signal() { fn aec_reduces_echo_of_known_signal() {
// Use a small filter for speed. Feed a known far-end signal, then let filter_ms = 5;
// present the *same* signal as near-end (perfect echo, no room).
// After adaptation the output energy should drop.
let filter_ms = 5; // 240 taps at 48 kHz
let mut aec = EchoCanceller::new(48000, filter_ms); let mut aec = EchoCanceller::new(48000, filter_ms);
// Generate a simple repeating pattern.
let frame_len = 480usize; let frame_len = 480usize;
let make_frame = |offset: usize| -> Vec<i16> { let make_frame = |offset: usize| -> Vec<i16> {
(0..frame_len) (0..frame_len)
@@ -195,18 +232,15 @@ mod tests {
.collect() .collect()
}; };
// Warm up the adaptive filter with several frames.
let mut last_erle = 1.0f32; let mut last_erle = 1.0f32;
for frame_idx in 0..40 { for frame_idx in 0..40 {
let farend = make_frame(frame_idx * frame_len); let farend = make_frame(frame_idx * frame_len);
aec.feed_farend(&farend); aec.feed_farend(&farend);
// Near-end = exact copy of far-end (pure echo).
let mut nearend = farend.clone(); let mut nearend = farend.clone();
last_erle = aec.process_frame(&mut nearend); last_erle = aec.process_frame(&mut nearend);
} }
// After 40 frames the ERLE should be meaningfully > 1.
assert!( assert!(
last_erle > 1.0, last_erle > 1.0,
"expected ERLE > 1.0 after adaptation, got {last_erle}" "expected ERLE > 1.0 after adaptation, got {last_erle}"
@@ -216,13 +250,41 @@ mod tests {
#[test] #[test]
fn aec_silence_passthrough() { fn aec_silence_passthrough() {
let mut aec = EchoCanceller::new(48000, 10); let mut aec = EchoCanceller::new(48000, 10);
// Feed silence far-end
aec.feed_farend(&vec![0i16; 480]); aec.feed_farend(&vec![0i16; 480]);
// Near-end is silence too
let mut frame = vec![0i16; 480]; let mut frame = vec![0i16; 480];
let erle = aec.process_frame(&mut frame); let erle = aec.process_frame(&mut frame);
assert!(erle >= 1.0); assert!(erle >= 1.0);
// Output should still be silence
assert!(frame.iter().all(|&s| s == 0)); assert!(frame.iter().all(|&s| s == 0));
} }
#[test]
fn aec_preserves_nearend_during_doubletalk() {
// When only near-end is active (no far-end), output should
// closely match input — the AEC should not suppress speech.
let mut aec = EchoCanceller::new(48000, 30);
let frame_len = 960;
let nearend_signal: Vec<i16> = (0..frame_len)
.map(|i| {
let t = i as f64 / 48000.0;
(10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
})
.collect();
// Feed silence as far-end
aec.feed_farend(&vec![0i16; frame_len]);
let mut frame = nearend_signal.clone();
aec.process_frame(&mut frame);
// Output energy should be close to input energy (not suppressed)
let input_energy: f64 = nearend_signal.iter().map(|&s| (s as f64).powi(2)).sum();
let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum();
let ratio = output_energy / input_energy;
assert!(
ratio > 0.8,
"near-end speech should be preserved, energy ratio = {ratio:.3}"
);
}
} }