feat: improved AEC, keyboard shortcuts, dedup participants, dev-fast profile
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m40s

AEC improvements:
- Reduce echo tail from 100ms to 30ms (3.3x faster, suited for laptops)
- Add double-talk detection: freeze adaptation when near-end speaks
- Add residual echo suppression
- Disable AEC by default in --android mode (macOS has built-in AEC)

CLI features:
- Keyboard shortcuts: m=mic mute, s=speaker mute, q=quit (raw terminal mode)
- Dedup participants in RoomUpdate display (same fingerprint+alias shown once)
- Add dev-fast profile (opt-level 2 with incremental compilation)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-06 10:15:23 +04:00
parent cfb48df1ef
commit 1b00b5e2a4
6 changed files with 428 additions and 69 deletions

181
Cargo.lock generated
View File

@@ -291,6 +291,12 @@ dependencies = [
"tower-service",
]
[[package]]
name = "base16ct"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
[[package]]
name = "base64"
version = "0.22.1"
@@ -461,6 +467,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-link",
]
@@ -621,6 +628,24 @@ dependencies = [
"libc",
]
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-bigint"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
dependencies = [
"generic-array",
"rand_core 0.6.4",
"subtle",
"zeroize",
]
[[package]]
name = "crypto-common"
version = "0.1.7"
@@ -644,6 +669,7 @@ dependencies = [
"digest",
"fiat-crypto",
"rustc_version",
"serde",
"subtle",
"zeroize",
]
@@ -810,6 +836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"const-oid",
"crypto-common",
"subtle",
]
@@ -844,6 +871,21 @@ dependencies = [
"rustfft",
]
[[package]]
name = "ecdsa"
version = "0.16.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
dependencies = [
"der",
"digest",
"elliptic-curve",
"rfc6979",
"serdect",
"signature",
"spki",
]
[[package]]
name = "ed25519"
version = "2.2.3"
@@ -851,6 +893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [
"pkcs8",
"serde",
"signature",
]
@@ -875,6 +918,26 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "elliptic-curve"
version = "0.13.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct",
"crypto-bigint",
"digest",
"ff",
"generic-array",
"group",
"pkcs8",
"rand_core 0.6.4",
"sec1",
"serdect",
"subtle",
"zeroize",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
@@ -918,6 +981,16 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "ff"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
dependencies = [
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "fiat-crypto"
version = "0.2.9"
@@ -1078,6 +1151,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
"zeroize",
]
[[package]]
@@ -1137,6 +1211,17 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "group"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
dependencies = [
"ff",
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "h2"
version = "0.4.13"
@@ -1620,6 +1705,21 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "k256"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
dependencies = [
"cfg-if",
"ecdsa",
"elliptic-curve",
"once_cell",
"serdect",
"sha2",
"signature",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@@ -2383,6 +2483,16 @@ dependencies = [
"web-sys",
]
[[package]]
name = "rfc6979"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
dependencies = [
"hmac",
"subtle",
]
[[package]]
name = "ring"
version = "0.17.14"
@@ -2561,6 +2671,21 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sec1"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
dependencies = [
"base16ct",
"der",
"generic-array",
"pkcs8",
"serdect",
"subtle",
"zeroize",
]
[[package]]
name = "security-framework"
version = "3.7.0"
@@ -2665,6 +2790,16 @@ dependencies = [
"serde",
]
[[package]]
name = "serdect"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
dependencies = [
"base16ct",
"serde",
]
[[package]]
name = "sha1"
version = "0.10.6"
@@ -2718,6 +2853,7 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core 0.6.4",
]
@@ -2931,6 +3067,15 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "tinystr"
version = "0.8.2"
@@ -3350,6 +3495,18 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
dependencies = [
"getrandom 0.4.2",
"js-sys",
"serde_core",
"wasm-bindgen",
]
[[package]]
name = "valuable"
version = "0.1.1"
@@ -3389,7 +3546,28 @@ dependencies = [
[[package]]
name = "warzone-protocol"
version = "0.1.0"
version = "0.0.38"
dependencies = [
"base64",
"bincode",
"bip39",
"chacha20poly1305",
"chrono",
"curve25519-dalek",
"ed25519-dalek",
"hex",
"hkdf",
"k256",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
"thiserror 2.0.18",
"tiny-keccak",
"uuid",
"x25519-dalek",
"zeroize",
]
[[package]]
name = "wasi"
@@ -4035,6 +4213,7 @@ dependencies = [
"bytes",
"chrono",
"cpal",
"libc",
"rustls",
"serde",
"serde_json",

View File

@@ -54,6 +54,12 @@ wzp-crypto = { path = "crates/wzp-crypto" }
wzp-transport = { path = "crates/wzp-transport" }
wzp-client = { path = "crates/wzp-client" }
# Fast dev profile: optimized but with debug info and incremental compilation.
# Use with: cargo run --profile dev-fast
[profile.dev-fast]
inherits = "dev"
opt-level = 2
# Optimize heavy compute deps even in debug builds —
# real-time audio needs < 20ms per frame, impossible unoptimized.
[profile.dev.package.nnnoiseless]

View File

@@ -23,6 +23,7 @@ serde_json = "1"
chrono = "0.4"
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
cpal = { version = "0.15", optional = true }
libc = "0.2"
[features]
default = []

View File

@@ -241,7 +241,7 @@ impl CallEncoder {
block_id: 0,
frame_in_block: 0,
timestamp_ms: 0,
aec: EchoCanceller::new(48000, 100), // 100 ms echo tail
aec: EchoCanceller::new(48000, 30), // 30ms echo tail (laptop/phone)
agc: AutoGainControl::new(),
silence_detector: SilenceDetector::new(
config.silence_threshold_rms,

View File

@@ -375,7 +375,7 @@ async fn main() -> anyhow::Result<()> {
{
let audio_opts = AudioOpts {
no_denoise: cli.no_denoise || cli.direct_playout,
no_aec: cli.no_aec,
no_aec: cli.no_aec || cli.direct_playout, // AEC disabled by default — macOS has built-in AEC
no_agc: cli.no_agc,
no_fec: cli.no_fec,
no_silence: cli.no_silence || cli.direct_playout,
@@ -642,6 +642,41 @@ async fn run_file_mode(
/// QUIC → recv task → jitter buffer → decode tick (20ms) → AudioRing → CPAL playback callback
///
/// All lock-free: CPAL callbacks use atomic ring buffers, no Mutex on the audio path.
/// RAII guard for terminal raw mode. Restores on drop.
struct RawModeGuard {
orig: libc::termios,
}
impl RawModeGuard {
fn enter() -> Option<Self> {
unsafe {
let mut orig: libc::termios = std::mem::zeroed();
if libc::tcgetattr(libc::STDIN_FILENO, &mut orig) != 0 {
return None;
}
let mut raw = orig;
// ICANON: character-at-a-time input
// ECHO: don't echo typed characters
// ISIG: let us handle Ctrl+C as a byte
raw.c_lflag &= !(libc::ICANON | libc::ECHO | libc::ISIG);
// IXON: disable Ctrl+S/Ctrl+Q flow control so we receive them
raw.c_iflag &= !libc::IXON;
raw.c_cc[libc::VMIN] = 1;
raw.c_cc[libc::VTIME] = 0;
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &raw);
Some(Self { orig })
}
}
}
impl Drop for RawModeGuard {
fn drop(&mut self) {
unsafe {
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &self.orig);
}
}
}
struct AudioOpts {
no_denoise: bool,
no_aec: bool,
@@ -674,6 +709,8 @@ async fn run_live(
let farend_ring = StdArc::new(AudioRing::new());
let running = StdArc::new(AtomicBool::new(true));
let mic_muted = StdArc::new(AtomicBool::new(false));
let spk_muted = StdArc::new(AtomicBool::new(false));
// --- Signal handler: set running=false on first Ctrl+C, force-quit on second ---
let signal_running = running.clone();
@@ -709,6 +746,7 @@ async fn run_live(
// --- Send task: poll capture ring → encode → send via async ---
let send_transport = transport.clone();
let send_running = running.clone();
let send_mic_muted = mic_muted.clone();
let no_aec = opts.no_aec;
let no_agc = opts.no_agc;
let _no_fec = opts.no_fec;
@@ -745,6 +783,11 @@ async fn run_live(
continue;
}
// Mic mute: zero out capture buffer (still encode + send silence to keep stream alive)
if send_mic_muted.load(Ordering::Relaxed) {
capture_buf.fill(0);
}
// Feed AEC far-end reference: what was played through the speaker.
// Must be called BEFORE encode_frame processes the mic signal.
if !no_aec {
@@ -781,6 +824,7 @@ async fn run_live(
// --- Recv + playout ---
let recv_transport = transport.clone();
let recv_running = running.clone();
let recv_spk_muted = spk_muted.clone();
let direct_playout = opts.direct_playout;
// Direct playout: decode on recv, write straight to playout ring (like Android).
@@ -826,13 +870,18 @@ async fn run_live(
if !no_agc {
playout_agc.process_frame(&mut pcm_buf[..n]);
}
playout_ring.write(&pcm_buf[..n]);
// Feed far-end ring for AEC
// Always feed AEC (even when speaker muted)
farend_ring.write(&pcm_buf[..n]);
// Speaker mute: don't write to playout ring
if !recv_spk_muted.load(Ordering::Relaxed) {
playout_ring.write(&pcm_buf[..n]);
}
}
Err(e) => {
if let Ok(n) = dec.decode_lost(&mut pcm_buf) {
playout_ring.write(&pcm_buf[..n]);
if !recv_spk_muted.load(Ordering::Relaxed) {
playout_ring.write(&pcm_buf[..n]);
}
}
if packets_received < 10 {
warn!("decode error: {e}");
@@ -924,9 +973,15 @@ async fn run_live(
)
.await;
match result {
Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { count, participants }))) => {
info!(count, "room update");
for p in &participants {
Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { participants, .. }))) => {
// Dedup by (fingerprint, alias) — same peer may appear multiple times
let mut seen = std::collections::HashSet::new();
let unique: Vec<_> = participants
.iter()
.filter(|p| seen.insert((&p.fingerprint, &p.alias)))
.collect();
info!(count = unique.len(), "room update");
for p in &unique {
let name = p
.alias
.as_deref()
@@ -955,12 +1010,68 @@ async fn run_live(
}
};
// --- Keyboard task: Ctrl+M = toggle mic mute, Ctrl+S = toggle speaker mute ---
let kb_running = running.clone();
let kb_mic = mic_muted.clone();
let kb_spk = spk_muted.clone();
let keyboard_task = async move {
use tokio::io::AsyncReadExt;
// Put terminal in raw mode so we get individual keypresses
let _raw_guard = RawModeGuard::enter();
let mut stdin = tokio::io::stdin();
let mut buf = [0u8; 1];
loop {
if !kb_running.load(Ordering::Relaxed) {
break;
}
match tokio::time::timeout(
std::time::Duration::from_millis(200),
stdin.read(&mut buf),
)
.await
{
Ok(Ok(1)) => match buf[0] {
b'm' | b'M' | 0x0D => {
// 'm' or Ctrl+M
let was = kb_mic.fetch_xor(true, Ordering::SeqCst);
let state = if !was { "MUTED" } else { "unmuted" };
eprintln!("\r[mic {state}]");
}
b's' | b'S' | 0x13 => {
// 's' or Ctrl+S
let was = kb_spk.fetch_xor(true, Ordering::SeqCst);
let state = if !was { "MUTED" } else { "unmuted" };
eprintln!("\r[speaker {state}]");
}
0x03 => {
// Ctrl+C
eprintln!();
info!("Ctrl+C received, shutting down...");
kb_running.store(false, Ordering::SeqCst);
break;
}
b'q' | b'Q' => {
eprintln!("\r[quit]");
kb_running.store(false, Ordering::SeqCst);
break;
}
_ => {}
},
Ok(Ok(_)) | Ok(Err(_)) => break,
Err(_) => {} // timeout
}
}
};
// --- Run all tasks, exit when any finishes (or running flag cleared by Ctrl+C) ---
tokio::select! {
_ = send_task => info!("send task ended"),
_ = recv_task => info!("recv task ended"),
_ = playout_task => info!("playout task ended"),
_ = signal_task => info!("signal task ended"),
_ = keyboard_task => info!("keyboard task ended"),
}
running.store(false, Ordering::SeqCst);

View File

@@ -1,25 +1,40 @@
//! Acoustic Echo Cancellation using NLMS adaptive filter.
//! Processes 480-sample (10ms) sub-frames at 48kHz.
//!
//! Improvements over naive NLMS:
//! - Double-talk detection: freezes adaptation when near-end speech dominates,
//! preventing the filter from cancelling the local speaker's voice.
//! - Short default tail (30ms) tuned for laptops/phones where speaker and mic
//! are close together.
//! - Residual suppression: attenuates output when echo estimate is confident.
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller.
///
/// Removes acoustic echo by modelling the echo path between the far-end
/// (speaker) signal and the near-end (microphone) signal, then subtracting
/// the estimated echo from the near-end in real time.
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller
/// with double-talk detection.
pub struct EchoCanceller {
filter_coeffs: Vec<f32>,
filter_len: usize,
far_end_buf: Vec<f32>,
far_end_pos: usize,
/// NLMS step size (adaptation rate).
mu: f32,
enabled: bool,
/// Running far-end power estimate (for double-talk detection).
far_power_avg: f32,
/// Running near-end power estimate (for double-talk detection).
near_power_avg: f32,
/// Smoothing factor for power estimates.
power_alpha: f32,
/// Double-talk threshold: if near/far power ratio exceeds this,
/// freeze adaptation to protect near-end speech.
dt_threshold: f32,
/// Residual echo suppression factor (0.0 = none, 1.0 = full).
suppress: f32,
}
impl EchoCanceller {
/// Create a new echo canceller.
///
/// * `sample_rate` — typically 48000
/// * `filter_ms` — echo-tail length in milliseconds (e.g. 100 for 100 ms)
/// * `filter_ms` — echo-tail length in milliseconds (30ms recommended for laptops)
pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
Self {
@@ -27,8 +42,13 @@ impl EchoCanceller {
filter_len,
far_end_buf: vec![0.0f32; filter_len],
far_end_pos: 0,
mu: 0.01,
mu: 0.005,
enabled: true,
far_power_avg: 0.0,
near_power_avg: 0.0,
power_alpha: 0.01,
dt_threshold: 4.0,
suppress: 0.6,
}
}
@@ -45,9 +65,7 @@ impl EchoCanceller {
/// Process a near-end (microphone) frame, removing the estimated echo.
///
/// Returns the echo-return-loss enhancement (ERLE) as a ratio: the RMS of
/// the original near-end divided by the RMS of the residual. Values > 1.0
/// mean echo was reduced.
/// Returns the echo-return-loss enhancement (ERLE) as a ratio.
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
if !self.enabled {
return 1.0;
@@ -56,34 +74,44 @@ impl EchoCanceller {
let n = nearend.len();
let fl = self.filter_len;
// Compute frame-level power for double-talk detection.
let near_power: f32 = nearend.iter().map(|&s| {
let f = s as f32;
f * f
}).sum::<f32>() / n as f32;
let far_start = (self.far_end_pos + fl * ((n / fl) + 1) - n) % fl;
let far_power: f32 = (0..n).map(|i| {
let fe = self.far_end_buf[(far_start + i) % fl];
fe * fe
}).sum::<f32>() / n as f32;
// Smooth power estimates
self.far_power_avg += self.power_alpha * (far_power - self.far_power_avg);
self.near_power_avg += self.power_alpha * (near_power - self.near_power_avg);
// Double-talk detection: if near-end is much louder than far-end,
// the local speaker is active — freeze adaptation.
let adapt = if self.far_power_avg < 1.0 {
// No far-end signal — nothing to cancel, skip adaptation
false
} else {
let ratio = self.near_power_avg / (self.far_power_avg + 1.0);
ratio < self.dt_threshold
};
let mut sum_near_sq: f64 = 0.0;
let mut sum_err_sq: f64 = 0.0;
for i in 0..n {
let near_f = nearend[i] as f32;
// --- estimate echo as dot(coeffs, farend_window) ---
// The far-end window for this sample starts at
// (far_end_pos - 1 - i) mod filter_len (most recent)
// and goes back filter_len samples.
// Estimate echo: dot(coeffs, farend_window)
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
let mut echo_est: f32 = 0.0;
let mut power: f32 = 0.0;
// Position of the most-recent far-end sample for this near-end sample.
// far_end_pos points to the *next write* position, so the most-recent
// sample written is at far_end_pos - 1. We have already called
// feed_farend for this block, so the relevant samples are the last
// filter_len entries ending just before the current write position,
// offset by how far we are into this near-end frame.
//
// For sample i of the near-end frame, the corresponding far-end
// "now" is far_end_pos - n + i (wrapping).
// far_end_pos points to next-write, so most recent sample is at
// far_end_pos - 1. For the i-th near-end sample we want the
// far-end "now" to be at (far_end_pos - n + i). We add fl
// repeatedly to avoid underflow on the usize subtraction.
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
for k in 0..fl {
let fe_idx = (base + fl - k) % fl;
let fe = self.far_end_buf[fe_idx];
@@ -93,27 +121,40 @@ impl EchoCanceller {
let error = near_f - echo_est;
// --- NLMS coefficient update ---
let norm = power + 1.0; // +1 regularisation to avoid div-by-zero
let step = self.mu * error / norm;
// NLMS coefficient update — only when not in double-talk
if adapt && power > 1.0 {
let norm = power + 1.0;
let step = self.mu * error / norm;
for k in 0..fl {
let fe_idx = (base + fl - k) % fl;
let fe = self.far_end_buf[fe_idx];
self.filter_coeffs[k] += step * fe;
for k in 0..fl {
let fe_idx = (base + fl - k) % fl;
let fe = self.far_end_buf[fe_idx];
self.filter_coeffs[k] += step * fe;
}
}
// Clamp output
let out = error.max(-32768.0).min(32767.0);
// Residual echo suppression: when far-end is active, attenuate
// the residual to reduce perceptible echo.
let out = if self.far_power_avg > 100.0 && !adapt {
// Double-talk: pass through near-end with minimal suppression
error
} else if self.far_power_avg > 100.0 {
// Far-end active, not double-talk: apply suppression
error * (1.0 - self.suppress * (echo_est.abs() / (near_f.abs() + 1.0)).min(1.0))
} else {
// No far-end: pass through
error
};
let out = out.max(-32768.0).min(32767.0);
nearend[i] = out as i16;
sum_near_sq += (near_f as f64) * (near_f as f64);
sum_err_sq += (out as f64) * (out as f64);
}
// ERLE ratio
if sum_err_sq < 1.0 {
return 100.0; // near-perfect cancellation
return 100.0;
}
(sum_near_sq / sum_err_sq).sqrt() as f32
}
@@ -129,12 +170,12 @@ impl EchoCanceller {
}
/// Reset the adaptive filter to its initial state.
///
/// Zeroes out all filter coefficients and the far-end circular buffer.
pub fn reset(&mut self) {
self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0);
self.far_end_buf.iter_mut().for_each(|s| *s = 0.0);
self.far_end_pos = 0;
self.far_power_avg = 0.0;
self.near_power_avg = 0.0;
}
}
@@ -144,15 +185,15 @@ mod tests {
#[test]
fn aec_creates_with_correct_filter_len() {
let aec = EchoCanceller::new(48000, 100);
assert_eq!(aec.filter_len, 4800);
assert_eq!(aec.filter_coeffs.len(), 4800);
assert_eq!(aec.far_end_buf.len(), 4800);
let aec = EchoCanceller::new(48000, 30);
assert_eq!(aec.filter_len, 1440);
assert_eq!(aec.filter_coeffs.len(), 1440);
assert_eq!(aec.far_end_buf.len(), 1440);
}
#[test]
fn aec_passthrough_when_disabled() {
let mut aec = EchoCanceller::new(48000, 100);
let mut aec = EchoCanceller::new(48000, 30);
aec.set_enabled(false);
assert!(!aec.is_enabled());
@@ -165,7 +206,7 @@ mod tests {
#[test]
fn aec_reset_zeroes_state() {
let mut aec = EchoCanceller::new(48000, 10); // short for test speed
let mut aec = EchoCanceller::new(48000, 10);
let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect();
aec.feed_farend(&farend);
@@ -178,13 +219,9 @@ mod tests {
#[test]
fn aec_reduces_echo_of_known_signal() {
// Use a small filter for speed. Feed a known far-end signal, then
// present the *same* signal as near-end (perfect echo, no room).
// After adaptation the output energy should drop.
let filter_ms = 5; // 240 taps at 48 kHz
let filter_ms = 5;
let mut aec = EchoCanceller::new(48000, filter_ms);
// Generate a simple repeating pattern.
let frame_len = 480usize;
let make_frame = |offset: usize| -> Vec<i16> {
(0..frame_len)
@@ -195,18 +232,15 @@ mod tests {
.collect()
};
// Warm up the adaptive filter with several frames.
let mut last_erle = 1.0f32;
for frame_idx in 0..40 {
let farend = make_frame(frame_idx * frame_len);
aec.feed_farend(&farend);
// Near-end = exact copy of far-end (pure echo).
let mut nearend = farend.clone();
last_erle = aec.process_frame(&mut nearend);
}
// After 40 frames the ERLE should be meaningfully > 1.
assert!(
last_erle > 1.0,
"expected ERLE > 1.0 after adaptation, got {last_erle}"
@@ -216,13 +250,41 @@ mod tests {
#[test]
fn aec_silence_passthrough() {
let mut aec = EchoCanceller::new(48000, 10);
// Feed silence far-end
aec.feed_farend(&vec![0i16; 480]);
// Near-end is silence too
let mut frame = vec![0i16; 480];
let erle = aec.process_frame(&mut frame);
assert!(erle >= 1.0);
// Output should still be silence
assert!(frame.iter().all(|&s| s == 0));
}
#[test]
fn aec_preserves_nearend_during_doubletalk() {
// When only near-end is active (no far-end), output should
// closely match input — the AEC should not suppress speech.
let mut aec = EchoCanceller::new(48000, 30);
let frame_len = 960;
let nearend_signal: Vec<i16> = (0..frame_len)
.map(|i| {
let t = i as f64 / 48000.0;
(10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
})
.collect();
// Feed silence as far-end
aec.feed_farend(&vec![0i16; frame_len]);
let mut frame = nearend_signal.clone();
aec.process_frame(&mut frame);
// Output energy should be close to input energy (not suppressed)
let input_energy: f64 = nearend_signal.iter().map(|&s| (s as f64).powi(2)).sum();
let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum();
let ratio = output_energy / input_energy;
assert!(
ratio > 0.8,
"near-end speech should be preserved, energy ratio = {ratio:.3}"
);
}
}